From 57e22f58483a4350ebf0d07a2868932503b8ffb2 Mon Sep 17 00:00:00 2001 From: Wilber Date: Mon, 28 Nov 2022 20:56:25 +0800 Subject: [PATCH 001/154] move inference api from fluid to paddle (#48368) --- python/paddle/fluid/inference/__init__.py | 33 ------------------- python/paddle/inference/__init__.py | 33 +++++++++++-------- .../paddle/{fluid => }/inference/wrapper.py | 8 ++--- python/setup.py.in | 1 - 4 files changed, 23 insertions(+), 52 deletions(-) delete mode 100644 python/paddle/fluid/inference/__init__.py rename python/paddle/{fluid => }/inference/wrapper.py (92%) diff --git a/python/paddle/fluid/inference/__init__.py b/python/paddle/fluid/inference/__init__.py deleted file mode 100644 index 51127de403f7fd..00000000000000 --- a/python/paddle/fluid/inference/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .wrapper import ( - Config, - DataType, - PlaceType, - PrecisionType, - Tensor, - Predictor, -) -from .wrapper import convert_to_mixed_precision - -from ..core import ( - create_predictor, - get_version, - _get_phi_kernel_name, - get_num_bytes_of_data_type, - PredictorPool, - get_trt_compile_version, - get_trt_runtime_version, -) diff --git a/python/paddle/inference/__init__.py b/python/paddle/inference/__init__.py index 359f5caccb0fa0..22747d94f2a2e5 100644 --- a/python/paddle/inference/__init__.py +++ b/python/paddle/inference/__init__.py @@ -12,20 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ..fluid.inference import Config # noqa: F401 -from ..fluid.inference import DataType # noqa: F401 -from ..fluid.inference import PlaceType # noqa: F401 -from ..fluid.inference import PrecisionType # noqa: F401 -from ..fluid.inference import Tensor # noqa: F401 -from ..fluid.inference import Predictor # noqa: F401 -from ..fluid.inference import create_predictor # noqa: F401 -from ..fluid.inference import get_version # noqa: F401 -from ..fluid.inference import _get_phi_kernel_name -from ..fluid.inference import get_trt_compile_version # noqa: F401 -from ..fluid.inference import get_trt_runtime_version # noqa: F401 -from ..fluid.inference import convert_to_mixed_precision # noqa: F401 -from ..fluid.inference import get_num_bytes_of_data_type # noqa: F401 -from ..fluid.inference import PredictorPool # noqa: F401 +from .wrapper import ( + Config, + DataType, + PlaceType, + PrecisionType, + Tensor, + Predictor, + convert_to_mixed_precision, +) + +from paddle.fluid.core import ( + create_predictor, + get_version, + _get_phi_kernel_name, + get_trt_compile_version, + get_trt_runtime_version, + get_num_bytes_of_data_type, + PredictorPool, +) __all__ = [ # noqa 'Config', diff --git a/python/paddle/fluid/inference/wrapper.py b/python/paddle/inference/wrapper.py similarity index 92% rename from python/paddle/fluid/inference/wrapper.py rename to python/paddle/inference/wrapper.py index ffad69335a4272..c69cfa06f3982e 100644 --- a/python/paddle/fluid/inference/wrapper.py +++ b/python/paddle/inference/wrapper.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ..core import AnalysisConfig, PaddleDType, PaddlePlace -from ..core import PaddleInferPredictor, PaddleInferTensor -from ..core import convert_to_mixed_precision_bind -from .. 
import core +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig, PaddleDType, PaddlePlace +from paddle.fluid.core import PaddleInferPredictor, PaddleInferTensor +from paddle.fluid.core import convert_to_mixed_precision_bind import os import numpy as np diff --git a/python/setup.py.in b/python/setup.py.in index 79237c0c238c0e..cab42d8f3613ba 100755 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -330,7 +330,6 @@ packages=['paddle', 'paddle.inference.contrib', 'paddle.inference.contrib.utils', 'paddle.fluid', - 'paddle.fluid.inference', 'paddle.fluid.dygraph', 'paddle.fluid.dygraph.dygraph_to_static', 'paddle.fluid.dygraph.amp', From 11b9d85fdb08a1315c335750cb13d06f7592e8e6 Mon Sep 17 00:00:00 2001 From: Wang Bojun <105858416+wwbitejotunn@users.noreply.github.com> Date: Mon, 28 Nov 2022 21:25:34 +0800 Subject: [PATCH 002/154] fix: multihead matmul biasqk broadcast support for [1,1,seq,seq] shape (#47975) * add trt support --- paddle/fluid/inference/tensorrt/op_teller.cc | 16 +- .../tensorrt/plugin/qkv_to_context_plugin.cu | 45 ++ .../operators/fused/multihead_matmul_op.cu | 30 +- .../test_trt_convert_multihead_matmul.py | 414 ++++++++++++++++++ .../test_fused_multihead_matmul_op.py | 107 +++++ 5 files changed, 606 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 363b3132a1536b..22bd172e93b40f 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -1744,13 +1744,19 @@ struct SimpleOpTypeSetTeller : public Teller { input_shape[1] == biasqk_shape[3]; bool is_broadcastable = biasqk_shape[1] == 1 && biasqk_shape[2] == 1 && input_shape[1] == biasqk_shape[3]; + is_broadcastable = + is_broadcastable || (biasqk_shape[0] == 1 && biasqk_shape[1] == 1 && + input_shape[1] == biasqk_shape[2] && + input_shape[1] == biasqk_shape[3]); if (!(has_same_shape || is_broadcastable)) { VLOG(3) << "The BiasQK's shape is invalid, expect [" << input_shape[0] - << ", 1, 1, " << input_shape[1] << "] or [" << input_shape[0] - << ", " << head_number << ", " << input_shape[1] << ", " - << input_shape[1] << "] but [" << biasqk_shape[0] << ", " - << biasqk_shape[1] << ", " << biasqk_shape[2] << ", " - << biasqk_shape[3] << "]."; + << ", 1, 1, " << input_shape[1] << "] " + << "or [" << input_shape[0] << ", " << head_number << ", " + << input_shape[1] << ", " << input_shape[1] << "] " + << "or [" << input_shape[0] << "/1, " << 1 << ", " + << input_shape[1] << ", " << input_shape[1] << "] " + << "but got [" << biasqk_shape[0] << ", " << biasqk_shape[1] + << ", " << biasqk_shape[2] << ", " << biasqk_shape[3] << "]."; return false; } } else { diff --git a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu index 5e3f078cf9f4d5..731441463df7eb 100644 --- a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu @@ -309,6 +309,19 @@ __global__ void broadcast(const T *src, } } +template +__global__ void broadcast_batch_head_number(const T *src, + T *dst, + const int batch_size, + const int seq_len, + const int head_num) { + int batch_id = blockIdx.x % seq_len; + int dst_offset = blockIdx.x * seq_len; + if (threadIdx.x < seq_len) { + dst[threadIdx.x + dst_offset] = src[threadIdx.x + batch_id * seq_len]; + } +} + int QkvToContextPluginDynamic::enqueue( const nvinfer1::PluginTensorDesc *input_desc, const 
nvinfer1::PluginTensorDesc *output_desc, @@ -353,6 +366,22 @@ int QkvToContextPluginDynamic::enqueue( head_number_); qk_bias = temp_qk_bias; } + // fit to [batch, head_num, length, length] + [1, 1, length, length] + if (ProductDim(input_desc[1].dims) == (seq_len * seq_len)) { + temp_qk_bias_tensor.Resize({batch, head_number_, seq_len, seq_len}); + auto *temp_qk_bias = + reinterpret_cast(temp_qk_bias_tensor.mutable_data( + platform::CUDAPlace(device_id))); + int grid = batch * head_number_ * seq_len; + int block = round_up(seq_len); + broadcast_batch_head_number<<>>( + static_cast(inputs[1]), + temp_qk_bias, + batch, + seq_len, + head_number_); + qk_bias = temp_qk_bias; + } // fake qk_bias if (ProductDim(input_desc[1].dims) == ProductDim(input_desc[0].dims)) { qk_bias = fake_qk_bias_; @@ -424,6 +453,22 @@ int QkvToContextPluginDynamic::enqueue( head_number_); qk_bias = temp_qk_bias; } + // fit to [batch, head_num, length, length] + [1, 1, length, length] + if (ProductDim(input_desc[1].dims) == (seq_len * seq_len)) { + temp_qk_bias_tensor.Resize({batch, head_number_, seq_len, seq_len}); + auto *temp_qk_bias = + reinterpret_cast(temp_qk_bias_tensor.mutable_data( + platform::CUDAPlace(device_id))); + int grid = batch * head_number_ * seq_len; + int block = round_up(seq_len); + broadcast_batch_head_number<<>>( + static_cast(inputs[1]), + temp_qk_bias, + batch, + seq_len, + head_number_); + qk_bias = temp_qk_bias; + } // padding: mask_half_ = [1.0,....1.0...1.0....,0.0f] // no_padding: mask_half_ = [1.0,....1.0,.........,1.0f] bool bias_is_mask = false; diff --git a/paddle/fluid/operators/fused/multihead_matmul_op.cu b/paddle/fluid/operators/fused/multihead_matmul_op.cu index f1deedce5f133a..2e8b6f7d0a6b8a 100644 --- a/paddle/fluid/operators/fused/multihead_matmul_op.cu +++ b/paddle/fluid/operators/fused/multihead_matmul_op.cu @@ -256,6 +256,19 @@ __global__ void broadcast(const T *src, } } +template +__global__ void broadcast_batch_head_number(const T *src, + T *dst, + const int batch_size, + const int seq_len, + const int head_num) { + int src_seq_id = blockIdx.x % seq_len; + int dst_offset = blockIdx.x * seq_len; + if (threadIdx.x < seq_len) { + dst[threadIdx.x + dst_offset] = src[threadIdx.x + src_seq_id * seq_len]; + } +} + template class MultiHeadMatMulV2Kernel : public framework::OpKernel { public: @@ -286,6 +299,7 @@ class MultiHeadMatMulV2Kernel : public framework::OpKernel { Tensor temp_bias_tensor; // if bias_qk is[batch, 1, 1, seq_len], the bias_qk_d need to be broadcasted if (bias_qk && bias_qk->numel() == (batch * seq_len)) { + VLOG(4) << "Do broadcasted bias_qk from [batch, 1, 1, seq_len]"; temp_bias_tensor.Resize({batch * head_number * seq_len * seq_len}); auto *temp_qk_bias = device_ctx.template Alloc( &temp_bias_tensor, temp_bias_tensor.numel() * sizeof(T)); @@ -295,6 +309,19 @@ class MultiHeadMatMulV2Kernel : public framework::OpKernel { bias_qk_d, temp_qk_bias, seq_len, head_number); bias_qk_d = static_cast(temp_qk_bias); } + // if bias_qk is[1, 1, seq_len, seq_len], the bias_qk_d need to be + // broadcasted + if (bias_qk && bias_qk->numel() == (1 * seq_len * seq_len)) { + VLOG(4) << "do broadcasted bias_qk from [1, 1, seq_len, seq_len]"; + temp_bias_tensor.Resize({batch * head_number * seq_len * seq_len}); + auto *temp_qk_bias = device_ctx.template Alloc( + &temp_bias_tensor, temp_bias_tensor.numel() * sizeof(T)); + int grid = batch * head_number * seq_len; + int block = round_up(seq_len); + broadcast_batch_head_number<<>>( + bias_qk_d, temp_qk_bias, batch, seq_len, 
head_number); + bias_qk_d = static_cast(temp_qk_bias); + } if (!bias_qk) { int size = batch * head_number * seq_len * seq_len; temp_bias_tensor.Resize({size}); @@ -333,7 +360,8 @@ class MultiHeadMatMulV2Kernel : public framework::OpKernel { // (B * S, hidden) * (hidden, 3 * N * H) -> (B * S * 3 * N * H) auto blas = phi::funcs::GetBlas(device_ctx); blas.MatMul(input_matrix, w_matrix, &temp_out_tensor); - + VLOG(2) << "(B * S, hidden) * (hidden, 3 * N * H) -> (B * S * 3 * N * H)"; + VLOG(2) << temp_out_tensor; // temp_out_tensor.Resize(temp_out_dims); Tensor multihead_temp_tensor; diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py index 9dd7ae4a8f4325..074b55d5df1ad6 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py @@ -1081,5 +1081,419 @@ def test(self): self.run_test() +class TrtConvertMultiHeadMatmulTest_biasqk_seqseq(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def sample_program_configs(self): + def generate_input1(batch, dim1): + return np.random.random((batch, dim1, 768)).astype(np.float32) + + def generate_input2(shape): + return np.random.random(shape).astype(np.float32) + + def generate_weight1(): + return np.random.random((768, 768)).astype(np.float32) + + def generate_weight2(): + return np.random.random(768).astype(np.float32) + + def generate_weight3(): + return np.random.random((768, 768)).astype(np.float32) + + for batch in [2]: + self.batch = batch + for reshape_shape in [[0, 0, 12, 64]]: + for dim1 in [128]: + input2_shapes = [ + [batch, reshape_shape[2], dim1, dim1], + [batch, 1, 1, dim1], + ] + for input2_shape in input2_shapes: + for axis in [0]: + dics = [ + {"x_num_col_dims": 2, "y_num_col_dims": 1}, + {"axis": 2}, + {"shape": reshape_shape}, + {"axis": [0, 2, 1, 3]}, + {"x_num_col_dims": 2, "y_num_col_dims": 1}, + {"axis": 2}, + {"shape": reshape_shape}, + {"axis": [0, 2, 1, 3]}, + {"x_num_col_dims": 2, "y_num_col_dims": 1}, + {"axis": 2}, + {"shape": reshape_shape}, + {"axis": [0, 2, 1, 3]}, + { + "scale": 0.125, + "bias": 0.0, + "bias_after_scale": True, + }, + { + "alpha": 1.0, + "transpose_X": False, + "transpose_Y": True, + "fused_reshape_X": [], + "fused_reshape_Y": [], + "fused_transpose_X": [], + "fused_transpose_Y": [], + "fused_reshape_Out": [], + "fused_transpose_Out": [], + }, + {"axis": axis}, + {"axis": -1, "is_test": True}, + { + "seed": 0, + "dropout_prob": 0.10000000149011612, + "dropout_implementation": "upscale_in_train", + "fix_seed": False, + "is_test": True, + }, + { + "alpha": 1.0, + "transpose_X": False, + "transpose_Y": False, + "fused_reshape_X": [], + "fused_reshape_Y": [], + "fused_transpose_X": [], + "fused_transpose_Y": [], + "fused_reshape_Out": [], + "fused_transpose_Out": [], + }, + {"axis": [0, 2, 1, 3]}, + {"shape": [0, 0, 768]}, + {"x_num_col_dims": 2, "y_num_col_dims": 1}, + ] + + ops_config = [ + { + "op_type": "mul", + "op_inputs": { + "X": ["input_data1"], + "Y": ["mul1_weight"], + }, + "op_outputs": {"Out": ["mul1_output"]}, + "op_attrs": dics[0], + }, + { + "op_type": "elementwise_add", + "op_inputs": { + "X": ["mul1_output"], + "Y": ["elementwise_add1_weight"], + }, + "op_outputs": { + "Out": ["elementwise_add1_output"] + }, + "op_attrs": dics[1], + }, + { + "op_type": "reshape2", + 
"op_inputs": { + "X": ["elementwise_add1_output"], + }, + "op_outputs": { + "Out": ["reshape21_output"], + "XShape": ["reshape21_output_xshape"], + }, + "op_attrs": dics[2], + }, + { + "op_type": "transpose2", + "op_inputs": {"X": ["reshape21_output"]}, + "op_outputs": { + "Out": ["transpose21_output"], + "XShape": ["transpose21_output_xshape"], + }, + "op_attrs": dics[3], + }, + { + "op_type": "mul", + "op_inputs": { + "X": ["input_data1"], + "Y": ["mul2_weight"], + }, + "op_outputs": {"Out": ["mul2_output"]}, + "op_attrs": dics[4], + }, + { + "op_type": "elementwise_add", + "op_inputs": { + "X": ["mul2_output"], + "Y": ["elementwise_add2_weight"], + }, + "op_outputs": { + "Out": ["elementwise_add2_output"] + }, + "op_attrs": dics[5], + }, + { + "op_type": "reshape2", + "op_inputs": { + "X": ["elementwise_add2_output"] + }, + "op_outputs": { + "Out": ["reshape22_output"], + "XShape": ["reshape22_output_xshape"], + }, + "op_attrs": dics[6], + }, + { + "op_type": "transpose2", + "op_inputs": {"X": ["reshape22_output"]}, + "op_outputs": { + "Out": ["transpose22_output"], + "XShape": ["transpose22_output_xshape"], + }, + "op_attrs": dics[7], + }, + { + "op_type": "mul", + "op_inputs": { + "X": ["input_data1"], + "Y": ["mul3_weight"], + }, + "op_outputs": {"Out": ["mul3_output"]}, + "op_attrs": dics[8], + }, + { + "op_type": "elementwise_add", + "op_inputs": { + "X": ["mul3_output"], + "Y": ["elementwise_add3_weight"], + }, + "op_outputs": { + "Out": ["elementwise_add3_output"] + }, + "op_attrs": dics[9], + }, + { + "op_type": "reshape2", + "op_inputs": { + "X": ["elementwise_add3_output"] + }, + "op_outputs": { + "Out": ["reshape23_output"], + "XShape": ["reshape23_output_xshape"], + }, + "op_attrs": dics[10], + }, + { + "op_type": "transpose2", + "op_inputs": {"X": ["reshape23_output"]}, + "op_outputs": { + "Out": ["transpose23_output"], + "XShape": ["transpose23_output_xshape"], + }, + "op_attrs": dics[11], + }, + { + "op_type": "scale", + "op_inputs": { + "X": ["transpose23_output"], + }, + "op_outputs": {"Out": ["scale_output"]}, + "op_attrs": dics[12], + }, + { + "op_type": "matmul", + "op_inputs": { + "X": ["scale_output"], + "Y": ["transpose22_output"], + }, + "op_outputs": {"Out": ["matmul1_output"]}, + "op_attrs": dics[13], + }, + { + "op_type": "elementwise_add", + "op_inputs": { + "X": ["matmul1_output"], + "Y": ["input_data2"], + }, + "op_outputs": { + "Out": ["elementwise_add4_output"] + }, + "op_attrs": dics[14], + }, + { + "op_type": "softmax", + "op_inputs": { + "X": ["elementwise_add4_output"] + }, + "op_outputs": {"Out": ["softmax_output"]}, + "op_attrs": dics[15], + }, + { + "op_type": "dropout", + "op_inputs": { + "X": ["softmax_output"], + }, + "op_outputs": {"Out": ["dropout3_output"]}, + "op_attrs": dics[16], + }, + { + "op_type": "matmul", + "op_inputs": { + "X": ["dropout3_output"], + "Y": ["transpose21_output"], + }, + "op_outputs": {"Out": ["matmul2_output"]}, + "op_attrs": dics[17], + }, + { + "op_type": "transpose2", + "op_inputs": {"X": ["matmul2_output"]}, + "op_outputs": { + "Out": ["transpose24_output"], + "XShape": ["transpose24_output_xshape"], + }, + "op_attrs": dics[18], + }, + { + "op_type": "reshape2", + "op_inputs": {"X": ["transpose24_output"]}, + "op_outputs": { + "Out": ["reshape24_output"], + "XShape": ["reshape24_output_xshape"], + }, + "op_attrs": dics[19], + }, + # In order to fuse ops with + # multihead_matmul_fuse_pass_v2, the last op + # must be mul. 
+ { + "op_type": "mul", + "op_inputs": { + "X": ["reshape24_output"], + "Y": ["mul4_weight"], + }, + "op_outputs": {"Out": ["mul4_output"]}, + "op_attrs": dics[20], + }, + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={ + "mul1_weight": TensorConfig( + data_gen=partial(generate_weight1) + ), + "mul2_weight": TensorConfig( + data_gen=partial(generate_weight1) + ), + "mul3_weight": TensorConfig( + data_gen=partial(generate_weight1) + ), + "mul4_weight": TensorConfig( + data_gen=partial(generate_weight1) + ), + "elementwise_add1_weight": TensorConfig( + data_gen=partial(generate_weight2) + ), + "elementwise_add2_weight": TensorConfig( + data_gen=partial(generate_weight3) + ), + "elementwise_add3_weight": TensorConfig( + data_gen=partial(generate_weight2) + ), + }, + inputs={ + "input_data1": TensorConfig( + data_gen=partial( + generate_input1, batch, dim1 + ) + ), + "input_data2": TensorConfig( + data_gen=partial( + generate_input2, input2_shape + ) + ), + }, + outputs=["mul4_output"], + ) + + yield program_config + + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + # The last dim of input1 and input2 should be static. + self.dynamic_shape.min_input_shape = { + "input_data1": [1, 8, 768], + "input_data2": [1, 1, 1, 128], + "reshape24_output": [1, 128, 768], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [16, 512, 768], + "input_data2": [16, 256, 512, 128], + "reshape24_output": [1, 128, 768], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [8, 128, 768], + "input_data2": [8, 32, 64, 128], + "reshape24_output": [1, 128, 768], + } + + def clear_dynamic_shape(): + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + self.trt_param.workspace_size = 2013265920 + yield self.create_inference_config(), (1, 3), (1e-5, 1e-4) + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), (1, 3), (1e-3, 1e-3) + + def add_skip_trt_case(self): + def teller1(program_config, predictor_config): + if self.trt_param.precision == paddle_infer.PrecisionType.Half: + return True + return False + + self.add_skip_case( + teller1, + SkipReasons.TRT_NOT_IMPLEMENTED, + "The output has diff between gpu and trt in fp16 mode.", + ) + + def teller2(program_config, predictor_config): + if ( + self.trt_param.precision == paddle_infer.PrecisionType.Float32 + and len(self.dynamic_shape.min_input_shape) != 0 + and self.batch > 2 + ): + return True + return False + + self.add_skip_case( + teller2, + SkipReasons.TRT_NOT_IMPLEMENTED, + "The output has diff between gpu and trt when dynamic fp32 mode and batch size > 2.", + ) + + def teller3(program_config, predictor_config): + if self.trt_param.precision == paddle_infer.PrecisionType.Int8: + return True + return False + + self.add_skip_case( + teller3, + SkipReasons.TRT_NOT_IMPLEMENTED, + "The output has diff between gpu and trt in int8 mode.", + ) + + def test(self): + self.add_skip_trt_case() + self.run_test() + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fused_multihead_matmul_op.py 
b/python/paddle/fluid/tests/unittests/test_fused_multihead_matmul_op.py index e2b53903b6d72a..55c2a563c8cdf9 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_multihead_matmul_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_multihead_matmul_op.py @@ -29,6 +29,113 @@ def stable_softmax(x): return exps / np.sum(exps) +@unittest.skipIf( + not core.is_compiled_with_cuda(), "Paddle core is not compiled with CUDA" +) +class TestFusedMultiHeadMatmulOp_biasqk2(OpTest): + def config(self): + self.seq_len = 128 + self.size_per_head = 64 + self.head_number = 12 + self.batch_size = 8 + self.scale = 0.125 + + def setUp(self): + self.op_type = "multihead_matmul" + self.config() + h = self.seq_len + w = self.head_number * self.size_per_head + self.Input = ( + np.random.random((self.batch_size, h, w)).astype("float32") - 0.5 + ) + self.WQ = np.random.random((w, w)).astype("float32") + self.KQ = np.random.random((w, w)).astype("float32") + self.VQ = np.random.random((w, w)).astype("float32") + self.CombinedW = np.hstack((self.WQ, self.KQ, self.VQ)).reshape( + (w, 3, w) + ) + self.Q = np.dot(self.Input, self.WQ) + self.K = np.dot(self.Input, self.KQ) + self.V = np.dot(self.Input, self.VQ) + + self.BiasQ = np.random.random((1, w)).astype("float32") + self.BiasK = np.random.random((1, w)).astype("float32") + self.BiasV = np.random.random((1, w)).astype("float32") + self.CombinedB = np.vstack((self.BiasQ, self.BiasK, self.BiasV)) + self.BiasQK = np.random.random( + (1, 1, self.seq_len, self.seq_len) + ).astype("float32") + # Compute Q path + fc_q = self.Q + self.BiasQ + reshape_q = np.reshape( + fc_q, + ( + self.batch_size, + self.seq_len, + self.head_number, + self.size_per_head, + ), + ) + transpose_q = np.transpose(reshape_q, (0, 2, 1, 3)) + scale_q = self.scale * transpose_q + # Compute K path + fc_k = self.K + self.BiasK + reshape_k = np.reshape( + fc_k, + ( + self.batch_size, + self.seq_len, + self.head_number, + self.size_per_head, + ), + ) + transpose_k = np.transpose(reshape_k, (0, 2, 3, 1)) + + # Compute Q*K + q_k = np.matmul(scale_q, transpose_k) + eltadd_qk = q_k + np.tile( + self.BiasQK, [self.batch_size, self.head_number, 1, 1] + ) + softmax_qk = np.apply_along_axis(stable_softmax, 3, eltadd_qk) + # Compute V path + fc_v = self.V + self.BiasV + reshape_v = np.reshape( + fc_v, + ( + self.batch_size, + self.seq_len, + self.head_number, + self.size_per_head, + ), + ) + transpose_v = np.transpose(reshape_v, (0, 2, 1, 3)) + + # Compute QK*V + qkv = np.matmul(softmax_qk, transpose_v) + transpose_qkv = np.transpose(qkv, (0, 2, 1, 3)) + reshape_qkv = np.reshape(transpose_qkv, (self.batch_size, h, w)) + print("biasqk shape") + print(self.BiasQK.shape) + self.inputs = { + "Input": self.Input, + "W": self.CombinedW, + "Bias": self.CombinedB, + "BiasQK": self.BiasQK, + } + self.attrs = { + "transpose_Q": False, + "transpose_K": True, + "transpose_V": False, + "head_number": self.head_number, + "alpha": self.scale, + } + self.outputs = {"Out": reshape_qkv} + + def test_check_output(self): + place = core.CUDAPlace(0) + self.check_output_with_place(place, atol=2e-3) + + @unittest.skipIf( not core.is_compiled_with_cuda(), "Paddle core is not compiled with CUDA" ) From 98aaf7974c59b20a9f22a7d59fd590b71580f3db Mon Sep 17 00:00:00 2001 From: jakpiase Date: Mon, 28 Nov 2022 15:40:28 +0100 Subject: [PATCH 003/154] Reenabled reshape, squeeze and flatten oneDNN kernels (#48359) * re-enabled reshape, squeeze and flatten kernels * added formatting --- paddle/fluid/operators/flatten_op.cc | 40 
+++++++++++++ .../operators/mkldnn/reshape_mkldnn_op.cc | 31 ++-------- paddle/fluid/operators/reshape_op.cc | 18 ++++++ paddle/fluid/operators/squeeze_op.cc | 60 ++++++++++--------- 4 files changed, 94 insertions(+), 55 deletions(-) diff --git a/paddle/fluid/operators/flatten_op.cc b/paddle/fluid/operators/flatten_op.cc index 036f3b82224225..65d3f809fa11c8 100644 --- a/paddle/fluid/operators/flatten_op.cc +++ b/paddle/fluid/operators/flatten_op.cc @@ -87,6 +87,16 @@ class FlattenOp : public framework::OperatorWithKernel { const framework::ExecutionContext &ctx) const override { auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType(ctx, "X"); + +#ifdef PADDLE_WITH_MKLDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), + phi::DataLayout::ONEDNN, + framework::LibraryType::kMKLDNN); + } +#endif + return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; @@ -159,6 +169,16 @@ class FlattenGradOp : public framework::OperatorWithKernel { const framework::ExecutionContext &ctx) const override { auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType( ctx, framework::GradVarName("Out")); + +#ifdef PADDLE_WITH_MKLDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), + phi::DataLayout::ONEDNN, + framework::LibraryType::kMKLDNN); + } +#endif + return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; @@ -223,6 +243,16 @@ class Flatten2Op : public framework::OperatorWithKernel { const framework::ExecutionContext &ctx) const override { auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType(ctx, "X"); + +#ifdef PADDLE_WITH_MKLDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), + phi::DataLayout::ONEDNN, + framework::LibraryType::kMKLDNN); + } +#endif + return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; @@ -275,6 +305,16 @@ class Flatten2GradOp : public framework::OperatorWithKernel { const framework::ExecutionContext &ctx) const override { auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType( ctx, framework::GradVarName("Out")); + +#ifdef PADDLE_WITH_MKLDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), + phi::DataLayout::ONEDNN, + framework::LibraryType::kMKLDNN); + } +#endif + return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; diff --git a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc index f1b321c5ddab7a..902cd8509b4cfd 100644 --- a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc @@ -80,7 +80,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { out->Resize(x_dims); // to match x numel, format is changed later // reorder is done into a plain tag to allow usage with blocked formats auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory( - out, getPlainFormatTag(x), ctx.GetPlace()); + out, phi::funcs::GetPlainOneDNNFormat(x_dims.size()), ctx.GetPlace()); auto reorder_p = reorder_handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p); @@ -194,31 +194,6 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { } protected: - static dnnl::memory::format_tag getPlainFormatTag( - const phi::DenseTensor* tensor) { - auto tensor_dims_size = 
tensor->dims().size(); - PADDLE_ENFORCE_EQ( - tensor_dims_size <= 6 && tensor_dims_size >= 1, - true, - platform::errors::InvalidArgument( - "Dims for squeeze_grad oneDNN op must be in range <1, 6>")); - - switch (tensor_dims_size) { - case 1: - return dnnl::memory::format_tag::a; - case 2: - return dnnl::memory::format_tag::ab; - case 3: - return dnnl::memory::format_tag::abc; - case 4: - return dnnl::memory::format_tag::abcd; - case 5: - return dnnl::memory::format_tag::abcde; - default: - return dnnl::memory::format_tag::abcdef; - } - } - static framework::DDim ValidateShape(const std::vector& shape, const framework::DDim& in_dims) { const int64_t in_size = phi::product(in_dims); @@ -348,7 +323,9 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel { auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( dout->mem_desc(), phi::funcs::to_void_cast(dout->data())); auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory( - dx, this->getPlainFormatTag(dout), ctx.GetPlace()); + dx, + phi::funcs::GetPlainOneDNNFormat(dout_vec_dims.size()), + ctx.GetPlace()); auto reorder_p = reorder_handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p); diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc index e143d3e144b915..161f230bacbe44 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ b/paddle/fluid/operators/reshape_op.cc @@ -258,6 +258,15 @@ class ReshapeOp : public framework::OperatorWithKernel { auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType(ctx, "X"); +#ifdef PADDLE_WITH_MKLDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), + phi::DataLayout::ONEDNN, + framework::LibraryType::kMKLDNN); + } +#endif + return framework::OpKernelType(input_data_type, ctx.GetPlace()); } @@ -615,6 +624,15 @@ class Reshape2GradOp : public framework::OperatorWithKernel { auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType( ctx, framework::GradVarName("Out")); +#ifdef PADDLE_WITH_MKLDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), + phi::DataLayout::ONEDNN, + framework::LibraryType::kMKLDNN); + } +#endif + return framework::OpKernelType(input_data_type, ctx.GetPlace()); } diff --git a/paddle/fluid/operators/squeeze_op.cc b/paddle/fluid/operators/squeeze_op.cc index 93a03c535fe327..1afc7ac8ecd920 100644 --- a/paddle/fluid/operators/squeeze_op.cc +++ b/paddle/fluid/operators/squeeze_op.cc @@ -125,13 +125,14 @@ class SqueezeOp : public framework::OperatorWithKernel { auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType(ctx, "X"); - // #ifdef PADDLE_WITH_MKLDNN - // if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { - // return framework::OpKernelType(input_data_type, ctx.GetPlace(), - // phi::DataLayout::ONEDNN, - // framework::LibraryType::kMKLDNN); - // } - // #endif +#ifdef PADDLE_WITH_MKLDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), + phi::DataLayout::ONEDNN, + framework::LibraryType::kMKLDNN); + } +#endif return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; @@ -152,13 +153,14 @@ class SqueezeGradOp : public framework::OperatorWithKernel { auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType( ctx, framework::GradVarName("Out")); - // #ifdef PADDLE_WITH_MKLDNN - // if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { - // 
return framework::OpKernelType(input_data_type, ctx.GetPlace(), - // phi::DataLayout::ONEDNN, - // framework::LibraryType::kMKLDNN); - // } - // #endif +#ifdef PADDLE_WITH_MKLDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), + phi::DataLayout::ONEDNN, + framework::LibraryType::kMKLDNN); + } +#endif return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; @@ -219,13 +221,14 @@ class Squeeze2Op : public framework::OperatorWithKernel { auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType(ctx, "X"); - // #ifdef PADDLE_WITH_MKLDNN - // if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { - // return framework::OpKernelType(input_data_type, ctx.GetPlace(), - // phi::DataLayout::ONEDNN, - // framework::LibraryType::kMKLDNN); - // } - // #endif +#ifdef PADDLE_WITH_MKLDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), + phi::DataLayout::ONEDNN, + framework::LibraryType::kMKLDNN); + } +#endif return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; @@ -267,13 +270,14 @@ class Squeeze2GradOp : public framework::OperatorWithKernel { auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType( ctx, framework::GradVarName("Out")); - // #ifdef PADDLE_WITH_MKLDNN - // if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { - // return framework::OpKernelType(input_data_type, ctx.GetPlace(), - // phi::DataLayout::ONEDNN, - // framework::LibraryType::kMKLDNN); - // } - // #endif +#ifdef PADDLE_WITH_MKLDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { + return framework::OpKernelType(input_data_type, + ctx.GetPlace(), + phi::DataLayout::ONEDNN, + framework::LibraryType::kMKLDNN); + } +#endif return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; From a093048466daefc70cd3a5928b601fead26bf2c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awomir=20Siwek?= Date: Mon, 28 Nov 2022 16:05:35 +0100 Subject: [PATCH 004/154] eltwises + scale fuse pass (#48400) --- .../mkldnn/operator_scale_onednn_fuse_pass.cc | 14 +- .../mkldnn/elementwise_mkldnn_op.h | 5 + .../test_mkldnn_elt_act_fuse_pass_new.py | 76 ---------- ...nn_elementwise_add_activation_fuse_pass.py | 138 ++++++++++++++++++ 4 files changed, 156 insertions(+), 77 deletions(-) delete mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass_new.py create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py diff --git a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc index 2e4163feb8b4ab..31b9229bfcedd1 100644 --- a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc @@ -25,7 +25,15 @@ namespace ir { using string::PrettyLogDetail; void FuseOperatorScaleOneDNNPass::ApplyImpl(Graph *graph) const { - const std::vector fusable_ops{"fc", "matmul", "matmul_v2"}; + const std::vector fusable_ops{ + "fc", + "matmul", + "matmul_v2", + "elementwise_add", + "elementwise_sub", + "elementwise_mul", + "elementwise_div", + }; for (const auto &op : fusable_ops) FuseScale(graph, op); } @@ -105,4 +113,8 @@ REGISTER_PASS_CAPABILITY(operator_scale_onednn_fuse_pass) .EQ("fc", 0) .LE("matmul", 1) .EQ("matmul_v2", 0) + .LE("elementwise_add", 1) + .LE("elementwise_sub", 1) + .LE("elementwise_mul", 
1) + .LE("elementwise_div", 1) .EQ("scale", 0)); diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h index af6ef1fbdb0511..bb670363e79e3b 100644 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h +++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h @@ -116,6 +116,11 @@ class EltwiseMKLDNNKernel : public framework::OpKernel { dnnl::post_ops get_post_ops(const framework::ExecutionContext& ctx) const { dnnl::post_ops post_operations; platform::AppendActivation(ctx, post_operations); + if (ctx.HasAttr("fused_output_scale")) { + float scale_alpha = ctx.Attr("fused_output_scale"); + post_operations.append_eltwise( + 1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f); + } return post_operations; } diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass_new.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass_new.py deleted file mode 100644 index a795f4ab8689e7..00000000000000 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass_new.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest -from functools import partial - -import hypothesis.strategies as st -import numpy as np -from auto_scan_test import PassAutoScanTest -from program_config import ProgramConfig, TensorConfig - - -class TestElementWiseAddReluFusePass(PassAutoScanTest): - def is_program_valid(self, program_config: ProgramConfig) -> bool: - return True - - def sample_program_config(self, draw): - batch_size = draw(st.integers(min_value=1, max_value=4)) - - def generate_input(): - return np.random.random([batch_size, 3, 100, 100]).astype( - np.float32 - ) - - ops_config = [ - { - "op_type": "elementwise_add", - "op_inputs": {"X": ["A"], "Y": ["B"]}, - "op_outputs": {"Out": ["add_output"]}, - "op_attrs": {}, - }, - { - "op_type": "relu", - "op_inputs": {"X": ["add_output"]}, - "op_outputs": {"Out": ["relu_output"]}, - "op_attrs": {}, - }, - ] - - ops = self.generate_op_config(ops_config) - - program_config = ProgramConfig( - ops=ops, - weights={}, - inputs={ - "A": TensorConfig(data_gen=partial(generate_input)), - "B": TensorConfig(data_gen=partial(generate_input)), - }, - outputs=["relu_output"], - ) - - return program_config - - def sample_predictor_configs(self, program_config): - config = self.create_inference_config(use_mkldnn=True) - yield config, ["elementwise_add"], (1e-5, 1e-5) - - def test(self): - self.run_and_statis( - quant=False, passes=["elt_act_mkldnn_fuse_pass"], min_success_num=4 - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py new file mode 100644 index 00000000000000..93165f829b1c8a --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py @@ -0,0 +1,138 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +from functools import partial + +import hypothesis.strategies as st +import numpy as np +from auto_scan_test import PassAutoScanTest +from program_config import OpConfig, ProgramConfig, TensorConfig + + +class TestElementwiseAddActivationOneDNNFusePass(PassAutoScanTest): + def sample_program_config(self, draw): + batch_size = draw(st.sampled_from([1, 32])) + activation_type = draw( + st.sampled_from( + [ + 'relu', + 'gelu', + 'swish', + 'mish', + 'sqrt', + 'hard_swish', + 'sigmoid', + 'abs', + 'relu6', + 'clip', + 'tanh', + 'hard_sigmoid', + 'leaky_relu', + 'scale', + ] + ) + ) + + def generate_input(): + return np.random.random([batch_size, 3, 100, 100]).astype( + np.float32 + ) + + elementwise_op = OpConfig( + type='elementwise_add', + inputs={'X': ['eltwise_X'], 'Y': ['eltwise_Y']}, + outputs={'Out': ['eltwise_output']}, + attrs={"use_mkldnn": True}, + ) + + if activation_type == 'relu6': + activation_op = OpConfig( + activation_type, + inputs={'X': ['eltwise_output']}, + outputs={'Out': ['activation_output']}, + threshold=draw(st.floats(min_value=1.0, max_value=10.0)), + ) + elif activation_type == "leaky_relu": + activation_op = OpConfig( + activation_type, + inputs={"X": ["eltwise_output"]}, + outputs={"Out": ["activation_output"]}, + alpha=draw(st.floats(min_value=0.1, max_value=1.0)), + ) + elif activation_type == "scale": + activation_op = OpConfig( + activation_type, + inputs={"X": ["eltwise_output"]}, + outputs={"Out": ["activation_output"]}, + scale=draw(st.sampled_from([0.125, 0.4, 0.875, 2])), + ) + elif activation_type == 'swish': + activation_op = OpConfig( + activation_type, + inputs={'X': ['eltwise_output']}, + outputs={'Out': ['activation_output']}, + beta=draw(st.floats(min_value=0.1, max_value=1.0)), + ) + elif activation_type == 'clip': + activation_op = OpConfig( + activation_type, + inputs={'X': ['eltwise_output']}, + outputs={'Out': ['activation_output']}, + min=draw(st.floats(min_value=0.1, max_value=0.49)), + max=draw(st.floats(min_value=0.5, max_value=1.0)), + ) + else: + activation_op = OpConfig( + activation_type, + inputs={'X': ['eltwise_output']}, + outputs={'Out': ['activation_output']}, + ) + + mini_graph = [elementwise_op, activation_op] + + program_config = ProgramConfig( + ops=mini_graph, + weights={}, + inputs={ + "eltwise_X": TensorConfig(data_gen=partial(generate_input)), + "eltwise_Y": TensorConfig(data_gen=partial(generate_input)), + }, + outputs=["activation_output"], + ) + + return program_config + + def sample_predictor_configs(self, program_config): + config = self.create_inference_config( + use_mkldnn=True, + passes=[ + 'elt_act_mkldnn_fuse_pass', + 'operator_scale_onednn_fuse_pass', + ], + ) + yield config, ['elementwise_add'], (1e-5, 1e-5) + + def test(self): + self.run_and_statis( + quant=False, + passes=[ + 'elt_act_mkldnn_fuse_pass', + 'operator_scale_onednn_fuse_pass', + ], + ) + + +if __name__ == '__main__': + unittest.main() From af3fabf91649f0f24de75608f6445be58d455928 Mon Sep 17 00:00:00 2001 From: yunyaoXYY <109218879+yunyaoXYY@users.noreply.github.com> Date: Tue, 29 Nov 2022 09:38:11 +0800 Subject: [PATCH 005/154] [Clean fluid] Clean fluid roi_pool,roi_align,psroi_pool and prroi_pool (#48393) * Clean fluid resize_linear API * Clean fluid image_resize_short API * add image_resize back * Clean psroi_pool and prroo_pool * Clean roi_pool and roi_align * delete test_trt_roi_align_op.py --- python/paddle/fluid/layers/nn.py | 394 ------------------ .../fluid/tests/unittests/CMakeLists.txt | 1 - 
.../ir/inference/test_trt_roi_align_op.py | 139 ------ .../fluid/tests/unittests/test_layers.py | 80 ---- .../tests/unittests/test_prroi_pool_op.py | 324 -------------- .../fluid/tests/unittests/test_roi_pool_op.py | 29 -- 6 files changed, 967 deletions(-) delete mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_roi_align_op.py delete mode 100644 python/paddle/fluid/tests/unittests/test_prroi_pool_op.py diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 21cf3242f1d1f4..eb7532c0549923 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -97,8 +97,6 @@ 'lod_reset', 'lod_append', 'pad', - 'roi_pool', - 'roi_align', 'image_resize', 'resize_bilinear', 'resize_trilinear', @@ -136,8 +134,6 @@ 'get_tensor_from_selected_rows', 'temporal_shift', 'py_func', - 'psroi_pool', - 'prroi_pool', 'pixel_shuffle', 'fsp_matrix', 'continuous_value_model', @@ -4979,240 +4975,6 @@ def pad(x, paddings, pad_value=0.0, name=None): return out -@templatedoc() -def roi_pool( - input, - rois, - pooled_height=1, - pooled_width=1, - spatial_scale=1.0, - rois_num=None, - name=None, -): - """ - - This operator implements the roi_pooling layer. - Region of interest pooling (also known as RoI pooling) is to perform max pooling on inputs of nonuniform sizes to obtain fixed-size feature maps (e.g. 7*7). - - The operator has three steps: - - 1. Dividing each region proposal into equal-sized sections with the pooled_width and pooled_height; - 2. Finding the largest value in each section; - 3. Copying these max values to the output buffer. - - For more information, please refer to https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn - - Args: - input (Variable): Input feature, 4D-Tensor with the shape of [N,C,H,W], where N is the batch size, C is the input channel, H is Height, W is weight. The data type is float32 or float64. - rois (Variable): ROIs (Regions of Interest) to pool over. 2D-LoDTensor with the shape of [num_rois,4], the lod level is 1. Given as [[x1, y1, x2, y2], ...], (x1, y1) is the top left coordinates, and (x2, y2) is the bottom right coordinates. - pooled_height (int, optional): The pooled output height, data type is int32. Default: 1 - pooled_width (int, optional): The pooled output height, data type is int32. Default: 1 - spatial_scale (float, optional): Multiplicative spatial scale factor to translate ROI coords from their input scale to the scale used when pooling. Default: 1.0 - rois_num (Tensor): The number of RoIs in each image. Default: None - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - - - Returns: - Variable: The pooled feature, 4D-Tensor with the shape of [num_rois, C, pooled_height, pooled_width]. - - - Examples: - - .. 
code-block:: python - - import paddle.fluid as fluid - import numpy as np - import paddle - paddle.enable_static() - - DATATYPE='float32' - - place = fluid.CPUPlace() - #place = fluid.CUDAPlace(0) - - input_data = np.array([i for i in range(1,17)]).reshape(1,1,4,4).astype(DATATYPE) - roi_data =fluid.create_lod_tensor(np.array([[1., 1., 2., 2.], [1.5, 1.5, 3., 3.]]).astype(DATATYPE),[[2]], place) - rois_num_data = np.array([2]).astype('int32') - - x = fluid.data(name='input', shape=[None,1,4,4], dtype=DATATYPE) - rois = fluid.data(name='roi', shape=[None,4], dtype=DATATYPE) - rois_num = fluid.data(name='rois_num', shape=[None], dtype='int32') - - pool_out = fluid.layers.roi_pool( - input=x, - rois=rois, - pooled_height=1, - pooled_width=1, - spatial_scale=1.0, - rois_num=rois_num) - - exe = fluid.Executor(place) - out, = exe.run(feed={'input':input_data ,'roi':roi_data, 'rois_num': rois_num_data}, fetch_list=[pool_out.name]) - print(out) #array([[[[11.]]], [[[16.]]]], dtype=float32) - print(np.array(out).shape) # (2, 1, 1, 1) - """ - if _non_static_mode(): - assert ( - rois_num is not None - ), "rois_num should not be None in dygraph mode." - pool_out, argmaxes = _legacy_C_ops.roi_pool( - input, - rois, - rois_num, - "pooled_height", - pooled_height, - "pooled_width", - pooled_width, - "spatial_scale", - spatial_scale, - ) - return pool_out, argmaxes - - check_variable_and_dtype(input, 'input', ['float32'], 'roi_pool') - check_variable_and_dtype(rois, 'rois', ['float32'], 'roi_pool') - helper = LayerHelper('roi_pool', **locals()) - dtype = helper.input_dtype() - pool_out = helper.create_variable_for_type_inference(dtype) - argmaxes = helper.create_variable_for_type_inference(dtype='int32') - - inputs = { - "X": input, - "ROIs": rois, - } - if rois_num is not None: - inputs['RoisNum'] = rois_num - helper.append_op( - type="roi_pool", - inputs=inputs, - outputs={"Out": pool_out, "Argmax": argmaxes}, - attrs={ - "pooled_height": pooled_height, - "pooled_width": pooled_width, - "spatial_scale": spatial_scale, - }, - ) - return pool_out - - -@templatedoc() -def roi_align( - input, - rois, - pooled_height=1, - pooled_width=1, - spatial_scale=1.0, - sampling_ratio=-1, - rois_num=None, - name=None, -): - """ - - ${comment} - - Args: - input (Variable): ${x_comment} - rois (Variable): ROIs (Regions of Interest) to pool over.It should be - a 2-D LoDTensor of shape (num_rois, 4), the lod level is 1. The - data type is float32 or float64. Given as [[x1, y1, x2, y2], ...], - (x1, y1) is the top left coordinates, and (x2, y2) is the bottom - right coordinates. - pooled_height (int32, optional): ${pooled_height_comment} Default: 1 - pooled_width (int32, optional): ${pooled_width_comment} Default: 1 - spatial_scale (float32, optional): ${spatial_scale_comment} Default: 1.0 - sampling_ratio(int32, optional): ${sampling_ratio_comment} Default: -1 - rois_num (Tensor): The number of RoIs in each image. Default: None - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - - Returns: - Variable: - - Output: ${out_comment}. - - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - import paddle - paddle.enable_static() - - x = fluid.data( - name='data', shape=[None, 256, 32, 32], dtype='float32') - rois = fluid.data( - name='rois', shape=[None, 4], dtype='float32') - rois_num = fluid.data(name='rois_num', shape=[None], dtype='int32') - align_out = fluid.layers.roi_align(input=x, - rois=rois, - pooled_height=7, - pooled_width=7, - spatial_scale=0.5, - sampling_ratio=-1, - rois_num=rois_num) - """ - if in_dygraph_mode(): - assert ( - rois_num is not None - ), "rois_num should not be None in dygraph mode." - return _C_ops.roi_align( - input, - rois, - rois_num, - pooled_height, - pooled_width, - spatial_scale, - sampling_ratio, - False, - ) - if _in_legacy_dygraph(): - assert ( - rois_num is not None - ), "rois_num should not be None in dygraph mode." - align_out = _legacy_C_ops.roi_align( - input, - rois, - rois_num, - "pooled_height", - pooled_height, - "pooled_width", - pooled_width, - "spatial_scale", - spatial_scale, - "sampling_ratio", - sampling_ratio, - ) - return align_out - - check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'roi_align' - ) - check_variable_and_dtype(rois, 'rois', ['float32', 'float64'], 'roi_align') - helper = LayerHelper('roi_align', **locals()) - dtype = helper.input_dtype() - align_out = helper.create_variable_for_type_inference(dtype) - inputs = { - "X": input, - "ROIs": rois, - } - if rois_num is not None: - inputs['RoisNum'] = rois_num - helper.append_op( - type="roi_align", - inputs=inputs, - outputs={"Out": align_out}, - attrs={ - "pooled_height": pooled_height, - "pooled_width": pooled_width, - "spatial_scale": spatial_scale, - "sampling_ratio": sampling_ratio, - }, - ) - return align_out - - def image_resize( input, out_shape=None, @@ -9097,162 +8859,6 @@ def py_func_demo(): py_func.registered_func_num = PyFuncRegistry.registered_func_num -@templatedoc() -def psroi_pool( - input, - rois, - output_channels, - spatial_scale, - pooled_height, - pooled_width, - name=None, -): - """ - - ${comment} - - Parameters: - input (Variable): ${x_comment} - rois (Variable): LoDTensor, ROIs (Regions of Interest) to pool over.It should be - a 2-D LoDTensor of shape (num_rois, 4), the lod level - is 1. Given as [[x1, y1, x2, y2], ...], (x1, y1) is - the top left coordinates, and (x2, y2) is the bottom - right coordinates. The data type is the same as `input` - output_channels (int): ${output_channels_comment} - spatial_scale (float): ${spatial_scale_comment} Default: 1.0 - pooled_height (int): ${pooled_height_comment} Default: 1 - pooled_width (int): ${pooled_width_comment} Default: 1 - name(str, optional): The default value is None. - Normally there is no need for user to set this property. - For more information, please refer to :ref:`api_guide_Name` - - Returns: - ${out_comment}. - - Return Type: - Variable - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - import paddle - paddle.enable_static() - x = fluid.data(name='x', shape=[100, 490, 28, 28], dtype='float32') - rois = fluid.data(name='rois', shape=[None, 4], lod_level=1, dtype='float32') - pool_out = fluid.layers.psroi_pool(x, rois, 10, 1.0, 7, 7) - """ - helper = LayerHelper('psroi_pool', **locals()) - # check attrs - if not isinstance(output_channels, int): - raise TypeError("output_channels must be int type") - if not isinstance(spatial_scale, float): - raise TypeError("spatial_scale must be float type") - if not isinstance(pooled_height, int): - raise TypeError("pooled_height must be int type") - if not isinstance(pooled_width, int): - raise TypeError("pooled_width must be int type") - dtype = helper.input_dtype() - out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='psroi_pool', - inputs={'X': input, 'ROIs': rois}, - outputs={'Out': out}, - attrs={ - 'output_channels': output_channels, - 'spatial_scale': spatial_scale, - 'pooled_height': pooled_height, - 'pooled_width': pooled_width, - }, - ) - return out - - -@templatedoc() -def prroi_pool( - input, - rois, - spatial_scale=1.0, - pooled_height=1, - pooled_width=1, - batch_roi_nums=None, - name=None, -): - """ - - The precise roi pooling implementation for paddle. Reference: https://arxiv.org/pdf/1807.11590.pdf - - Args: - input (Variable):The input of precise roi pooliing.The shape of input tensor is - [N,C,H,W]. Where N is batch size,C is number of input channels,H - is height of the feature, and W is the width of the feature. - rois (Variable): ROIs (Regions of Interest) to pool over.It should be - a 2-D LoDTensor or Tensor of shape (num_rois, 4), the lod level - is 1 when it is LoDTensor. The LoD include the rois's batch index - information. If rois is Tensor, its batch index information should - be provided by batch_index. - Given as [[x1, y1, x2, y2], ...], (x1, y1) is - the top left coordinates, and (x2, y2) is the bottom - right coordinates. - spatial_scale (float): Ratio of input feature map height (or width) to raw image height (or width). - Equals the reciprocal of total stride in convolutional layers, Default: 1.0. - pooled_height (integer): The pooled output height. Default: 1. - pooled_width (integer): The pooled output width. Default: 1. - batch_roi_nums (Variable): The number of roi for each image in batch. It - should be 1-D Tensor, with shape [N] and dtype int64, - where N is the batch size. Default: None. Be note: The lod of input should be - empty when batch_roi_nums has values; - name (str, default None): The name of this operation. - - Returns: - Variable(Tensor):The shape of the returned Tensor is (N, C, pooled_height, pooled_width), with value type float32,float16. N, C denote batch_size and channels of input respectively. - - Examples: - .. 
code-block:: python - - ## prroi_pool without batch_roi_num - import paddle.fluid as fluid - x = fluid.data(name='x', shape=[None, 490, 28, 28], dtype='float32') - rois = fluid.data(name='rois', shape=[None, 4], lod_level=1, dtype='float32') - pool_out = fluid.layers.prroi_pool(x, rois, 1.0, 7, 7) - - ## prroi_pool with batch_roi_num - batchsize=4 - x2 = fluid.data(name='x2', shape=[batchsize, 490, 28, 28], dtype='float32') - rois2 = fluid.data(name='rois2', shape=[batchsize, 4], dtype='float32') - batch_rois_num = fluid.data(name='rois_nums', shape=[batchsize], dtype='int64') - pool_out2 = fluid.layers.prroi_pool(x2, rois2, 1.0, 7, 7, batch_roi_nums=batch_rois_num) - - - """ - check_variable_and_dtype(input, 'input', ['float32'], 'prroi_pool') - check_variable_and_dtype(rois, 'rois', ['float32'], 'prroi_pool') - helper = LayerHelper('prroi_pool', **locals()) - # check attrs - if not isinstance(spatial_scale, float): - raise TypeError("spatial_scale must be float type") - if not isinstance(pooled_height, int): - raise TypeError("pooled_height must be int type") - if not isinstance(pooled_width, int): - raise TypeError("pooled_width must be int type") - dtype = helper.input_dtype() - out = helper.create_variable_for_type_inference(dtype) - inputs_op = {'X': input, 'ROIs': rois} - if batch_roi_nums is not None: - inputs_op['BatchRoINums'] = batch_roi_nums - helper.append_op( - type='prroi_pool', - inputs=inputs_op, - outputs={'Out': out}, - attrs={ - 'spatial_scale': spatial_scale, - 'pooled_height': pooled_height, - 'pooled_width': pooled_width, - }, - ) - return out - - def pixel_shuffle(x, upscale_factor): """ diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index d1eaebcdc2e654..2b9b5e7c3d23aa 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -1090,7 +1090,6 @@ set_tests_properties(test_parallel_executor_mnist PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_ptb_rnn PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_save_load_v2 PROPERTIES TIMEOUT 120) set_tests_properties(test_conv2d_transpose_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_prroi_pool_op PROPERTIES TIMEOUT 120) set_tests_properties(test_multiprocess_dataloader_iterable_dataset_static PROPERTIES TIMEOUT 120) set_tests_properties(test_lstm_cudnn_op PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_roi_align_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_roi_align_op.py deleted file mode 100644 index 0cd00ed0e4dad1..00000000000000 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_roi_align_op.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np -from inference_pass_test import InferencePassTest - -import paddle.fluid as fluid -import paddle.fluid.core as core -from paddle.fluid.core import AnalysisConfig, PassVersionChecker - - -class TRTRoiAlignTest(InferencePassTest): - def setUp(self): - self.bs = 2 - self.num_rois = 4 - self.channel = 8 - self.height = 16 - self.width = 16 - self.precision = AnalysisConfig.Precision.Float32 - self.serialize = False - self.enable_trt = True - - def build(self): - self.trt_parameters = TRTRoiAlignTest.TensorRTParam( - 1 << 30, - self.bs * self.num_rois, - 1, - self.precision, - self.serialize, - False, - ) - with fluid.program_guard(self.main_program, self.startup_program): - data_shape = [-1, self.channel, self.height, self.width] - data = fluid.data(name='data', shape=data_shape, dtype='float32') - rois = fluid.data( - name='rois', shape=[-1, 4], dtype='float32', lod_level=1 - ) - roi_align_out = fluid.layers.roi_align(data, rois) - out = fluid.layers.batch_norm(roi_align_out, is_test=True) - - rois_lod = fluid.create_lod_tensor( - np.random.random([self.bs * self.num_rois, 4]).astype('float32'), - [[self.num_rois, self.num_rois]], - fluid.CPUPlace(), - ) - - data_shape[0] = self.bs - self.feeds = { - 'data': np.random.random(data_shape).astype('float32'), - 'rois': rois_lod, - } - self.fetch_list = [out] - - def check_output(self): - if core.is_compiled_with_cuda(): - use_gpu = True - atol = 1e-5 - if self.trt_parameters.precision == AnalysisConfig.Precision.Half: - atol = 1e-3 - self.check_output_with_option(use_gpu, atol, flatten=True) - self.assertTrue( - PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') - ) - - def set_dynamic(self): - min_shape_spec = dict() - max_shape_spec = dict() - opt_shape_spec = dict() - min_shape_spec['data'] = [ - self.bs, - self.channel, - self.height // 2, - self.width // 2, - ] - min_shape_spec['rois'] = [1, 4] - max_shape_spec['data'] = [ - self.bs, - self.channel, - self.height * 2, - self.width * 2, - ] - max_shape_spec['rois'] = [self.bs * self.num_rois, 4] - opt_shape_spec['data'] = [ - self.bs, - self.channel, - self.height, - self.width, - ] - opt_shape_spec['rois'] = [self.bs * self.num_rois, 4] - - self.dynamic_shape_params = InferencePassTest.DynamicShapeParam( - min_shape_spec, max_shape_spec, opt_shape_spec, False - ) - - def run_test(self): - self.build() - self.check_output() - - def test_base(self): - self.run_test() - - def test_fp16(self): - self.precision = AnalysisConfig.Precision.Half - self.run_test() - - def test_serialize(self): - self.serialize = True - self.run_test() - - def test_dynamic(self): - self.set_dynamic() - self.run_test() - - def test_dynamic_fp16(self): - self.set_dynamic() - self.precision = AnalysisConfig.Precision.Half - self.run_test() - - def test_dynamic_serialize(self): - self.set_dynamic() - self.serialize = True - self.run_test() - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index f5294fd5862726..02f946810b4b9f 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -3841,16 +3841,6 @@ def test_fill_constant_batch_size_like(self): ) return out - def test_psroi_pool(self): - # TODO(minqiyang): dygraph do not support lod now - with self.static_graph(): - x = layers.data(name="x", shape=[245, 30, 30], dtype="float32") - rois = layers.data( - name="rois", shape=[4], dtype="float32", 
lod_level=1 - ) - output = layers.psroi_pool(x, rois, 5, 0.25, 7, 7) - return output - def test_sequence_expand(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): @@ -3990,82 +3980,12 @@ def test_rank_attention(self): ) return out - def test_roi_pool(self): - x_np = np.random.rand(2, 3, 8, 8).astype('float32') - rois_np = np.random.rand(3, 4).astype('float32') - rois_num_np = np.array([1, 2]).astype('int32') - - with self.static_graph(): - x = layers.data(name="x", shape=[3, 8, 8], dtype="float32") - rois = layers.data(name="rois", shape=[4], dtype="float32") - rois_num = fluid.data(name="rois_num", shape=[None], dtype="int32") - output = layers.roi_pool(x, rois, 4, 4, 0.5, rois_num=rois_num) - static_res = self.get_static_graph_result( - feed={'x': x_np, 'rois': rois_np, 'rois_num': rois_num_np}, - fetch_list=[output], - )[0] - - with self.dynamic_graph(): - with _test_eager_guard(): - x_dy = base.to_variable(x_np) - rois_dy = base.to_variable(rois_np) - rois_num_dy = base.to_variable(rois_num_np) - dy_eager_res = layers.roi_pool( - x_dy, rois_dy, 4, 4, 0.5, rois_num=rois_num_dy - ) - dy_eager_res_value = dy_eager_res[0].numpy() - - x_dy = base.to_variable(x_np) - rois_dy = base.to_variable(rois_np) - rois_num_dy = base.to_variable(rois_num_np) - dy_res = layers.roi_pool( - x_dy, rois_dy, 4, 4, 0.5, rois_num=rois_num_dy - ) - dy_res_value = dy_res[0].numpy() - np.testing.assert_array_equal(static_res, dy_res_value) - np.testing.assert_array_equal(static_res, dy_eager_res_value) - def test_sequence_enumerate(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): x = layers.data(name="input", shape=[1], dtype='int32', lod_level=1) out = layers.sequence_enumerate(input=x, win_size=2, pad_value=0) - def test_roi_align(self): - x_np = np.random.rand(2, 3, 8, 8).astype('float32') - rois_np = np.random.rand(3, 4).astype('float32') - rois_num_np = np.array([1, 2]).astype('int32') - - with self.static_graph(): - x = layers.data(name="x", shape=[3, 8, 8], dtype="float32") - rois = layers.data(name="rois", shape=[4], dtype="float32") - rois_num = fluid.data(name="rois_num", shape=[None], dtype="int32") - output = layers.roi_align(x, rois, 4, 4, 0.5, 2, rois_num=rois_num) - static_res = self.get_static_graph_result( - feed={'x': x_np, 'rois': rois_np, 'rois_num': rois_num_np}, - fetch_list=[output], - )[0] - - with self.dynamic_graph(): - with _test_eager_guard(): - x_dy = base.to_variable(x_np) - rois_dy = base.to_variable(rois_np) - rois_num_dy = base.to_variable(rois_num_np) - dy_eager_res = layers.roi_align( - x_dy, rois_dy, 4, 4, 0.5, 2, rois_num=rois_num_dy - ) - dy_eager_res_value = dy_eager_res.numpy() - - x_dy = base.to_variable(x_np) - rois_dy = base.to_variable(rois_np) - rois_num_dy = base.to_variable(rois_num_np) - dy_res = layers.roi_align( - x_dy, rois_dy, 4, 4, 0.5, 2, rois_num=rois_num_dy - ) - dy_res_value = dy_res.numpy() - np.testing.assert_array_equal(static_res, dy_eager_res_value) - np.testing.assert_array_equal(static_res, dy_res_value) - def test_roi_perspective_transform(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): diff --git a/python/paddle/fluid/tests/unittests/test_prroi_pool_op.py b/python/paddle/fluid/tests/unittests/test_prroi_pool_op.py deleted file mode 100644 index 4a953b463bea51..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_prroi_pool_op.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import unittest -from py_precise_roi_pool import PyPrRoIPool -from op_test import OpTest -import paddle -import paddle.fluid as fluid -from paddle.fluid import Program, program_guard - - -class TestPRROIPoolOp(OpTest): - def set_data(self): - self.init_test_case() - self.make_rois() - self.prRoIPool = PyPrRoIPool() - self.outs = self.prRoIPool.compute( - self.x, - self.rois, - self.output_channels, - self.spatial_scale, - self.pooled_height, - self.pooled_width, - ).astype('float32') - self.inputs = {'X': self.x, 'ROIs': (self.rois[:, 1:5], self.rois_lod)} - self.attrs = { - 'output_channels': self.output_channels, - 'spatial_scale': self.spatial_scale, - 'pooled_height': self.pooled_height, - 'pooled_width': self.pooled_width, - } - self.outputs = {'Out': self.outs} - - def init_test_case(self): - self.batch_size = 3 - self.channels = 3 * 2 * 2 - self.height = 12 - self.width = 16 - - self.x_dim = [self.batch_size, self.channels, self.height, self.width] - - self.spatial_scale = 1.0 / 2.0 - self.output_channels = self.channels - self.pooled_height = 4 - self.pooled_width = 4 - - self.x = np.random.random(self.x_dim).astype('float32') - - def make_rois(self): - rois = [] - self.rois_lod = [[]] - for bno in range(self.batch_size): - self.rois_lod[0].append(bno + 1) - for i in range(bno + 1): - x1 = np.random.uniform( - 0, self.width // self.spatial_scale - self.pooled_width - ) - y1 = np.random.uniform( - 0, self.height // self.spatial_scale - self.pooled_height - ) - - x2 = np.random.uniform( - x1 + self.pooled_width, self.width // self.spatial_scale - ) - y2 = np.random.uniform( - y1 + self.pooled_height, self.height // self.spatial_scale - ) - roi = [bno, x1, y1, x2, y2] - rois.append(roi) - self.rois_num = len(rois) - self.rois = np.array(rois).astype('float32') - - def setUp(self): - self.op_type = 'prroi_pool' - self.set_data() - - def test_check_output(self): - self.check_output(check_eager=True) - - def test_backward(self): - places = [fluid.CPUPlace()] - if fluid.core.is_compiled_with_cuda(): - places.append(fluid.CUDAPlace(0)) - for place in places: - self.check_grad_with_place(place, ['X'], 'Out', check_eager=True) - - def run_net(self, place): - with program_guard(Program(), Program()): - x = fluid.layers.data( - name="X", - shape=[self.channels, self.height, self.width], - dtype="float32", - ) - rois = fluid.layers.data( - name="ROIs", shape=[4], dtype="float32", lod_level=1 - ) - output = fluid.layers.prroi_pool(x, rois, 0.25, 2, 2) - loss = paddle.mean(output) - optimizer = fluid.optimizer.SGD(learning_rate=1e-3) - optimizer.minimize(loss) - input_x = fluid.create_lod_tensor(self.x, [], place) - input_rois = fluid.create_lod_tensor( - self.rois[:, 1:5], self.rois_lod, place - ) - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - exe.run( - fluid.default_main_program(), {'X': input_x, "ROIs": input_rois} - ) - - def test_net(self): - places = [fluid.CPUPlace()] - if 
fluid.core.is_compiled_with_cuda(): - places.append(fluid.CUDAPlace(0)) - for place in places: - self.run_net(place) - - def test_errors(self): - with program_guard(Program(), Program()): - x = fluid.layers.data( - name="x", shape=[245, 30, 30], dtype="float32" - ) - rois = fluid.layers.data( - name="rois", shape=[4], dtype="float32", lod_level=1 - ) - # spatial_scale must be float type - self.assertRaises( - TypeError, fluid.layers.prroi_pool, x, rois, 2, 7, 7 - ) - # pooled_height must be int type - self.assertRaises( - TypeError, fluid.layers.prroi_pool, x, rois, 0.25, 0.7, 7 - ) - # pooled_width must be int type - self.assertRaises( - TypeError, fluid.layers.prroi_pool, x, rois, 0.25, 7, 0.7 - ) - - -class TestPRROIPoolOpTensorRoIs(OpTest): - def set_data(self): - self.init_test_case() - self.make_rois() - self.prRoIPool = PyPrRoIPool() - self.outs = self.prRoIPool.compute( - self.x, - self.rois, - self.output_channels, - self.spatial_scale, - self.pooled_height, - self.pooled_width, - ).astype('float32') - - self.rois_index = np.array(self.rois_lod).reshape([-1]).astype(np.int64) - self.inputs = { - 'X': self.x, - 'ROIs': self.rois[:, 1:5], - 'BatchRoINums': self.rois_index, - } - self.attrs = { - 'output_channels': self.output_channels, - 'spatial_scale': self.spatial_scale, - 'pooled_height': self.pooled_height, - 'pooled_width': self.pooled_width, - } - self.outputs = {'Out': self.outs} - - def init_test_case(self): - self.batch_size = 3 - self.channels = 3 * 2 * 2 - self.height = 12 - self.width = 16 - - self.x_dim = [self.batch_size, self.channels, self.height, self.width] - - self.spatial_scale = 1.0 / 2.0 - self.output_channels = self.channels - self.pooled_height = 4 - self.pooled_width = 4 - - self.x = np.random.random(self.x_dim).astype('float32') - - def make_rois(self): - rois = [] - self.rois_lod = [] - for bno in range(self.batch_size): - self.rois_lod.append(bno + 1) - for i in range(bno + 1): - x1 = np.random.uniform( - 0, self.width // self.spatial_scale - self.pooled_width - ) - y1 = np.random.uniform( - 0, self.height // self.spatial_scale - self.pooled_height - ) - - x2 = np.random.uniform( - x1 + self.pooled_width, self.width // self.spatial_scale - ) - y2 = np.random.uniform( - y1 + self.pooled_height, self.height // self.spatial_scale - ) - roi = [bno, x1, y1, x2, y2] - rois.append(roi) - self.rois_num = len(rois) - self.rois = np.array(rois).astype('float32') - - def setUp(self): - self.op_type = 'prroi_pool' - self.set_data() - - def test_check_output(self): - self.check_output(check_eager=True) - - def test_backward(self): - places = [fluid.CPUPlace()] - if fluid.core.is_compiled_with_cuda(): - places.append(fluid.CUDAPlace(0)) - for place in places: - self.check_grad_with_place(place, ['X'], 'Out', check_eager=True) - - def run_net(self, place): - with program_guard(Program(), Program()): - x = fluid.layers.data( - name="X", - shape=[self.channels, self.height, self.width], - dtype="float32", - ) - rois = fluid.layers.data(name="ROIs", shape=[4], dtype="float32") - rois_index = fluid.layers.data( - name='rois_idx', shape=[], dtype="int64" - ) - output = fluid.layers.prroi_pool( - x, rois, 0.25, 2, 2, batch_roi_nums=rois_index - ) - loss = paddle.mean(output) - optimizer = fluid.optimizer.SGD(learning_rate=1e-3) - optimizer.minimize(loss) - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - exe.run( - fluid.default_main_program(), - { - 'X': self.x, - "ROIs": self.rois[:, 1:5], - "rois_idx": self.rois_index, - }, - ) - - def 
test_net(self): - places = [fluid.CPUPlace()] - if fluid.core.is_compiled_with_cuda(): - places.append(fluid.CUDAPlace(0)) - for place in places: - self.run_net(place) - - def test_errors(self): - with program_guard(Program(), Program()): - x = fluid.layers.data( - name="x", shape=[245, 30, 30], dtype="float32" - ) - rois = fluid.layers.data( - name="rois", shape=[4], dtype="float32", lod_level=1 - ) - # spatial_scale must be float type - self.assertRaises( - TypeError, fluid.layers.prroi_pool, x, rois, 2, 7, 7 - ) - # pooled_height must be int type - self.assertRaises( - TypeError, fluid.layers.prroi_pool, x, rois, 0.25, 0.7, 7 - ) - # pooled_width must be int type - self.assertRaises( - TypeError, fluid.layers.prroi_pool, x, rois, 0.25, 7, 0.7 - ) - - def test_bad_x(): - x = fluid.layers.data( - name='data1', - shape=[2, 3, 16, 16], - dtype='int64', - append_batch_size=False, - ) - label = fluid.layers.data( - name='label1', - shape=[2, 4], - dtype='float32', - lod_level=1, - append_batch_size=False, - ) - output = fluid.layers.prroi_pool(x, label, 0.25, 2, 2) - - self.assertRaises(TypeError, test_bad_x) - - def test_bad_y(): - x = fluid.layers.data( - name='data2', - shape=[2, 3, 16, 16], - dtype='float32', - append_batch_size=False, - ) - label = [[1, 2, 3, 4], [2, 3, 4, 5]] - output = fluid.layers.prroi_pool(x, label, 0.25, 2, 2) - - self.assertRaises(TypeError, test_bad_y) - - -if __name__ == '__main__': - import paddle - - paddle.enable_static() - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_roi_pool_op.py b/python/paddle/fluid/tests/unittests/test_roi_pool_op.py index 2fcd3eda287f76..b84cca41bbd294 100644 --- a/python/paddle/fluid/tests/unittests/test_roi_pool_op.py +++ b/python/paddle/fluid/tests/unittests/test_roi_pool_op.py @@ -18,7 +18,6 @@ import math import sys from op_test import OpTest -import paddle.fluid as fluid from decimal import Decimal, ROUND_HALF_UP @@ -176,34 +175,6 @@ def test_check_grad(self): self.check_grad(['X'], 'Out', check_eager=True) -class BadInputTestRoiPool(unittest.TestCase): - def test_error(self): - with fluid.program_guard(fluid.Program()): - - def test_bad_x(): - x = fluid.layers.data( - name='data1', shape=[2, 1, 4, 4], dtype='int64' - ) - label = fluid.layers.data( - name='label', shape=[2, 4], dtype='float32', lod_level=1 - ) - output = fluid.layers.roi_pool(x, label, 1, 1, 1.0) - - self.assertRaises(TypeError, test_bad_x) - - def test_bad_y(): - x = fluid.layers.data( - name='data2', - shape=[2, 1, 4, 4], - dtype='float32', - append_batch_size=False, - ) - label = [[1, 2, 3, 4], [2, 3, 4, 5]] - output = fluid.layers.roi_pool(x, label, 1, 1, 1.0) - - self.assertRaises(TypeError, test_bad_y) - - class TestROIPoolInLodOp(TestROIPoolOp): def set_data(self): self.init_test_case() From 505f4100a02ebbc410136194e0816a679adcd8d5 Mon Sep 17 00:00:00 2001 From: 201716010711 <87008376+201716010711@users.noreply.github.com> Date: Tue, 29 Nov 2022 09:52:06 +0800 Subject: [PATCH 006/154] delete size api (#48397) --- python/paddle/fluid/layers/nn.py | 48 ------------------- .../tests/unittests/mlu/test_size_op_mlu.py | 10 ++-- .../tests/unittests/npu/test_size_op_npu.py | 10 ++-- .../fluid/tests/unittests/test_size_op.py | 11 +++-- 4 files changed, 16 insertions(+), 63 deletions(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index eb7532c0549923..fcafa992beb847 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -116,7 +116,6 @@ 'sum', 'slice', 'shape', - 'size', 
'clip', 'clip_by_norm', 'mean', @@ -6912,53 +6911,6 @@ def shape(input): return out -@deprecated(since="2.0.0", update_to="paddle.numel") -def size(input): - """ - **Size Layer** - - Returns the number of elements for a tensor, which is a int64 Tensor with shape [1]. - - Args: - input (Tensor): The input Tensor, it's data type can be bool, float16, float32, float64, int32, int64. - - Returns: - Tensor: The number of elements for the input Tensor. - - Raises: - TypeError: ``input`` must be a Tensor and the data type of ``input`` must be one of bool, float16, float32, float64, int32, int64. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid.layers as layers - paddle.enable_static() - - input = layers.data( - name="input", shape=[3, 100], dtype="float32", append_batch_size=False) - rank = layers.size(input) # 300 - """ - - if in_dygraph_mode(): - return _C_ops.numel(input) - - if _in_legacy_dygraph(): - return _legacy_C_ops.size(input) - - check_variable_and_dtype( - input, - 'input', - ['bool', 'float16', 'float32', 'float64', 'int32', 'int64'], - "size", - ) - helper = LayerHelper('size', **locals()) - out = helper.create_variable_for_type_inference(dtype='int64') - helper.append_op(type='size', inputs={'Input': input}, outputs={'Out': out}) - - return out - - def _elementwise_op(helper): op_type = helper.layer_type x = helper.kwargs.get('x', None) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_size_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_size_op_mlu.py index 977359f8a9634d..a0cd8eba6dc5b1 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_size_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_size_op_mlu.py @@ -71,8 +71,8 @@ def test_size_static(self): x_2 = paddle.fluid.data(shape=shape2, dtype='int32', name='x_2') input_1 = np.random.random(shape1).astype("int32") input_2 = np.random.random(shape2).astype("int32") - out_1 = paddle.fluid.layers.size(x_1) - out_2 = paddle.fluid.layers.size(x_2) + out_1 = paddle.numel(x_1) + out_2 = paddle.numel(x_2) exe = paddle.static.Executor(place=paddle.MLUPlace(0)) res_1, res_2 = exe.run( feed={ @@ -94,8 +94,8 @@ def test_size_imperative(self): input_2 = np.random.random([1, 4, 5]).astype("int32") x_1 = paddle.to_tensor(input_1) x_2 = paddle.to_tensor(input_2) - out_1 = paddle.fluid.layers.size(x_1) - out_2 = paddle.fluid.layers.size(x_2) + out_1 = paddle.numel(x_1) + out_2 = paddle.numel(x_2) assert np.array_equal(out_1.numpy().item(0), np.size(input_1)) assert np.array_equal(out_2.numpy().item(0), np.size(input_2)) paddle.enable_static() @@ -108,7 +108,7 @@ def test_error(self): def test_x_type(): shape = [1, 4, 5] input_1 = np.random.random(shape).astype("int32") - out_1 = paddle.fluid.layers.size(input_1) + out_1 = paddle.numel(input_1) self.assertRaises(TypeError, test_x_type) diff --git a/python/paddle/fluid/tests/unittests/npu/test_size_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_size_op_npu.py index 1e768a5dd185a4..0b1d87b3dfaf26 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_size_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_size_op_npu.py @@ -100,8 +100,8 @@ def test_size_static(self): x_2 = paddle.fluid.data(shape=shape2, dtype='int32', name='x_2') input_1 = np.random.random(shape1).astype("int32") input_2 = np.random.random(shape2).astype("int32") - out_1 = paddle.fluid.layers.size(x_1) - out_2 = paddle.fluid.layers.size(x_2) + out_1 = paddle.numel(x_1) + out_2 = paddle.numel(x_2) exe = paddle.static.Executor(place=self.place) 
res_1, res_2 = exe.run( feed={ @@ -123,8 +123,8 @@ def test_size_imperative(self): input_2 = np.random.random([1, 4, 5]).astype("int32") x_1 = paddle.to_tensor(input_1) x_2 = paddle.to_tensor(input_2) - out_1 = paddle.fluid.layers.size(x_1) - out_2 = paddle.fluid.layers.size(x_2) + out_1 = paddle.numel(x_1) + out_2 = paddle.numel(x_2) assert np.array_equal(out_1.numpy().item(0), np.size(input_1)) assert np.array_equal(out_2.numpy().item(0), np.size(input_2)) paddle.enable_static() @@ -137,7 +137,7 @@ def test_error(self): def test_x_type(): shape = [1, 4, 5] input_1 = np.random.random(shape).astype("int32") - out_1 = paddle.fluid.layers.size(input_1) + out_1 = paddle.numel(input_1) self.assertRaises(TypeError, test_x_type) diff --git a/python/paddle/fluid/tests/unittests/test_size_op.py b/python/paddle/fluid/tests/unittests/test_size_op.py index 87fcfdf5a9646b..ac070c099e63c0 100644 --- a/python/paddle/fluid/tests/unittests/test_size_op.py +++ b/python/paddle/fluid/tests/unittests/test_size_op.py @@ -17,6 +17,7 @@ import paddle import paddle.fluid as fluid from op_test import OpTest +import paddle class TestSizeOp(OpTest): @@ -66,8 +67,8 @@ def test_size_static(self): x_2 = paddle.fluid.data(shape=shape2, dtype='int32', name='x_2') input_1 = np.random.random(shape1).astype("int32") input_2 = np.random.random(shape2).astype("int32") - out_1 = paddle.fluid.layers.size(x_1) - out_2 = paddle.fluid.layers.size(x_2) + out_1 = paddle.numel(x_1) + out_2 = paddle.numel(x_2) exe = paddle.static.Executor(place=paddle.CPUPlace()) res_1, res_2 = exe.run( feed={ @@ -90,8 +91,8 @@ def test_size_imperative(self): input_2 = np.random.random([1, 4, 5]).astype("int32") x_1 = paddle.to_tensor(input_1) x_2 = paddle.to_tensor(input_2) - out_1 = paddle.fluid.layers.size(x_1) - out_2 = paddle.fluid.layers.size(x_2) + out_1 = paddle.numel(x_1) + out_2 = paddle.numel(x_2) assert np.array_equal(out_1.numpy().item(0), np.size(input_1)) assert np.array_equal(out_2.numpy().item(0), np.size(input_2)) paddle.enable_static() @@ -104,7 +105,7 @@ def test_error(self): def test_x_type(): shape = [1, 4, 5] input_1 = np.random.random(shape).astype("int32") - out_1 = paddle.fluid.layers.size(input_1) + out_1 = paddle.numel(input_1) self.assertRaises(TypeError, test_x_type) From 41ba27229243d5f222d094360295ad8b9d9fc438 Mon Sep 17 00:00:00 2001 From: zhangkaihuo Date: Tue, 29 Nov 2022 09:58:44 +0800 Subject: [PATCH 007/154] group the index in not cutlass mode (#48439) --- paddle/phi/kernels/sparse/gpu/conv_kernel.cu | 38 ++++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/paddle/phi/kernels/sparse/gpu/conv_kernel.cu b/paddle/phi/kernels/sparse/gpu/conv_kernel.cu index e5e3cd0f5c184d..87037581e52f79 100644 --- a/paddle/phi/kernels/sparse/gpu/conv_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/conv_kernel.cu @@ -123,25 +123,6 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx, dev_ctx, x, key, tmp_rulebook, h_counter, out, rulebook, counter); } - if (subm) { - auto config = - phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, rulebook_len, 1); - unique_value.ResizeAndAllocate( - {static_cast(out->nnz() * kernel_size)}); - out_index.ResizeAndAllocate({static_cast(rulebook_len)}); - int* out_index_ptr = out_index.data(); - int* unique_value_ptr = unique_value.data(); - phi::backends::gpu::GpuMemsetAsync( - out_index_ptr, 0, sizeof(int) * rulebook_len, dev_ctx.stream()); - GroupIndexs<<>>(rulebook_len, - kernel_size, - rulebook_ptr + rulebook_len, - out_index_ptr, - unique_value_ptr); - } #ifdef 
PADDLE_WITH_CUTLASS bool cutlass = true; if (dev_ctx.GetComputeCapability() < 80) cutlass = false; @@ -226,6 +207,25 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx, } } else { #endif + if (subm) { + auto config = + phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, rulebook_len, 1); + unique_value.ResizeAndAllocate( + {static_cast(out->nnz() * kernel_size)}); + out_index.ResizeAndAllocate({static_cast(rulebook_len)}); + int* out_index_ptr = out_index.data(); + int* unique_value_ptr = unique_value.data(); + phi::backends::gpu::GpuMemsetAsync( + out_index_ptr, 0, sizeof(int) * rulebook_len, dev_ctx.stream()); + GroupIndexs<<>>(rulebook_len, + kernel_size, + rulebook_ptr + rulebook_len, + out_index_ptr, + unique_value_ptr); + } // 2. gather phi::DenseTensor in_features = phi::Empty(dev_ctx, {rulebook_len, in_channels}); From d33d6db0d3bea87551bce612c00c0f37005afb4e Mon Sep 17 00:00:00 2001 From: zhangkaihuo Date: Tue, 29 Nov 2022 10:13:51 +0800 Subject: [PATCH 008/154] [Sparse]BatchNorm use inplace (#48254) --- .../operators/generator/generate_sparse_op.py | 2 ++ paddle/phi/api/yaml/sparse_backward.yaml | 2 +- paddle/phi/api/yaml/sparse_ops.yaml | 4 +-- paddle/phi/kernels/sparse/batch_norm_kernel.h | 36 +++++++++---------- python/paddle/sparse/nn/layer/norm.py | 2 +- 5 files changed, 24 insertions(+), 22 deletions(-) diff --git a/paddle/fluid/operators/generator/generate_sparse_op.py b/paddle/fluid/operators/generator/generate_sparse_op.py index 6c9f70b9283f88..10ee034ff3b473 100644 --- a/paddle/fluid/operators/generator/generate_sparse_op.py +++ b/paddle/fluid/operators/generator/generate_sparse_op.py @@ -82,6 +82,8 @@ def main(op_yaml_path, backward_yaml_path, output_op_path, output_arg_map_path): backward_op_dict = to_named_dict(backward_ops) for op in ops: + if op['name'][-1] == '_': + op['name'] = op['name'][:-1] op['op_name'] = SPARSE_OP_PREFIX + op['name'] op['name'] = op['op_name'] if op["backward"] is not None: diff --git a/paddle/phi/api/yaml/sparse_backward.yaml b/paddle/phi/api/yaml/sparse_backward.yaml index 3e654210b90082..949a6c4c19b12c 100644 --- a/paddle/phi/api/yaml/sparse_backward.yaml +++ b/paddle/phi/api/yaml/sparse_backward.yaml @@ -101,7 +101,7 @@ atanh_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : batch_norm_grad - forward : batch_norm (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space) + forward : batch_norm_ (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space) args : (Tensor x, Tensor scale, Tensor bias, Tensor mean_out, Tensor variance_out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics) output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad) infer_meta : diff --git a/paddle/phi/api/yaml/sparse_ops.yaml b/paddle/phi/api/yaml/sparse_ops.yaml index 545042b6f073e4..2208d34ecf4cda 100644 --- a/paddle/phi/api/yaml/sparse_ops.yaml +++ b/paddle/phi/api/yaml/sparse_ops.yaml @@ -87,7 +87,7 @@ layout : x backward : 
atanh_grad -- op : batch_norm +- op : batch_norm_ args : (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics) output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space) infer_meta : @@ -95,7 +95,7 @@ kernel : func : batch_norm_coo {sparse_coo, dense, dense, dense, dense -> sparse_coo, dense, dense, dense, dense, dense} data_type : x - view : (mean -> mean_out), (variance -> variance_out) + inplace : (mean -> mean_out), (variance -> variance_out) backward : batch_norm_grad - op : cast diff --git a/paddle/phi/kernels/sparse/batch_norm_kernel.h b/paddle/phi/kernels/sparse/batch_norm_kernel.h index 03e621cc65325b..41656280c500e3 100644 --- a/paddle/phi/kernels/sparse/batch_norm_kernel.h +++ b/paddle/phi/kernels/sparse/batch_norm_kernel.h @@ -23,24 +23,24 @@ namespace phi { namespace sparse { template -void BatchNormKernel(const Context& dev_ctx, - const SparseCooTensor& x, - const DenseTensor& scale, - const DenseTensor& bias, - const DenseTensor& mean, - const DenseTensor& variance, - float momentum, - float epsilon, - const std::string& data_layout, - bool is_test, - bool use_global_stats, - bool trainable_statistics, - SparseCooTensor* y, - DenseTensor* mean_out, - DenseTensor* variance_out, - DenseTensor* saved_mean, - DenseTensor* saved_variance, - DenseTensor* reserve_space); +void BatchNormCooKernel(const Context& dev_ctx, + const SparseCooTensor& x, + const DenseTensor& mean, + const DenseTensor& variance, + const DenseTensor& scale, + const DenseTensor& bias, + bool is_test, + float momentum, + float epsilon, + const std::string& data_layout, + bool use_global_stats, + bool trainable_statistics, + SparseCooTensor* y, + DenseTensor* mean_out, + DenseTensor* variance_out, + DenseTensor* saved_mean, + DenseTensor* saved_variance, + DenseTensor* reserve_space); } // namespace sparse } // namespace phi diff --git a/python/paddle/sparse/nn/layer/norm.py b/python/paddle/sparse/nn/layer/norm.py index 5714514b61355c..eb242a72c54b61 100644 --- a/python/paddle/sparse/nn/layer/norm.py +++ b/python/paddle/sparse/nn/layer/norm.py @@ -138,7 +138,7 @@ def forward(self, input): data_format = 'NCHW' if self._data_format[1] == 'C' else 'NHWC' if in_dynamic_mode(): - batch_norm_out, _, _, _, _, _ = _C_ops.sparse_batch_norm( + batch_norm_out, _, _, _, _, _ = _C_ops.sparse_batch_norm_( input, self._mean, self._variance, From 105bb929497f9f83f3ccf46e61d3b329cdc29512 Mon Sep 17 00:00:00 2001 From: Matsumoto Ruko <38883252+gsq7474741@users.noreply.github.com> Date: Tue, 29 Nov 2022 10:16:50 +0800 Subject: [PATCH 009/154] Remove py36 code in .py files (#48426) --- python/paddle/distributed/spawn.py | 18 ------------------ .../fleet/test_parallel_dygraph_mnist.py | 3 +-- .../fleet/test_parallel_dygraph_no_sync.py | 7 +++---- .../fleet/test_parallel_dygraph_se_resnext.py | 3 +-- .../test_parallel_dygraph_sparse_embedding.py | 3 +-- ...lel_dygraph_sparse_embedding_over_height.py | 3 +-- .../test_parallel_dygraph_unused_variables.py | 3 +-- .../tests/unittests/test_paddle_save_load.py | 7 +------ .../tests/unittests/test_static_save_load.py | 7 +------ python/paddle/hapi/progressbar.py | 12 +----------- .../incubate/multiprocessing/reductions.py | 9 --------- python/paddle/vision/datasets/folder.py | 11 +---------- tools/dockerfile/build_scripts/ssl-check.py | 4 ---- 13 files changed, 12 insertions(+), 78 deletions(-) 
diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py index 4820d6dcd2f03e..f3505da0bb7180 100644 --- a/python/paddle/distributed/spawn.py +++ b/python/paddle/distributed/spawn.py @@ -68,16 +68,6 @@ def __init__(self): self.selected_devices = None -def _py_supported_check(): - if not sys.version_info >= (3, 4): - raise RuntimeError( - "Use `paddle.distributed.spawn` to start parallel training " - "requires python version greater than 3.4, if your python " - "is lower than this version, please use " - "`paddle.distributed.launch` instead." - ) - - def _options_valid_check(options): # `print_config` keeped as a debug options, not show to users supported_options = [ @@ -414,7 +404,6 @@ def _func_wrapper(func, args, error_queue, return_queue, env_dict, backend): class MultiprocessContext: def __init__(self, processes, error_queues, return_queues): - _py_supported_check() self.error_queues = error_queues # NOTE(chenweihang): The `spawn` method is mainly used # to wrap the outermost execution function of the program for @@ -598,13 +587,6 @@ def train(print_result=False): if __name__ == '__main__': dist.spawn(train, args=(True,), nprocs=2, gpus='4,5') """ - # NOTE(chenweihang): [ why only supports python3.4+ ? ] - # Python supported setting the child process startup method - # since 3.4. The previous version can only use the default startup - # method, while the default startup method of Unix is fork, which - # cannot support CUDA runtime multi-process - _py_supported_check() - # Give an error hint when the users enter a configuration option # that does not exist _options_valid_check(options) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_mnist.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_mnist.py index 9008c0ef9f7477..67e27fde7fd2d0 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_mnist.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_mnist.py @@ -13,7 +13,6 @@ # limitations under the License. import os -import sys import unittest from parallel_dygraph_mnist import TestMnist @@ -64,7 +63,7 @@ def test_mnist_xpu(self): class TestParallelDygraphMnistSpawn(TestDistSpawnRunner): def test_mnist_with_spawn(self): - if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): + if fluid.core.is_compiled_with_cuda(): self.check_dist_result_with_spawn(test_class=TestMnist, delta=1e-5) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_no_sync.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_no_sync.py index 6858a2a2d3b2f1..e940dbde86d768 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_no_sync.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_no_sync.py @@ -13,7 +13,6 @@ # limitations under the License. 
import os -import sys import unittest from parallel_dygraph_no_sync import TestNoSync @@ -80,7 +79,7 @@ def test_no_sync_control_flow(self): class TestParallelDygraphNoSyncSpawn(TestDistSpawnRunner): def test_no_sync_with_spawn(self): - if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): + if fluid.core.is_compiled_with_cuda(): self.check_dist_result_with_spawn(test_class=TestNoSync, delta=1e-5) @@ -89,7 +88,7 @@ def _args_config(self, args): args.find_unused_parameters = True def test_no_sync_with_spawn(self): - if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): + if fluid.core.is_compiled_with_cuda(): self.check_dist_result_with_spawn( test_class=TestNoSyncUnusedParam, delta=1e-5 ) @@ -100,7 +99,7 @@ def _args_config(self, args): args.find_unused_parameters = True def test_no_sync_with_spawn(self): - if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): + if fluid.core.is_compiled_with_cuda(): self.check_dist_result_with_spawn( test_class=TestNoSyncControlFlow, delta=1e-5 ) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_se_resnext.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_se_resnext.py index b0d0dbf392baf9..51cc556e07c79b 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_se_resnext.py @@ -13,7 +13,6 @@ # limitations under the License. import os -import sys import unittest from parallel_dygraph_se_resnext import TestSeResNeXt @@ -43,7 +42,7 @@ def test_se_resnext(self): class TestParallelDygraphSeResNeXtSpawn(TestDistSpawnRunner): def test_se_resnext_with_spawn(self): - if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): + if fluid.core.is_compiled_with_cuda(): self.check_dist_result_with_spawn( test_class=TestSeResNeXt, delta=0.01 ) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_sparse_embedding.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_sparse_embedding.py index cc89ebecab16c6..62570959f89156 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_sparse_embedding.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_sparse_embedding.py @@ -13,7 +13,6 @@ # limitations under the License. import os -import sys import unittest from parallel_dygraph_sparse_embedding import TestSparseEmbedding @@ -61,7 +60,7 @@ def test_sparse_embedding_fp64(self): class TestParallelDygraphSparseEmdeddingSpawn(TestDistSpawnRunner): def test_sparse_embedding_with_spawn(self): - if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): + if fluid.core.is_compiled_with_cuda(): self.check_dist_result_with_spawn( test_class=TestSparseEmbedding, delta=1e-5 ) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_sparse_embedding_over_height.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_sparse_embedding_over_height.py index 87284c6203ebc1..96038bf18f62ef 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_sparse_embedding_over_height.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_sparse_embedding_over_height.py @@ -13,7 +13,6 @@ # limitations under the License. 
import os -import sys import unittest from parallel_dygraph_sparse_embedding_over_height import ( @@ -47,7 +46,7 @@ def test_sparse_embedding(self): class TestParallelDygraphSparseEmdeddingOverHeightSpawn(TestDistSpawnRunner): def test_sparse_embedding_with_spawn(self): - if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): + if fluid.core.is_compiled_with_cuda(): self.check_dist_result_with_spawn( test_class=TestSparseEmbeddingOverHeight, delta=1e-5 ) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_unused_variables.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_unused_variables.py index 2d1701bb08b562..f19a4935974e7e 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_unused_variables.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_unused_variables.py @@ -13,7 +13,6 @@ # limitations under the License. import os -import sys import unittest from parallel_dygraph_unused_variables import TestSparseEmbeddingUnusedVars @@ -51,7 +50,7 @@ def _setup_config(self): class TestSparseEmbeddingUnusedVarsSpawn(TestDistSpawnRunner): def test_mnist_with_spawn(self): - if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4): + if fluid.core.is_compiled_with_cuda(): self.check_dist_result_with_spawn( test_class=TestSparseEmbeddingUnusedVars, delta=1e-5 ) diff --git a/python/paddle/fluid/tests/unittests/test_paddle_save_load.py b/python/paddle/fluid/tests/unittests/test_paddle_save_load.py index d891b072940e92..ed9ffcaab9da99 100644 --- a/python/paddle/fluid/tests/unittests/test_paddle_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_paddle_save_load.py @@ -15,7 +15,6 @@ import unittest import numpy as np import os -import sys from io import BytesIO import tempfile @@ -149,11 +148,7 @@ def test_pickle_protocol(self): with self.assertRaises(ValueError): paddle.save(save_dict, path, 5) - protocols = [ - 2, - ] - if sys.version_info.major >= 3 and sys.version_info.minor >= 4: - protocols += [3, 4] + protocols = [2, 3, 4] for protocol in protocols: paddle.save(save_dict, path, pickle_protocol=protocol) dict_load = paddle.load(path) diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index 07e24903cf65c5..27bc86259f7d87 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import unittest import paddle @@ -1798,11 +1797,7 @@ def test_pickle_protocol(self): with self.assertRaises(ValueError): paddle.fluid.save(prog, path, 5) - protocols = [ - 2, - ] - if sys.version_info.major >= 3 and sys.version_info.minor >= 4: - protocols += [3, 4] + protocols = [2, 3, 4] for protocol in protocols: paddle.fluid.save(prog, path, protocol) # set var to zero diff --git a/python/paddle/hapi/progressbar.py b/python/paddle/hapi/progressbar.py index 77f090471dfb91..0f6f375b2a38be 100644 --- a/python/paddle/hapi/progressbar.py +++ b/python/paddle/hapi/progressbar.py @@ -17,7 +17,6 @@ import time import numpy as np import struct -from collections import namedtuple __all__ = [] @@ -57,16 +56,7 @@ def __init__( ) def _get_max_width(self): - if sys.version_info > (3, 3): - from shutil import get_terminal_size - else: - try: - from backports.shutil_get_terminal_size import get_terminal_size - except: - - def get_terminal_size(): - terminal_size = namedtuple("terminal_size", "columns lines") - return terminal_size(80, 24) + from shutil import get_terminal_size terminal_width, _ = get_terminal_size() terminal_width = terminal_width if terminal_width > 0 else 80 diff --git a/python/paddle/incubate/multiprocessing/reductions.py b/python/paddle/incubate/multiprocessing/reductions.py index e60d90a9d62a08..04fe123fdc1930 100644 --- a/python/paddle/incubate/multiprocessing/reductions.py +++ b/python/paddle/incubate/multiprocessing/reductions.py @@ -18,7 +18,6 @@ # TODO: check serializing named tensor # TODO: check influence on autograd import sys -import warnings import copy import threading from multiprocessing.util import register_after_fork @@ -34,14 +33,6 @@ def _supported_check(): return False - if not sys.version_info >= (3, 4): - warnings.warn( - "Use `paddle.multiprocessing` to share paddle tensor " - "requires python version greater than 3.4 ." - " `paddle.multiprocessing` will not take any effect !!!" - ) - return False - return True diff --git a/python/paddle/vision/datasets/folder.py b/python/paddle/vision/datasets/folder.py index 6ac0c4ca917238..96a5b9b200eea9 100644 --- a/python/paddle/vision/datasets/folder.py +++ b/python/paddle/vision/datasets/folder.py @@ -13,7 +13,6 @@ # limitations under the License. import os -import sys from PIL import Image import paddle @@ -246,15 +245,7 @@ def _find_classes(self, dir): and class_to_idx is a dictionary. 
""" - if sys.version_info >= (3, 5): - # Faster and available in Python 3.5 and above - classes = [d.name for d in os.scandir(dir) if d.is_dir()] - else: - classes = [ - d - for d in os.listdir(dir) - if os.path.isdir(os.path.join(dir, d)) - ] + classes = [d.name for d in os.scandir(dir) if d.is_dir()] classes.sort() class_to_idx = {classes[i]: i for i in range(len(classes))} return classes, class_to_idx diff --git a/tools/dockerfile/build_scripts/ssl-check.py b/tools/dockerfile/build_scripts/ssl-check.py index b44a5753c1a6f9..6c0f07330e9354 100644 --- a/tools/dockerfile/build_scripts/ssl-check.py +++ b/tools/dockerfile/build_scripts/ssl-check.py @@ -21,10 +21,6 @@ print("Testing SSL certificate checking for Python:", sys.version) -if sys.version_info[:2] < (2, 7) or sys.version_info[:2] < (3, 4): - print("This version never checks SSL certs; skipping tests") - sys.exit(0) - if sys.version_info[0] >= 3: from urllib.request import urlopen From b6b6fca60d1cac6e491e1e91c8003b6de9caee40 Mon Sep 17 00:00:00 2001 From: Vvsmile <450864116@qq.com> Date: Tue, 29 Nov 2022 10:32:26 +0800 Subject: [PATCH 010/154] [Clean Fluid API]Remove API: gather_nd (#47956) * Replace paddle.fluid.layers.gather_nd with paddle.gather_nd * replace gather_nd with paddle.gather_nd * fix the call of gather_nd * fix code style of gather_nd --- python/paddle/fluid/layers/nn.py | 99 ------------------- python/paddle/fluid/layers/rnn.py | 2 +- .../seq2seq_dygraph_model.py | 4 +- .../transformer_dygraph_model.py | 2 +- .../ir/inference/test_trt_gather_nd_op.py | 5 +- .../test_dynamic_rnn_stop_gradient.py | 2 +- .../tests/unittests/test_gather_nd_op.py | 8 +- 7 files changed, 12 insertions(+), 110 deletions(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index fcafa992beb847..91a5376abb0f34 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -101,7 +101,6 @@ 'resize_bilinear', 'resize_trilinear', 'resize_nearest', - 'gather_nd', 'relu', 'log', 'prelu', @@ -6017,104 +6016,6 @@ def resize_nearest( ) -@deprecated(since="2.0.0", update_to="paddle.gather_nd") -def gather_nd(input, index, name=None): - """ - **Gather Nd Layer** - - This function is actually a high-dimensional extension of :code:`gather` - and supports for simultaneous indexing by multiple axes. :attr:`index` is a - K-dimensional integer tensor, which is regarded as a (K-1)-dimensional - tensor of :attr:`index` into :attr:`input`, where each element defines - a slice of params: - - .. math:: - - output[(i_0, ..., i_{K-2})] = input[index[(i_0, ..., i_{K-2})]] - - Obviously, :code:`index.shape[-1] <= input.rank` . And, the output tensor has - shape :code:`index.shape[:-1] + input.shape[index.shape[-1]:]` . - - .. code-block:: text - - Given: - input = [[[ 0, 1, 2, 3], - [ 4, 5, 6, 7], - [ 8, 9, 10, 11]], - [[12, 13, 14, 15], - [16, 17, 18, 19], - [20, 21, 22, 23]]] - input.shape = (2, 3, 4) - - * Case 1: - index = [[1]] - - gather_nd(input, index) - = [input[1, :, :]] - = [[12, 13, 14, 15], - [16, 17, 18, 19], - [20, 21, 22, 23]] - - * Case 2: - index = [[0,2]] - - gather_nd(input, index) - = [input[0, 2, :]] - = [8, 9, 10, 11] - - * Case 3: - index = [[1, 2, 3]] - - gather_nd(input, index) - = [input[1, 2, 3]] - = [23] - - Args: - input (Tensor): The input Tensor which it's data type should be bool, float32, float64, int32, int64. - index (Tensor): The index input with rank > 1, index.shape[-1] <= input.rank. - Its dtype should be int32, int64. - name(str, optional): The default value is None. 
Normally there is no need for user to set this property. - For more information, please refer to :ref:`api_guide_Name` . - - Returns: - output (Tensor): A tensor with the shape index.shape[:-1] + input.shape[index.shape[-1]:] - - Examples: - - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - - x = fluid.data(name='x', shape=[3, 4, 5], dtype='float32') - index = fluid.data(name='index', shape=[2, 2], dtype='int32') - output = fluid.layers.gather_nd(x, index) - - """ - if in_dygraph_mode(): - return _C_ops.gather_nd(input, index) - else: - if _in_legacy_dygraph(): - return _legacy_C_ops.gather_nd(input, index) - check_variable_and_dtype( - input, - 'input', - ['bool', 'float32', 'float64', 'int16', 'int32', 'int64'], - 'gather_np', - ) - check_variable_and_dtype(index, 'index', ['int32', 'int64'], 'gather_np') - helper = LayerHelper('gather_nd', **locals()) - dtype = helper.input_dtype() - output = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="gather_nd", - inputs={"X": input, "Index": index}, - outputs={"Out": output}, - ) - return output - - def log(x, name=None): r""" Calculates the natural log of the given input tensor, element-wise. diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 024eb4208ecc24..b82a965e84fc87 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -1167,7 +1167,7 @@ def _gather(self, x, indices, batch_size): ) topk_coordinates = paddle.stack([batch_pos, indices], axis=2) topk_coordinates.stop_gradient = True - return nn.gather_nd(x, topk_coordinates) + return paddle.gather_nd(x, topk_coordinates) class OutputWrapper( collections.namedtuple( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index da9214c466fa36..aa0219880a073c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -199,7 +199,7 @@ def _real_state(self, state, new_state, step_mask): def _gather(self, x, indices, batch_pos): topk_coordinates = paddle.stack([batch_pos, indices], axis=2) - return fluid.layers.gather_nd(x, topk_coordinates) + return paddle.gather_nd(x, topk_coordinates) @declarative def forward(self, inputs): @@ -684,7 +684,7 @@ def _real_state(self, state, new_state, step_mask): def _gather(self, x, indices, batch_pos): topk_coordinates = paddle.stack([batch_pos, indices], axis=2) - return fluid.layers.gather_nd(x, topk_coordinates) + return paddle.gather_nd(x, topk_coordinates) def attention(self, query, enc_output, mask=None): query = fluid.layers.unsqueeze(query, [1]) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index 296d221c415c2d..bcd881c7996024 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -768,7 +768,7 @@ def mask_probs(probs, finished, noend_mask_tensor): def gather(input, indices, batch_pos): topk_coordinates = paddle.stack([batch_pos, indices], axis=2) - return layers.gather_nd(input, topk_coordinates) + return paddle.gather_nd(input, topk_coordinates) # run encoder enc_output = self.encoder(src_word, src_pos, 
src_slf_attn_bias) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_nd_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_nd_op.py index 3a193e496c05a1..b96eddb87e779f 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_nd_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_nd_op.py @@ -17,6 +17,7 @@ import numpy as np from inference_pass_test import InferencePassTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import AnalysisConfig, PassVersionChecker @@ -27,7 +28,7 @@ def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): data = fluid.data(name="data", shape=[-1, 3, 4], dtype="float32") index = fluid.data(name="index", shape=[-1, 2, 2], dtype="int32") - gather_nd = fluid.layers.gather_nd(data, index) + gather_nd = paddle.gather_nd(data, index) out = fluid.layers.batch_norm(gather_nd, is_test=True) self.feeds = { @@ -64,7 +65,7 @@ def setUp(self): name="data", shape=[-1, 1280, 192], dtype="float32" ) index = fluid.data(name="index", shape=[-1, 1028, 2], dtype="int32") - gather_nd = fluid.layers.gather_nd(data, index) + gather_nd = paddle.gather_nd(data, index) out = fluid.layers.batch_norm(gather_nd, is_test=True) index_data = np.zeros((1, 1028, 2), dtype='int32') diff --git a/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py b/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py index d20ad3d0c0ec67..fff63bcb0057ee 100644 --- a/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py @@ -49,7 +49,7 @@ def build_and_run_program(place, batch_size, beam_size, stop_gradient=False): ) topk_coordinates = paddle.stack([batch_pos, indices], axis=2) topk_coordinates.stop_gradient = stop_gradient - score = layers.gather_nd(x, topk_coordinates) + score = paddle.gather_nd(x, topk_coordinates) layers.increment(x=step_idx, value=1.0, in_place=True) layers.array_write(score, i=step_idx, array=scores) length_cond = layers.less_than(x=step_idx, y=max_len) diff --git a/python/paddle/fluid/tests/unittests/test_gather_nd_op.py b/python/paddle/fluid/tests/unittests/test_gather_nd_op.py index c49db2815ac1e6..20fc57ff023685 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_nd_op.py +++ b/python/paddle/fluid/tests/unittests/test_gather_nd_op.py @@ -162,17 +162,17 @@ def test_case1(self): name='x1', shape=[30, 40, 50, 60], dtype='float32' ) index1 = fluid.layers.data(name='index1', shape=[2, 4], dtype='int32') - output1 = fluid.layers.gather_nd(x1, index1) + output1 = paddle.gather_nd(x1, index1) def test_case2(self): x2 = fluid.layers.data(name='x2', shape=[30, 40, 50], dtype='float32') index2 = fluid.layers.data(name='index2', shape=[2, 2], dtype='int64') - output2 = fluid.layers.gather_nd(x2, index2) + output2 = paddle.gather_nd(x2, index2) def test_case3(self): x3 = fluid.layers.data(name='x3', shape=[3, 4, 5], dtype='float32') index3 = fluid.layers.data(name='index3', shape=[2, 1], dtype='int32') - output3 = fluid.layers.gather_nd(x3, index3, name="gather_nd_layer") + output3 = paddle.gather_nd(x3, index3, name="gather_nd_layer") # Test Raise Index Error @@ -186,7 +186,7 @@ def check_raise_is_test(): index = fluid.layers.data( name='index', shape=[2, 10], dtype='int32' ) - output = fluid.layers.gather_nd(x, index) + output = paddle.gather_nd(x, index) except Exception as e: t = 
"Input(Index).shape[-1] should be no greater than Input(X).rank" if t in str(e): From ed33b860b6c97155128eccbd944d52a4a9af6f5e Mon Sep 17 00:00:00 2001 From: ccrrong <101700995+ccrrong@users.noreply.github.com> Date: Tue, 29 Nov 2022 10:49:28 +0800 Subject: [PATCH 011/154] remove triu from fluid (#48427) * remove triu * use paddle.triu --- python/paddle/fluid/layers/tensor.py | 8 -------- .../fluid/tests/unittests/mlu/test_tril_triu_op_mlu.py | 2 +- .../fluid/tests/unittests/npu/test_tril_triu_op_npu.py | 2 +- python/paddle/fluid/tests/unittests/test_tril_triu_op.py | 2 +- 4 files changed, 3 insertions(+), 11 deletions(-) diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index b7e0e60145df3e..c5a28a913a285d 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -68,7 +68,6 @@ 'zeros_like', 'ones_like', 'diag', - 'triu', ] @@ -1830,10 +1829,3 @@ def ones_like(x, out=None): outputs={'Out': [out]}, ) return out - - -@deprecated(since="2.0.0", update_to="paddle.triu") -def triu(input, diagonal=0, name=None): - import paddle - - return paddle.tensor.triu(x=input, diagonal=diagonal, name=name) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_tril_triu_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_tril_triu_op_mlu.py index 2ed56de3c6d311..66b4a480bbf093 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_tril_triu_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_tril_triu_op_mlu.py @@ -184,7 +184,7 @@ def test_fluid_api(self): with program_guard(prog, startup_prog): data = np.random.random([1, 9, 9, 4]).astype(dtype) x = fluid.data(shape=[1, 9, -1, 4], dtype=dtype, name='x') - triu_out = fluid.layers.triu(x) + triu_out = paddle.triu(x) place = fluid.MLUPlace(0) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/npu/test_tril_triu_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_tril_triu_op_npu.py index aca61989b7b638..83ecf1f5a6c9c3 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_tril_triu_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_tril_triu_op_npu.py @@ -185,7 +185,7 @@ def test_fluid_api(self): with program_guard(prog, startup_prog): data = np.random.random([1, 9, 9, 4]).astype(dtype) x = fluid.data(shape=[1, 9, -1, 4], dtype=dtype, name='x') - triu_out = fluid.layers.triu(x) + triu_out = paddle.triu(x) place = fluid.NPUPlace(0) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_tril_triu_op.py b/python/paddle/fluid/tests/unittests/test_tril_triu_op.py index b6473f66a416b0..b0b754dc9630d1 100644 --- a/python/paddle/fluid/tests/unittests/test_tril_triu_op.py +++ b/python/paddle/fluid/tests/unittests/test_tril_triu_op.py @@ -183,7 +183,7 @@ def test_fluid_api(self): with program_guard(prog, startup_prog): data = np.random.random([1, 9, 9, 4]).astype(dtype) x = fluid.data(shape=[1, 9, -1, 4], dtype=dtype, name='x') - triu_out = fluid.layers.triu(x) + triu_out = paddle.triu(x) place = ( fluid.CUDAPlace(0) From d5387de21f814d814ab0ca34342f3228d4d84e94 Mon Sep 17 00:00:00 2001 From: lzydev <1528794076@qq.com> Date: Tue, 29 Nov 2022 10:57:10 +0800 Subject: [PATCH 012/154] Generate static graph code for lerp by yaml (#48322) * generate static graph code for lerp by yaml, test=develop * modify the op_compat.yaml of lerp, test=develop * generate static graph code for lerp by yaml, test=develop * modify the op_compat.yaml of lerp, test=develop * remove the 'attrs' of lerp, test=develop 
Signed-off-by: lizhiyu02 <1528794076@qq.com> Signed-off-by: lizhiyu02 <1528794076@qq.com> --- paddle/fluid/operators/lerp_op.cc | 100 ----------------------- paddle/phi/api/yaml/backward.yaml | 10 +++ paddle/phi/api/yaml/legacy_backward.yaml | 10 --- paddle/phi/api/yaml/legacy_ops.yaml | 10 --- paddle/phi/api/yaml/op_compat.yaml | 7 ++ paddle/phi/api/yaml/ops.yaml | 10 +++ paddle/phi/ops/compat/lerp_sig.cc | 33 -------- 7 files changed, 27 insertions(+), 153 deletions(-) delete mode 100644 paddle/fluid/operators/lerp_op.cc delete mode 100644 paddle/phi/ops/compat/lerp_sig.cc diff --git a/paddle/fluid/operators/lerp_op.cc b/paddle/fluid/operators/lerp_op.cc deleted file mode 100644 index 84e82e2950755b..00000000000000 --- a/paddle/fluid/operators/lerp_op.cc +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/infermeta/ternary.h" - -namespace paddle { -namespace operators { - -class LerpOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; -}; - -class LerpOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of lerp op."); - AddInput("Y", "(Tensor), The input tensor of lerp op."); - AddInput("Weight", "(Tensor, optional), The input tensor of lerp op."); - AddOutput("Out", "(Tensor), The output tensor of lerp op."); - AddComment(R"DOC( -Lerp Operator. - -This operator is used to do a linear interpolation of input $X$ and $Y$ with $Weight$. - -The equation is: - -$$Out = X + Weight * (Y - X)$$ - -Both the input $X$ and $Y$ can carry the LoD (Level of Details) information, -or not. But the output only shares the LoD information with input $X$. 
- -)DOC"); - } -}; - -class LerpGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - if (ctx->HasOutput(framework::GradVarName("X"))) { - ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); - } - if (ctx->HasOutput(framework::GradVarName("Y"))) { - ctx->SetOutputDim(framework::GradVarName("Y"), ctx->GetInputDim("Y")); - } - } -}; - -template -class LerpOpGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - void Apply(GradOpPtr op) const override { - op->SetType("lerp_grad"); - op->SetInput("X", this->Input("X")); - op->SetInput("Y", this->Input("Y")); - op->SetInput("Weight", this->Input("Weight")); - op->SetInput("Out", this->Output("Out")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetOutput(framework::GradVarName("Y"), this->InputGrad("Y")); - op->SetAttrMap(this->Attrs()); - } -}; - -DECLARE_INPLACE_OP_INFERER(LerpInplaceInferer, {"X", "Out"}); - -} // namespace operators -} // namespace paddle - -DECLARE_INFER_SHAPE_FUNCTOR(lerp, - LerpInferShapeFunctor, - PD_INFER_META(phi::LerpInferMeta)); -REGISTER_OPERATOR( - lerp, - paddle::operators::LerpOp, - paddle::operators::LerpOpMaker, - paddle::operators::LerpOpGradMaker, - paddle::operators::LerpOpGradMaker, - paddle::operators::LerpInplaceInferer, - LerpInferShapeFunctor); - -REGISTER_OPERATOR(lerp_grad, paddle::operators::LerpGradOp); diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml index a67e791c4d9ec7..44afc43c046d70 100644 --- a/paddle/phi/api/yaml/backward.yaml +++ b/paddle/phi/api/yaml/backward.yaml @@ -588,6 +588,16 @@ backward : leaky_relu_double_grad inplace : (out_grad -> x_grad) +- backward_op : lerp_grad + forward : lerp (Tensor x, Tensor y, Tensor weight) -> Tensor(out) + args : (Tensor x, Tensor y, Tensor weight, Tensor out, Tensor out_grad) + output : Tensor(x_grad), Tensor(y_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [x, y] + kernel : + func : lerp_grad + - backward_op : lgamma_grad forward : lgamma(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index 17854d80c7abda..814b3c89c01a11 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -749,16 +749,6 @@ no_need_buffer : bias optional : scale, bias -- backward_op : lerp_grad - forward : lerp (Tensor x, Tensor y, Tensor weight) -> Tensor(out) - args : (Tensor x, Tensor y, Tensor weight, Tensor out, Tensor out_grad) - output : Tensor(x_grad), Tensor(y_grad) - infer_meta : - func : GeneralBinaryGradInferMeta - param : [x, y] - kernel : - func : lerp_grad - - backward_op : linear_interp_grad forward : linear_interp (Tensor x, Tensor out_size, Tensor[] size_tensor, Tensor scale_tensor, str data_layout, int out_d, int out_h, int out_w, float[] scale, str interp_method, bool align_corners, int align_mode) -> Tensor(output) args : (Tensor x, Tensor out_size, Tensor[] size_tensor, Tensor scale_tensor, Tensor output_grad, str data_layout, int out_d, int out_h, int out_w, float[] scale, str interp_method, bool align_corners, int align_mode) diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index 
5f1f55596cee5c..b506c41cdff163 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -1039,16 +1039,6 @@ backward : layer_norm_grad optional : scale, bias -- op : lerp - args : (Tensor x, Tensor y, Tensor weight) - output : Tensor(out) - infer_meta : - func : LerpInferMeta - kernel : - func : lerp - inplace : (x -> out) - backward : lerp_grad - - op : less_equal args : (Tensor x, Tensor y) output : Tensor(out) diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index 0c59acbc98839a..5640ca7eb8b0ff 100644 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -680,6 +680,13 @@ extra : attrs : [bool use_mkldnn = false] +- op : lerp + backward : lerp_grad + inputs : + {x : X, y : Y, weight : Weight} + outputs : + out : Out + - op : lgamma inputs : x : X diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index 45fa68a37678c4..10b6645c61667b 100644 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -563,6 +563,16 @@ func : leaky_relu backward : leaky_relu_grad +- op : lerp + args : (Tensor x, Tensor y, Tensor weight) + output : Tensor(out) + infer_meta : + func : LerpInferMeta + kernel : + func : lerp + inplace : (x -> out) + backward : lerp_grad + - op : lgamma args : (Tensor x) output : Tensor(out) diff --git a/paddle/phi/ops/compat/lerp_sig.cc b/paddle/phi/ops/compat/lerp_sig.cc deleted file mode 100644 index 154424468d6605..00000000000000 --- a/paddle/phi/ops/compat/lerp_sig.cc +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature LerpOpArgumentMapping(const ArgumentMappingContext& ctx) { - return KernelSignature("lerp", {"X", "Y", "Weight"}, {}, {"Out"}); -} - -KernelSignature LerpGradOpArgumentMapping(const ArgumentMappingContext& ctx) { - return KernelSignature("lerp_grad", - {"X", "Y", "Weight", "Out", "Out@GRAD"}, - {}, - {"X@GRAD", "Y@GRAD"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(lerp, phi::LerpOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(lerp_grad, phi::LerpGradOpArgumentMapping); From 8d9fdd8b1323665a5d9cc567b1dacc9bdfd8f9f2 Mon Sep 17 00:00:00 2001 From: yunyaoXYY <109218879+yunyaoXYY@users.noreply.github.com> Date: Tue, 29 Nov 2022 10:58:23 +0800 Subject: [PATCH 013/154] [Clean fluid] Clean maxout, space_to_depth, affine_channel, similarity_focus and add_position_encoding (#48410) * Clean fluid maxout * Clean fluid space_to_depth * Clean fluid affine_channel and related tests * Clean fluid similarity_focus and related tests * Clean fluid add_position_encoding and related tests * Fix code style --- python/paddle/fluid/layers/nn.py | 412 ------------------ .../ipu/test_affine_channel_op_ipu.py | 96 ---- .../inference/test_trt_affine_channel_op.py | 157 ------- .../test_add_position_encoding_op.py | 32 -- .../tests/unittests/test_affine_channel_op.py | 36 -- .../fluid/tests/unittests/test_layers.py | 12 - .../fluid/tests/unittests/test_maxout_op.py | 16 - .../tests/unittests/test_op_name_conflict.py | 67 --- .../unittests/test_similarity_focus_op.py | 32 -- .../xpu/test_affine_channel_op_xpu.py | 36 -- 10 files changed, 896 deletions(-) delete mode 100644 python/paddle/fluid/tests/unittests/ipu/test_affine_channel_op_ipu.py delete mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_affine_channel_op.py diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 91a5376abb0f34..e760b357e091da 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -119,14 +119,9 @@ 'clip_by_norm', 'mean', 'mul', - 'maxout', - 'space_to_depth', - 'affine_channel', - 'similarity_focus', 'hash', 'grid_sampler', 'log_loss', - 'add_position_encoding', 'bilinear_tensor_product', 'merge_selected_rows', 'get_tensor_from_selected_rows', @@ -7606,343 +7601,6 @@ def mul(x, y, x_num_col_dims=1, y_num_col_dims=1, name=None): return out -@deprecated(since="2.0.0", update_to="paddle.nn.functional.maxout") -@templatedoc() -def maxout(x, groups, name=None, axis=1): - """ - ${comment} - - Args: - x(${x_type}): ${x_comment} - groups(int): ${groups_comment} - axis(int, optional): ${axis_comment} - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - - Returns: - Variable: ${out_comment} - - Raises: - ValueError: If `axis` is not 1, -1 or 3. - ValueError: If the number of input channels can not be divisible by `groups`. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import paddle - paddle.enable_static() - - input = fluid.data( - name='data', - shape=[None, 256, 32, 32], - dtype='float32') - out = fluid.layers.maxout(input, groups=2) - """ - return paddle.nn.functional.maxout(**locals()) - - -def space_to_depth(x, blocksize, name=None): - r""" - - Gives a blocksize to space_to_depth the input LoDtensor with Layout: [batch, channel, height, width] - - This op rearranges blocks of spatial data, into depth. 
More specifically, this op outputs a copy of \ - theinput LoDtensor where values from the height and width dimensions are moved to the channel \ - dimension. - The attr blocksize indicates the input block size. - - space_to_depth will reorganize the elements of input with shape[batch, channel, height, width] \ - according to blocksize to construct output with shape \ - [batch, channel * blocksize * blocksize, height/blocksize, width/blocksize]: - - - Non-overlapping blocks of size block_size x block size are rearranged into depth at each location. - - The Y, X coordinates within each block of the input become the high order component of the output channel index - - channel should be divisible by square of blocksize - - height, width should be divsible by blocksize - - This OP is useful for resizing the activations between convolutions \ - (but keeping all data) - - .. code-block:: text - - Given the input x with the shape [1, 1, 4, 4]: - x.data = [[[[1, 2, 5, 6], - [3, 4, 7, 8], - [9, 10, 13, 14], - [11, 12, 15, 16]]]] - blocksize = 2 - - then get the output with the shape [1, 4, 2, 2]: - out.data = [[[[1, 2], [3, 4]], - [[5, 6], [7, 8]], - [[9, 10], [11, 12]], - [[13, 14], [15, 16]]]] - - Args: - x (Variable): The input, which should be 4 dims Tensor or LodTensor, with the shape \ - [batch, channel, height, width] - blocksize (int): The blocksize to select the element on each feature map should be > 2 - name(str, optional): For detailed information, please refer \ - to :ref:`api_guide_Name`. Usually name is no need to set and \ - None by default. - - Returns: - Tensor, The output, which should be 4 dims Tensor or LodTensor, with the shape \ - [batch, channel * blocksize * blocksize, height/blocksize, width/blocksize] - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import numpy as np - import numpy as np - import paddle - - paddle.enable_static() - data = fluid.data( - name='data', shape=[1, 4, 2, 2], dtype='float32') - space_to_depthed = fluid.layers.space_to_depth( - x=data, blocksize=2) - - exe = fluid.Executor(fluid.CPUPlace()) - data_np = np.arange(0,16).reshape((1,4,2,2)).astype('float32') - - print(data_np) - #array([[[[ 0., 1.], [ 2., 3.]], - # [[ 4., 5.], [ 6., 7.]], - # [[ 8., 9.], [10., 11.]], - # [[12., 13.], [14., 15.]]]], dtype=float32) - - out_main = exe.run(fluid.default_main_program(), - feed={'data': data_np}, - fetch_list=[space_to_depthed]) - - print(out_main) - #[array([[[[ 0.]], [[ 4.]], [[ 1.]], [[ 5.]], - # [[ 8.]], [[12.]], [[ 9.]], [[13.]], - # [[ 2.]], [[ 6.]], [[ 3.]], [[ 7.]], - # [[10.]], [[14.]], [[11.]], [[15.]]]], dtype=float32)] - - """ - - helper = LayerHelper("space_to_depth", **locals()) - - if not (isinstance(blocksize, int)): - raise ValueError("blocksize must be a python Int") - - check_variable_and_dtype( - x, - 'x', - ['float16', 'float32', 'float64', 'int32', 'int64'], - 'space_to_depth', - ) - - out = helper.create_variable_for_type_inference(dtype=x.dtype) - - helper.append_op( - type="space_to_depth", - inputs={"X": x}, - attrs={"blocksize": blocksize}, - outputs={"Out": out}, - ) - return out - - -def affine_channel( - x, scale=None, bias=None, data_layout='NCHW', name=None, act=None -): - """ - - Applies a separate affine transformation to each channel of the input. - Useful for replacing spatial batch norm with its equivalent fixed - transformation. The input also can be 2D tensor and applies a affine - transformation in second dimension. 
- - Args: - x (Variable): Feature map input can be a 4D tensor with order NCHW - or NHWC. It also can be a 2D tensor and the affine transformation - is applied in the second dimension.The data type is float32 or float64. - scale (Variable): 1D input of shape (C), the c-th element is the scale - factor of the affine transformation for the c-th channel of - the input.The data type is float32 or float64. - bias (Variable): 1D input of shape (C), the c-th element is the bias - of the affine transformation for the c-th channel of the input. - The data type is float32 or float64. - data_layout (str, optional): Specify the data format of the input, and the data format of the output - will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. - The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_height, input_width]`. If input is 2D Tensor, you can ignore - data_layout. - name (str, default None): The name of this layer. For more information, - please refer to :ref:`api_guide_Name` . - act (str, default None): Activation to be applied to the output of this layer. - - Returns: - Variable: A tensor which has the same shape, data layout and data type with x. - - Examples: - .. code-block:: python - - import numpy as np - import paddle.fluid as fluid - import paddle.fluid as fluid - import paddle - - paddle.enable_static() - use_gpu = False - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - data = fluid.data(name='data', shape=[None, 1, 2, 2], dtype='float32') - input_scale = fluid.layers.create_parameter(shape=[1], dtype="float32", - default_initializer=fluid.initializer.Constant(2.0)) - input_bias = fluid.layers.create_parameter(shape=[1],dtype="float32", - default_initializer=fluid.initializer.Constant(0.5)) - out = fluid.layers.affine_channel(data,scale=input_scale, - bias=input_bias) - - exe.run(fluid.default_startup_program()) - test_program = fluid.default_main_program().clone(for_test=True) - - [out_array] = exe.run(test_program, - fetch_list=out, - feed={'data': np.ones([1,1,2,2]).astype('float32')}) - # out_array is [[[[2.5, 2.5], - # [2.5, 2.5]]]] with shape: [1, 1, 2, 2] - - """ - helper = LayerHelper("affine_channel", **locals()) - check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'affine_channel') - check_type(scale, 'scale', (Variable, type(None)), 'affine_channel') - check_type(bias, 'bias', (Variable, type(None)), 'affine_channel') - out = helper.create_variable_for_type_inference(dtype=x.dtype) - - helper.append_op( - type="affine_channel", - inputs={"X": x, 'Scale': scale, 'Bias': bias}, - attrs={"data_layout": data_layout}, - outputs={"Out": out}, - ) - return helper.append_activation(out) - - -def similarity_focus(input, axis, indexes, name=None): - r""" - SimilarityFocus Operator - - Generate a similarity focus mask with the same shape of input using the following method: - - 1. Extract the 3-D tensor(here the first dimension is BatchSize) corresponding - to the axis according to the indexes. For example, if axis=1 and indexes=[a], - it will get the matrix T=X[:, a, :, :]. In this case, if the shape of input X - is (BatchSize, A, B, C), the shape of tensor T is (BatchSize, B, C). - 2. 
For each index, find the largest numbers in the tensor T, so that the same - row and same column has at most one number(what it means is that if the - largest number has been found in the i-th row and the j-th column, then - the numbers in the i-th row or j-th column will be skipped. And then the - next largest number will be selected from the remaining numbers. Obviously - there will be min(B, C) numbers), and mark the corresponding position of the - 3-D similarity focus mask as 1, otherwise as 0. Do elementwise-or for - each index. - 3. Broadcast the 3-D similarity focus mask to the same shape of input X. - - Refer to `Similarity Focus Layer `_ - - .. code-block:: text - - * Example : - - Given a 4-D tensor x with the shape (BatchSize, C, A, B), where C is - the number of channels and the shape of feature map is (A, B): - x.shape = (2, 3, 2, 2) - x.data = [[[[0.8, 0.1], - [0.4, 0.5]], - - [[0.9, 0.7], - [0.9, 0.9]], - - [[0.8, 0.9], - [0.1, 0.2]]], - - - [[[0.2, 0.5], - [0.3, 0.4]], - - [[0.9, 0.7], - [0.8, 0.4]], - - [[0.0, 0.2], - [0.4, 0.7]]]] - - Given axis: 1 (the axis of the channel) - Given indexes: [0] - - then we get a 4-D tensor out with the same shape of input x: - out.shape = (2, 3, 2, 2) - out.data = [[[[1.0, 0.0], - [0.0, 1.0]], - - [[1.0, 0.0], - [0.0, 1.0]], - - [[1.0, 0.0], - [0.0, 1.0]]], - - [[[0.0, 1.0], - [1.0, 0.0]], - - [[0.0, 1.0], - [1.0, 0.0]], - - [[0.0, 1.0], - [1.0, 0.0]]]] - - Args: - input(Variable): The input tensor variable(default float). It should - be a 4-D tensor with shape [BatchSize, A, B, C]. Data type is - float32 or float64. - axis(int): Indicating the dimension to be selected. It can only be - 1, 2 or 3. - indexes(list): Indicating the indexes of the selected dimension. - - Returns: - Variable: A tensor variable with the same shape and same type \ - as the input. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import paddle - paddle.enable_static() - data = fluid.data( - name='data', shape=[-1, 3, 2, 2], dtype='float32') - fluid.layers.similarity_focus(input=data, axis=1, indexes=[0]) - """ - helper = LayerHelper('similarity_focus', **locals()) - # check attrs - check_variable_and_dtype( - input, 'input', ['float32', 'float64'], "similarity_focus" - ) - check_type(axis, 'axis', int, "similarity_focus") - check_type(indexes, 'indexes', list, "similarity_focus") - if axis != 1 and axis != 2 and axis != 3: - raise ValueError("axis must be 1, 2 or 3.") - if len(indexes) == 0: - raise ValueError("indexes can not be empty.") - - out = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='similarity_focus', - inputs={'X': input}, - outputs={'Out': out}, - attrs={"axis": axis, "indexes": indexes}, - ) - return out - - def hash(input, hash_size, num_hash=1, name=None): """ @@ -8156,76 +7814,6 @@ def log_loss(input, label, epsilon=1e-4, name=None): return paddle.nn.functional.log_loss(input, label, epsilon, name) -def add_position_encoding(input, alpha, beta, name=None): - r""" - - This operator performs weighted sum of input feature at each position - (position in the sequence) and the corresponding position encoding. - - For more details of position encoding, please refer to `Attention Is All You - Need `_ . - - The formula is as follows: - - .. 
math:: - PE(pos, 2i) &= \\sin{(pos / 10000^{2i / P})} \\\\ - PE(pos, 2i + 1) &= \\cos{(pos / 10000^{2i / P})} \\\\ - Out(:, pos, i) &= \\alpha * input(:, pos, i) + \\beta * PE(pos, i) - - Where: - - :math:`PE(pos, 2i)` : the value at even index `2i` for encoding of position `pos`. - - :math:`PE(pos, 2i + 1)` : the value at odd index `2i+1` for encoding of position `pos` - - Args: - input(Variable): A Tensor or LoDTensor (lod level is 1). If it is a - Tensor, the shape should be `[N, M, P]`, where `N` stands for - batch size, `M` for sequence length, `P` for the size of feature - dimension. If it is a LoDTensor, the shape should be `[N, P]`, - where `N` stands for the total sequence lengths in this mini-batch, - `P` for the size of feature. The data type should be float32 or float64. - alpha(float): Indicate the weight coefficient for `input` when performing - weighted sum. - beta(float): Indicate the weight coefficient for position encoding when - performing weighted sum. - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - - Returns: - Variable: A Tensor or LoDTensor. It has the same shape, data type and lod as `input`. - - Examples: - .. code-block:: python - - import paddle - - tensor = paddle.randn([16, 32, 64]) - position_tensor = paddle.fluid.layers.add_position_encoding( - input=tensor, alpha=1.0, beta=1.0) - - """ - if _non_static_mode(): - return _legacy_C_ops.add_position_encoding( - input, "alpha", alpha, "beta", beta - ) - - helper = LayerHelper('add_position_encoding', **locals()) - check_variable_and_dtype( - input, 'input', ['float32', 'float64'], "add_position_encoding" - ) - dtype = helper.input_dtype() - - out = helper.create_variable_for_type_inference(dtype=dtype) - - helper.append_op( - type="add_position_encoding", - inputs={"X": input}, - outputs={"Out": out}, - attrs={"alpha": alpha, "beta": beta}, - ) - return out - - def bilinear_tensor_product( x, y, size, act=None, name=None, param_attr=None, bias_attr=None ): diff --git a/python/paddle/fluid/tests/unittests/ipu/test_affine_channel_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_affine_channel_op_ipu.py deleted file mode 100644 index 75f2391cc2cb1b..00000000000000 --- a/python/paddle/fluid/tests/unittests/ipu/test_affine_channel_op_ipu.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np - -import paddle -import paddle.static -from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest - - -class TestBase(IPUOpTest): - def setUp(self): - self.set_atol() - self.set_training() - self.set_data_feed() - self.set_feed_attr() - self.set_op_attrs() - - @property - def fp16_enabled(self): - return False - - def set_data_feed(self): - data = np.random.uniform(size=[1, 3, 32, 32]) - self.feed_fp32 = {'data': data.astype(np.float32)} - self.feed_fp16 = {'data': data.astype(np.float16)} - - def set_feed_attr(self): - self.feed_shape = [x.shape for x in self.feed_fp32.values()] - self.feed_list = list(self.feed_fp32.keys()) - - def set_op_attrs(self): - self.attrs = {} - self.attrs['data_layout'] = 'NCHW' - - @IPUOpTest.static_graph - def build_model(self): - data = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32' - ) - input_scale = paddle.fluid.layers.create_parameter( - shape=[self.feed_shape[0][1]], dtype="float32" - ) - input_bias = paddle.fluid.layers.create_parameter( - shape=[self.feed_shape[0][1]], dtype="float32" - ) - out = paddle.fluid.layers.affine_channel( - data, scale=input_scale, bias=input_bias - ) - self.fetch_list = [out.name] - - def run_model(self, exec_mode): - self.run_op_test(exec_mode) - - def test(self): - for m in IPUOpTest.ExecutionMode: - if not self.skip_mode(m): - self.build_model() - self.run_model(m) - self.check() - - -class TestCase1(TestBase): - def set_data_feed(self): - data = np.random.uniform(size=[2, 4, 64, 64]) - self.feed_fp32 = {'data': data.astype(np.float32)} - self.feed_fp16 = {'data': data.astype(np.float16)} - - -@unittest.skip("Only support NCHW") -class TestNHWC(TestBase): - def set_op_attrs(self): - self.attrs = {} - self.attrs['data_layout'] = 'NHWC' - - def set_data_feed(self): - data = np.random.uniform(size=[2, 64, 64, 3]) - self.feed_fp32 = {'data': data.astype(np.float32)} - self.feed_fp16 = {'data': data.astype(np.float16)} - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_affine_channel_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_affine_channel_op.py deleted file mode 100644 index 70b605684be451..00000000000000 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_affine_channel_op.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import itertools -import unittest - -import numpy as np -from inference_pass_test import InferencePassTest - -import paddle.fluid as fluid -import paddle.fluid.core as core -from paddle.fluid.core import AnalysisConfig, PassVersionChecker - - -class TRTAffineChannelTest(InferencePassTest): - def setUp(self): - self.bs = 2 - self.channel = 8 - self.height = 16 - self.width = 16 - self.data_layout = 'NCHW' - self.precision = AnalysisConfig.Precision.Float32 - self.serialize = False - self.enable_trt = True - - def build(self): - # set min_graph_size to 2, - # because affine channel doesn't support nhwc format - self.trt_parameters = InferencePassTest.TensorRTParam( - 1 << 30, self.bs, 2, self.precision, self.serialize, False - ) - - with fluid.program_guard(self.main_program, self.startup_program): - if self.data_layout == 'NCHW': - shape = [-1, self.channel, self.height, self.width] - else: - shape = [-1, self.height, self.width, self.channel] - - data = fluid.data(name='in', shape=shape, dtype='float32') - # set scale, bias by constant - scale = fluid.layers.create_parameter( - shape=[self.channel], - dtype='float32', - default_initializer=fluid.initializer.Constant(2.0), - ) - bias = fluid.layers.create_parameter( - shape=[self.channel], - dtype='float32', - default_initializer=fluid.initializer.Constant(0.5), - ) - affine_channel_out = fluid.layers.affine_channel( - data, scale=scale, bias=bias, data_layout=self.data_layout - ) - out = fluid.layers.batch_norm(affine_channel_out, is_test=True) - - shape[0] = self.bs - self.feeds = { - 'in': np.random.random(shape).astype('float32'), - } - self.fetch_list = [out] - - def check_output(self): - if core.is_compiled_with_cuda(): - use_gpu = True - atol = 1e-5 - if self.trt_parameters.precision == AnalysisConfig.Precision.Half: - atol = 2e-2 - self.check_output_with_option(use_gpu, atol, flatten=True) - self.assertTrue( - PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') - ) - - def run_test(self): - self.build() - self.check_output() - - def run_test_all(self): - precision_opt = [ - AnalysisConfig.Precision.Float32, - AnalysisConfig.Precision.Half, - ] - serialize_opt = [False, True] - - if self.data_layout == 'NCHW': - min_shape = [ - self.bs, - self.channel, - self.height // 2, - self.width // 2, - ] - max_shape = [self.bs, self.channel, self.height * 2, self.width * 2] - opt_shape = [self.bs, self.channel, self.height, self.width] - - if self.data_layout == 'NHWC': - min_shape = [ - self.bs, - self.height // 2, - self.width // 2, - self.channel, - ] - max_shape = [self.bs, self.height * 2, self.width * 2, self.channel] - opt_shape = [self.bs, self.height, self.width, self.channel] - - dynamic_shape_profile = InferencePassTest.DynamicShapeParam( - {'in': min_shape}, {'in': max_shape}, {'in': opt_shape}, False - ) - dynamic_shape_opt = [None, dynamic_shape_profile] - - for precision, serialize, dynamic_shape in itertools.product( - precision_opt, serialize_opt, dynamic_shape_opt - ): - self.precision = precision - self.serialize = serialize - self.dynamic_shape_params = dynamic_shape - self.run_test() - - def test_base(self): - self.run_test() - - def test_fp16(self): - self.precision = AnalysisConfig.Precision.Half - self.run_test() - - def test_serialize(self): - self.serialize = True - self.run_test() - - def test_dynamic(self): - self.dynamic_shape_params = InferencePassTest.DynamicShapeParam( - {'in': [self.bs, self.channel, self.height // 2, self.width // 2]}, - {'in': [self.bs, self.channel, self.height * 2, self.width * 
2]}, - {'in': [self.bs, self.channel, self.height, self.width]}, - False, - ) - self.run_test() - - def test_nchw_all(self): - self.run_test_all() - - def test_nhwc(self): - self.data_layout = 'NHWC' - self.run_test_all() - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py b/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py index 791c2351d7340c..c908bfb5a4fc33 100644 --- a/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py +++ b/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py @@ -15,9 +15,6 @@ import numpy as np import math from op_test import OpTest -import paddle.fluid as fluid -import paddle -from paddle.fluid import Program, program_guard def add_position_encoding(input, alpha=1.0, beta=1.0): @@ -151,34 +148,5 @@ def init_input_output(self): start += max_length -class TestAddPositionEncodingOpError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - input_data = np.random.random((4, 16, 8)).astype("float32") - - def test_Variable(): - # the input type must be Variable - fluid.layers.add_position_encoding( - input=input_data, alpha=1.0, beta=1.0 - ) - - self.assertRaises(TypeError, test_Variable) - - -class TestAddPositionEncodingOpDygraph(unittest.TestCase): - def test_dygraph(self): - paddle.disable_static() - tensor = np.random.randn(16, 32, 64) - position_tensor = paddle.fluid.layers.add_position_encoding( - input=paddle.to_tensor(tensor), alpha=1.0, beta=1.0 - ).numpy() - paddle.enable_static() - - position_tensor_np = add_position_encoding(tensor, 1.0, 1.0) - np.testing.assert_allclose( - position_tensor, position_tensor_np, rtol=1e-05 - ) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_affine_channel_op.py b/python/paddle/fluid/tests/unittests/test_affine_channel_op.py index f09e5be5775065..5221fd9d69465c 100644 --- a/python/paddle/fluid/tests/unittests/test_affine_channel_op.py +++ b/python/paddle/fluid/tests/unittests/test_affine_channel_op.py @@ -18,7 +18,6 @@ import unittest import numpy as np from op_test import OpTest -import paddle.fluid as fluid def affine_channel(x, scale, bias, layout): @@ -65,41 +64,6 @@ def init_test_case(self): self.layout = 'NCHW' -class TestAffineChannelOpError(unittest.TestCase): - def test_errors(self): - with fluid.program_guard(fluid.Program()): - - def test_x_type(): - input_data = np.random.random(2, 1, 2, 2).astype("float32") - fluid.layers.affine_channel(input_data) - - self.assertRaises(TypeError, test_x_type) - - def test_x_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[None, 1, 2, 2], dtype='int32' - ) - fluid.layers.affine_channel(x2) - - self.assertRaises(TypeError, test_x_dtype) - - def test_scale_type(): - x3 = fluid.layers.data( - name='x3', shape=[None, 1, 2, 2], dtype='float32' - ) - fluid.layers.affine_channel(x3, scale=1) - - self.assertRaises(TypeError, test_scale_type) - - def test_bias_type(): - x4 = fluid.layers.data( - name='x4', shape=[None, 1, 2, 2], dtype='float32' - ) - fluid.layers.affine_channel(x4, bias=1) - - self.assertRaises(TypeError, test_bias_type) - - class TestAffineChannelNHWC(TestAffineChannelOp): def init_test_case(self): self.shape = [2, 3, 3, 100] diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 02f946810b4b9f..30e50294448f50 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ 
b/python/paddle/fluid/tests/unittests/test_layers.py @@ -3220,18 +3220,6 @@ def make_softmax(self): hid = layers.fc(input=data, size=20) return layers.softmax(hid, axis=1) - def make_space_to_depth(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - data = self._get_data( - name='data', - shape=[32, 9, 6, 6], - append_batch_size=False, - dtype='float32', - ) - return layers.space_to_depth(data, 3) - def make_get_places(self): with program_guard( fluid.default_main_program(), fluid.default_startup_program() diff --git a/python/paddle/fluid/tests/unittests/test_maxout_op.py b/python/paddle/fluid/tests/unittests/test_maxout_op.py index 9334c37f94bd40..2c34333bd38e5e 100644 --- a/python/paddle/fluid/tests/unittests/test_maxout_op.py +++ b/python/paddle/fluid/tests/unittests/test_maxout_op.py @@ -15,7 +15,6 @@ import unittest import numpy as np import paddle -import paddle.fluid as fluid import paddle.fluid.core as core import paddle.nn.functional as F from op_test import OpTest @@ -122,21 +121,6 @@ def func_test_dygraph_api(self): np.testing.assert_allclose(out3_ref, out3.numpy(), rtol=1e-05) paddle.enable_static() - def test_fluid_api(self): - with fluid.program_guard(fluid.Program()): - x = fluid.data('X', self.x_np.shape, self.x_np.dtype) - out = fluid.layers.maxout(x, groups=self.groups, axis=self.axis) - exe = fluid.Executor(self.place) - res = exe.run(feed={'X': self.x_np}, fetch_list=[out]) - out_ref = maxout_forward_naive(self.x_np, self.groups, self.axis) - np.testing.assert_allclose(out_ref, res[0], rtol=1e-05) - - paddle.disable_static(self.place) - x = paddle.to_tensor(self.x_np) - out = paddle.fluid.layers.maxout(x, groups=self.groups, axis=self.axis) - np.testing.assert_allclose(out_ref, out.numpy(), rtol=1e-05) - paddle.enable_static() - def test_errors(self): with paddle.static.program_guard(paddle.static.Program()): # The input type must be Variable. 
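For reviewers tracing these fluid cleanups, here is a minimal, self-contained sketch of the 2.x APIs that the retained tests switch to: paddle.gather_nd (the gather_nd substitutions at the start of this section), paddle.triu (#48427) and paddle.nn.functional.maxout (#48410). The shapes, dtypes and index values below are illustrative assumptions, not copied from any test file.

    import paddle
    import paddle.nn.functional as F

    # gather_nd: index the leading dims of x with an integer index tensor
    # (replaces fluid.layers.gather_nd). Output shape is
    # index.shape[:-1] + x.shape[index.shape[-1]:].
    x = paddle.rand([3, 4, 5])
    index = paddle.to_tensor([[0, 1], [2, 3]], dtype='int64')
    gathered = paddle.gather_nd(x, index)      # shape [2, 5]

    # triu: upper-triangular part of a matrix (replaces fluid.layers.triu).
    mat = paddle.rand([4, 4])
    upper = paddle.triu(mat, diagonal=0)

    # maxout: the channel dim (axis=1) must be divisible by groups
    # (replaces fluid.layers.maxout).
    feat = paddle.rand([2, 6, 8, 8])
    out = F.maxout(feat, groups=2, axis=1)     # shape [2, 3, 8, 8]

This mirrors the substitutions made above in test_trt_gather_nd_op.py, test_tril_triu_op.py and test_maxout_op.py.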
diff --git a/python/paddle/fluid/tests/unittests/test_op_name_conflict.py b/python/paddle/fluid/tests/unittests/test_op_name_conflict.py index 10e9b4d3028c3c..e255eb7b01f967 100644 --- a/python/paddle/fluid/tests/unittests/test_op_name_conflict.py +++ b/python/paddle/fluid/tests/unittests/test_op_name_conflict.py @@ -46,73 +46,6 @@ def test_conflict(self): self.assertEqual(n_v[0], 8.0) self.assertEqual(p_v[0], 13.0) - def test_layers(self): - main = fluid.Program() - startup = fluid.Program() - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - place = ( - fluid.CUDAPlace(0) - if fluid.core.is_compiled_with_cuda() - else fluid.CPUPlace() - ) - exe = fluid.Executor(place) - - data = fluid.data( - name='data', shape=[None, 1, 2, 2], dtype='float32' - ) - tensor = fluid.data( - name='tensor', shape=[None, 32, 64], dtype='float32' - ) - x = fluid.data( - name='x', shape=[None, 1], dtype='float32', lod_level=1 - ) - - input_scale = fluid.layers.create_parameter( - shape=[1], - dtype="float32", - default_initializer=fluid.initializer.Constant(2.0), - ) - input_bias = fluid.layers.create_parameter( - shape=[1], - dtype="float32", - default_initializer=fluid.initializer.Constant(0.5), - ) - out_affine = fluid.layers.affine_channel( - data, scale=input_scale, bias=input_bias - ) - out_similarity = fluid.layers.similarity_focus( - input=data, axis=1, indexes=[0] - ) - position_tensor = fluid.layers.add_position_encoding( - input=tensor, alpha=1.0, beta=1.0 - ) - x_reversed = fluid.layers.sequence_reverse(x) - - exe.run(fluid.default_startup_program()) - test_program = fluid.default_main_program().clone(for_test=True) - - x_d = fluid.create_lod_tensor( - np.array([[1.1], [2.2], [3.3], [4.4]]).astype('float32'), - [[1, 3]], - place, - ) - outs = exe.run( - test_program, - fetch_list=[ - out_affine, - out_similarity, - position_tensor, - x_reversed, - ], - feed={ - data.name: np.ones([1, 1, 2, 2]).astype('float32'), - tensor.name: np.ones([1, 32, 64]).astype('float32'), - x.name: x_d, - }, - return_numpy=False, - ) - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py b/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py index 15a050211a15d3..6b92a3253b51b3 100755 --- a/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py +++ b/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py @@ -15,8 +15,6 @@ import unittest import numpy as np from op_test import OpTest -import paddle.fluid as fluid -from paddle.fluid import Program, program_guard class TestSimilarityFocusOp(OpTest): @@ -229,35 +227,5 @@ def test_check_output(self): self.check_output() -class TestSimilarityFocusOpError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - data = fluid.data(name='data', shape=[16, 3, 2, 2], dtype='float32') - - def test_input_Variable(): - input = np.random.rand(16, 3, 2, 2).astype("float32") - out = fluid.layers.similarity_focus( - input=input, axis=1, indexes=[0] - ) - - self.assertRaises(TypeError, test_input_Variable) - - def test_axis_Int(): - axis = 1.0 - out = fluid.layers.similarity_focus( - input=data, axis=axis, indexes=[0] - ) - - self.assertRaises(TypeError, test_axis_Int) - - def test_indexes_List(): - indexes = 0 - out = fluid.layers.similarity_focus( - input=data, axis=1, indexes=indexes - ) - - self.assertRaises(TypeError, test_indexes_List) - - if __name__ == "__main__": unittest.main() diff --git 
a/python/paddle/fluid/tests/unittests/xpu/test_affine_channel_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_affine_channel_op_xpu.py index 0718c040bcacac..fc3c9612ead016 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_affine_channel_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_affine_channel_op_xpu.py @@ -24,7 +24,6 @@ from op_test_xpu import XPUOpTest import paddle import paddle.fluid.core as core -import paddle.fluid as fluid def affine_channel(x, scale, bias, layout): @@ -87,41 +86,6 @@ def init_test_case(self): self.layout = 'NCHW' -class TestAffineChannelOpError(unittest.TestCase): - def test_errors(self): - with fluid.program_guard(fluid.Program()): - - def test_x_type(): - input_data = np.random.random(2, 1, 2, 2).astype("float32") - fluid.layers.affine_channel(input_data) - - self.assertRaises(TypeError, test_x_type) - - def test_x_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[None, 1, 2, 2], dtype='int32' - ) - fluid.layers.affine_channel(x2) - - self.assertRaises(TypeError, test_x_dtype) - - def test_scale_type(): - x3 = fluid.layers.data( - name='x3', shape=[None, 1, 2, 2], dtype='float32' - ) - fluid.layers.affine_channel(x3, scale=1) - - self.assertRaises(TypeError, test_scale_type) - - def test_bias_type(): - x4 = fluid.layers.data( - name='x4', shape=[None, 1, 2, 2], dtype='float32' - ) - fluid.layers.affine_channel(x4, bias=1) - - self.assertRaises(TypeError, test_bias_type) - - class TestAffineChannelNHWC(TestAffineChannelOp): def init_test_case(self): self.shape = [2, 3, 3, 100] From 8591480033a9bea2ba67c4b69eb130253d494b97 Mon Sep 17 00:00:00 2001 From: hong <43953930+phlrain@users.noreply.github.com> Date: Tue, 29 Nov 2022 11:07:29 +0800 Subject: [PATCH 014/154] fix ninja compile error (#48435) * fix lite rocksdb ninja compile error * remove useless code * fix bug --- cmake/external/lite.cmake | 62 +++++++++++++++++++----------------- cmake/external/rocksdb.cmake | 7 ++-- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/cmake/external/lite.cmake b/cmake/external/lite.cmake index 1d5dd6ae8f4258..693ba66917cc62 100644 --- a/cmake/external/lite.cmake +++ b/cmake/external/lite.cmake @@ -35,6 +35,21 @@ if(LITE_WITH_XPU) endif() endif() +if(WITH_ARM) + if(LITE_WITH_XPU) + set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.xpu) + elseif(LITE_WITH_NNADAPTER) + message("Enable LITE_WITH_NNADAPTER") + if(NNADAPTER_WITH_HUAWEI_ASCEND_NPU) + set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.nnadapter) + endif() + else() + set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8) + endif() +else() + set(LITE_OUTPUT_BIN_DIR inference_lite_lib) +endif() + if(LITE_WITH_NNADAPTER) add_definitions(-DLITE_SUBGRAPH_WITH_NNADAPTER) if(NNADAPTER_WITH_HUAWEI_ASCEND_NPU) @@ -50,6 +65,12 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) set(LITE_PROJECT extern_lite) set(LITE_PREFIX_DIR ${THIRD_PARTY_PATH}/lite) set(LITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lite) + set(LITE_BINARY_DIR ${LITE_PREFIX_DIR}/src/extern_lite-build) + set(LITE_SOURCE_DIR ${LITE_PREFIX_DIR}/src/extern_lite) + + set(LITE_SHARED_LIB + ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so + ) if(NOT LITE_GIT_TAG) set(LITE_GIT_TAG 81ef66554099800c143a0feff6e0a491b3b0d12e) @@ -61,7 +82,8 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) # No quotes, so cmake can resolve it as a command with arguments. if(WITH_ARM) - set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j) + set(LITE_BUILD_COMMAND ${CMAKE_COMMAND} --build . 
--target + publish_inference -j) message(WARNING "BUILD_COMMAND: ${LITE_BUILD_COMMAND}") set(LITE_OPTIONAL_ARGS -DWITH_MKL=OFF @@ -110,9 +132,11 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} ${EXTERNAL_OPTIONAL_ARGS} - ${LITE_OPTIONAL_ARGS}) + ${LITE_OPTIONAL_ARGS} + BUILD_BYPRODUCTS ${LITE_SHARED_LIB}) else() - set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j) + set(LITE_BUILD_COMMAND ${CMAKE_COMMAND} --build . --target + publish_inference -j) set(LITE_OPTIONAL_ARGS -DWITH_MKL=ON -DLITE_WITH_CUDA=OFF @@ -157,28 +181,9 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} ${EXTERNAL_OPTIONAL_ARGS} - ${LITE_OPTIONAL_ARGS}) + ${LITE_OPTIONAL_ARGS} + BUILD_BYPRODUCTS ${LITE_SHARED_LIB}) endif() - ExternalProject_Get_Property(${LITE_PROJECT} BINARY_DIR) - ExternalProject_Get_Property(${LITE_PROJECT} SOURCE_DIR) - set(LITE_BINARY_DIR ${BINARY_DIR}) - set(LITE_SOURCE_DIR ${SOURCE_DIR}) - -endif() - -if(WITH_ARM) - if(LITE_WITH_XPU) - set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.xpu) - elseif(LITE_WITH_NNADAPTER) - message("Enable LITE_WITH_NNADAPTER") - if(NNADAPTER_WITH_HUAWEI_ASCEND_NPU) - set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.nnadapter) - endif() - else() - set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8) - endif() -else() - set(LITE_OUTPUT_BIN_DIR inference_lite_lib) endif() message(STATUS "Paddle-lite BINARY_DIR: ${LITE_BINARY_DIR}") @@ -199,12 +204,9 @@ function(external_lite_libs alias path) endfunction() external_lite_libs( - lite_full_static + lite_full_shared ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so ) -set(LITE_SHARED_LIB - ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so -) if(LITE_WITH_NNADAPTER) set(LITE_NNADAPTER_LIB @@ -214,13 +216,13 @@ if(LITE_WITH_NNADAPTER) lite_nnadapter ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libhuawei_ascend_npu.so) - set(LITE_DEPS lite_full_static lite_nnadapter) + set(LITE_DEPS lite_full_shared lite_nnadapter) set(LITE_NNADAPTER_NPU_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libhuawei_ascend_npu.so ) endif() else() - set(LITE_DEPS lite_full_static) + set(LITE_DEPS lite_full_shared) endif() add_definitions(-DPADDLE_WITH_LITE) diff --git a/cmake/external/rocksdb.cmake b/cmake/external/rocksdb.cmake index 673b143aba8536..40af6b564b3fc1 100644 --- a/cmake/external/rocksdb.cmake +++ b/cmake/external/rocksdb.cmake @@ -39,18 +39,17 @@ ExternalProject_Add( -DWITH_GFLAGS=OFF -DCMAKE_CXX_FLAGS=${ROCKSDB_CMAKE_CXX_FLAGS} -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - # BUILD_BYPRODUCTS ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/librocksdb.a INSTALL_COMMAND mkdir -p ${ROCKSDB_INSTALL_DIR}/lib/ && cp ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/librocksdb.a ${ROCKSDB_LIBRARIES} && cp -r ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/include ${ROCKSDB_INSTALL_DIR}/ BUILD_IN_SOURCE 1 - BYPRODUCTS ${ROCKSDB_LIBRARIES}) - -add_dependencies(extern_rocksdb snappy) + BUILD_BYPRODUCTS ${ROCKSDB_LIBRARIES}) add_library(rocksdb STATIC IMPORTED GLOBAL) + +add_dependencies(extern_rocksdb snappy) set_property(TARGET rocksdb PROPERTY IMPORTED_LOCATION ${ROCKSDB_LIBRARIES}) add_dependencies(rocksdb extern_rocksdb) From fa051eecb107ed4072c5c34fb3abd47f049d9fd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awomir=20Siwek?= Date: Tue, 29 Nov 2022 
04:20:48 +0100 Subject: [PATCH 015/154] [PHI decoupling] Move MKLDNN code (#48352) --- .../fluid/framework/data_layout_transform.cc | 119 +----------------- .../fluid/framework/data_layout_transform.h | 51 +------- .../framework/data_layout_transform_test.cc | 4 +- paddle/fluid/framework/data_transform.cc | 34 +++-- .../new_executor/interpreter/data_transfer.cc | 4 +- paddle/fluid/framework/operator.cc | 8 +- .../fluid/inference/api/analysis_predictor.cc | 15 ++- .../inference/api/details/zero_copy_tensor.cc | 16 ++- .../fluid/inference/api/mkldnn_quantizer.cc | 7 +- ...nalyzer_detect_functional_mkldnn_tester.cc | 3 +- .../fluid/operators/controlflow/fetch_op.cc | 7 +- .../operators/controlflow/fetch_v2_op.cc | 5 +- .../operators/elementwise/elementwise_op.h | 6 +- .../mkldnn/elementwise_mkldnn_op.h | 18 ++- .../fused/mkldnn/fusion_gru_mkldnn_op.cc | 17 +-- .../fused/mkldnn/fusion_lstm_mkldnn_op.cc | 19 +-- .../fused/mkldnn/fusion_rnn_mkldnn.h | 33 +++-- .../fused/mkldnn/multi_gru_mkldnn_op.cc | 24 ++-- paddle/fluid/operators/matmul_op.cc | 6 +- paddle/fluid/operators/matmul_v2_op.cc | 4 +- .../operators/mkldnn/dequantize_mkldnn_op.cc | 5 +- paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc | 25 ++-- .../operators/mkldnn/interpolate_mkldnn_op.cc | 5 +- .../operators/mkldnn/layer_norm_mkldnn_op.cc | 5 +- .../fluid/operators/mkldnn/lrn_mkldnn_op.cc | 11 +- .../operators/mkldnn/matmul_v2_mkldnn_op.cc | 23 ++-- .../fluid/operators/mkldnn/mul_mkldnn_op.cc | 36 +++--- .../operators/mkldnn/quantize_mkldnn_op.cc | 5 +- .../operators/mkldnn/requantize_mkldnn_op.cc | 5 +- .../operators/mkldnn/reshape_mkldnn_op.cc | 10 +- .../mkldnn/shuffle_channel_mkldnn_op.cc | 5 +- .../operators/mkldnn/test_mkldnn_caching.cc | 5 +- .../operators/mkldnn/transpose_mkldnn_op.cc | 11 +- paddle/fluid/operators/transfer_layout_op.h | 35 +++--- paddle/fluid/operators/transpose_op.cc | 4 +- paddle/fluid/platform/device_context.cc | 2 +- paddle/fluid/platform/device_context.h | 5 - paddle/fluid/platform/mkldnn_helper.h | 108 ++-------------- paddle/phi/backends/onednn/onednn_helper.h | 2 + .../kernels/funcs/data_layout_transform.cc | 14 +-- .../phi/kernels/funcs/data_layout_transform.h | 14 +-- paddle/phi/kernels/transfer_layout_kernel.cc | 2 +- 42 files changed, 232 insertions(+), 505 deletions(-) diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index e31c9055320da2..3c0e8d4f0ec722 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -14,11 +14,8 @@ #include "paddle/fluid/framework/data_layout_transform.h" -#include "paddle/phi/kernels/funcs/math_function.h" -#ifdef PADDLE_WITH_MKLDNN -#include "paddle/fluid/platform/mkldnn_reuse.h" -#endif #include "paddle/fluid/framework/convert_utils.h" +#include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { namespace framework { @@ -92,119 +89,5 @@ void TransDataLayout(const OpKernelType& kernel_type_for_var, out->set_layout(expected_kernel_type.data_layout_); } -#ifdef PADDLE_WITH_MKLDNN -using dnnl::memory; -using dnnl::primitive; -using dnnl::reorder; - -void* GetDataFromTensor(const phi::DenseTensor& tensor, - dnnl::memory::data_type type) { - switch (type) { - case dnnl::memory::data_type::f32: - return phi::funcs::to_void_cast(tensor.data()); - case dnnl::memory::data_type::s8: - return phi::funcs::to_void_cast(tensor.data()); - case dnnl::memory::data_type::u8: - return phi::funcs::to_void_cast(tensor.data()); - case 
dnnl::memory::data_type::s32: - return phi::funcs::to_void_cast(tensor.data()); - case dnnl::memory::data_type::bf16: - return phi::funcs::to_void_cast( - tensor.data()); - default: - PADDLE_THROW( - platform::errors::InvalidArgument("Wrong mkldnn type provided.")); - } -} - -void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, - const OpKernelType& expected_kernel_type, - const phi::DenseTensor& in, - phi::DenseTensor* out) { - auto in_layout = kernel_type_for_var.data_layout_; - auto out_layout = expected_kernel_type.data_layout_; - auto place = expected_kernel_type.place_; - - PADDLE_ENFORCE( - in_layout == DataLayout::ONEDNN && out_layout != DataLayout::ONEDNN, - platform::errors::InvalidArgument( - "TransDataLayoutFromMKLDNN only supports transform from MKLDNN to " - "non-MKLDNN")); - - innerTransDataLayoutFromMKLDNN( - in_layout, - paddle::platform::MKLDNNDeviceContext::tls().get_cur_paddle_data_layout(), - in, - out, - place); -} - -void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, - DataLayout out_layout, - const phi::DenseTensor& in, - phi::DenseTensor* out, - platform::Place place, - bool always_copy) { - // Set default as NCHW in case not specified - out_layout = - out_layout == DataLayout::kAnyLayout ? DataLayout::kNCHW : out_layout; - - auto& pool = platform::DeviceContextPool::Instance(); - auto* dev_ctx = dynamic_cast(pool.Get(place)); - auto& cpu_engine = dev_ctx->GetEngine(); - - auto in_tz = phi::vectorize(in.dims()); - auto out_tz = in_tz; - - memory::data_type in_type = - ToMKLDNNDataType(framework::TransToProtoVarType(in.dtype())); - PADDLE_ENFORCE_NE( - in_type, - memory::data_type::undef, - platform::errors::InvalidArgument( - "Input tensor type (%s) is not supported.", - DataTypeToString(framework::TransToProtoVarType(in.dtype())))); - - auto out_format = - phi::funcs::OneDNNFormatForSize(in_tz.size(), ToOneDNNFormat(out_layout)); - dnnl::memory::desc out_mem_desc(out_tz, in_type, out_format); - - // output tensor has the same dims as input. Reorder don't change dims - out->set_mem_desc(out_mem_desc); - out->Resize(in.dims()); - - // Note(0x45f): Using initialized() to support slice Tensors - // with shapes like [0, 0, 0]. 
- if (in.initialized() && ((in.mem_desc() != out->mem_desc()) || always_copy)) { - void* in_data = GetDataFromTensor(in, in_type); - - phi::funcs::ReorderOneDNNHandler handler( - in_tz, in.dtype(), in_type, cpu_engine); - - auto reorder_src_memory_p = - handler.AcquireSrcMemory(in.mem_desc(), in_data); - auto reorder_dst_memory_p = - handler.AcquireDstMemory(out, out->mem_desc(), place); - auto reorder_p = - handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p); - - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); - platform::RecordEvent record_reorder("ext_reorder", - platform::TracerEventType::UserDefined, - 2, - platform::EventRole::kUniqueOp); - reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); - astream.wait(); - } else { - out->ShareDataWith(in); - } - // For exepected NHWC data format we need to reshape the Output tensor - // As MKL-DNN description was in NCHW and paddle is expecting NHWC - phi::funcs::MatchShapeToLayout(out, in_layout, out_layout); - - out->set_layout(DataLayout::kNCHW); -} -#endif - } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/data_layout_transform.h b/paddle/fluid/framework/data_layout_transform.h index f3bfdc17497f05..bad13e7e90384b 100644 --- a/paddle/fluid/framework/data_layout_transform.h +++ b/paddle/fluid/framework/data_layout_transform.h @@ -21,6 +21,7 @@ #include "paddle/fluid/framework/op_kernel_type.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/variable.h" +#include "paddle/phi/kernels/funcs/data_layout_transform.h" namespace paddle { namespace framework { @@ -29,7 +30,7 @@ class OpKernelType; } // namespace paddle #ifdef PADDLE_WITH_MKLDNN -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/phi/backends/onednn/onednn_helper.h" #endif namespace paddle { @@ -51,54 +52,6 @@ struct CastDataLayout { void apply(); }; -#ifdef PADDLE_WITH_MKLDNN -using OneDNNDataType = dnnl::memory::data_type; - -inline OneDNNMemoryFormat ToOneDNNFormat(const DataLayout& layout) { - switch (layout) { - case DataLayout::kNHWC: - return OneDNNMemoryFormat::nhwc; - case DataLayout::kNCHW: - return OneDNNMemoryFormat::nchw; - case DataLayout::kNCDHW: - return OneDNNMemoryFormat::ncdhw; - case DataLayout::kNDHWC: - return OneDNNMemoryFormat::ndhwc; - default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Fail to convert layout %s to oneDNN format.", - phi::DataLayoutToString(layout))); - } -} - -inline OneDNNDataType ToMKLDNNDataType(proto::VarType::Type type) { - static std::unordered_map dict{ - {DataTypeTrait::DataType(), OneDNNDataType::f32}, - {DataTypeTrait::DataType(), OneDNNDataType::s8}, - {DataTypeTrait::DataType(), OneDNNDataType::u8}, - {DataTypeTrait::DataType(), OneDNNDataType::s32}, - {DataTypeTrait::DataType(), OneDNNDataType::bf16}}; - auto iter = dict.find(static_cast(type)); - if (iter != dict.end()) return iter->second; - return OneDNNDataType::undef; -} - -void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, - DataLayout out_layout, - const phi::DenseTensor& in, - phi::DenseTensor* out, - platform::Place place, - bool always_copy = false); - -void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, - const OpKernelType& expected_kernel_type, - const phi::DenseTensor& in, - phi::DenseTensor* out); - -void* GetDataFromTensor(const phi::DenseTensor& tensor, OneDNNDataType type); - -#endif - std::vector GetAxis(const DataLayout& from, const DataLayout& to); void TransDataLayout(const OpKernelType& 
kernel_type_for_var, diff --git a/paddle/fluid/framework/data_layout_transform_test.cc b/paddle/fluid/framework/data_layout_transform_test.cc index 68fee94d617751..9b314fbb2c1609 100644 --- a/paddle/fluid/framework/data_layout_transform_test.cc +++ b/paddle/fluid/framework/data_layout_transform_test.cc @@ -53,7 +53,7 @@ TEST(DataTransformBf16, GetDataFromTensorDNNL) { place); void* in_data = - paddle::framework::GetDataFromTensor(in, dnnl::memory::data_type::bf16); + phi::funcs::GetDataFromTensor(in, dnnl::memory::data_type::bf16); EXPECT_EQ(in_data, phi::funcs::to_void_cast(in.data())); } @@ -64,7 +64,7 @@ TEST(DataTransformInt32, GetDataFromTensorDNNL) { in.mutable_data(phi::make_ddim({2, 3, 1, 2}), place); void* in_data = - paddle::framework::GetDataFromTensor(in, dnnl::memory::data_type::s32); + phi::funcs::GetDataFromTensor(in, dnnl::memory::data_type::s32); EXPECT_EQ(in_data, phi::funcs::to_void_cast(in.data())); } #endif diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc index b9247571f1923e..fff4f6acb3e74f 100644 --- a/paddle/fluid/framework/data_transform.cc +++ b/paddle/fluid/framework/data_transform.cc @@ -57,11 +57,11 @@ void TransformData(const OpKernelType &expected_kernel_type, "No layout transform needed between two oneDNN OPKernels.")); if (lin != DataLayout::ONEDNN && lout == DataLayout::ONEDNN) { - // Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel + // Case1 - transform from Non-ONEDNN OPKernel to ONEDNN OPKernel // Just set layout/format. No real transform occur - auto out_format = phi::funcs::OneDNNFormatForSize(in.dims().size(), - ToOneDNNFormat(lin)); + auto out_format = phi::funcs::OneDNNFormatForSize( + in.dims().size(), phi::funcs::ToOneDNNFormat(lin)); out.ShareDataWith(input_tensor); // For NHWC data we need reshape of tensors as MKL-DNN // is expecting NHWC dims description order @@ -69,26 +69,36 @@ void TransformData(const OpKernelType &expected_kernel_type, phi::funcs::MatchShapeToLayout(&out, lin, lout); // We register only NHWC assuming that model is consistent e.g. 
either // NHWC or NCHW - paddle::platform::MKLDNNDeviceContext::tls() - .set_cur_paddle_data_layout(lin); + phi::OneDNNContext::tls().set_cur_paddle_data_layout(lin); } dnnl::memory::desc out_mem_desc( vectorize(out.dims()), - ToMKLDNNDataType(TransToProtoVarType(in.type())), + phi::funcs::ToOneDNNDataType(in.dtype()), out_format); out.set_mem_desc(out_mem_desc); } else { - // Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel - // Do transform via MKLDNN lib - TransDataLayoutFromMKLDNN( - kernel_type_for_var, expected_kernel_type, in, &out); + // Case2 - transfrom from ONEDNN OPKernel to Non-ONEDNN OPKernel + // Do transform via ONEDNN lib + PADDLE_ENFORCE( + kernel_type_for_var.data_layout_ == DataLayout::ONEDNN && + expected_kernel_type.data_layout_ != DataLayout::ONEDNN, + platform::errors::InvalidArgument( + "TransDataLayoutFromOneDNN only supports " + "transform from ONEDNN to non-ONEDNN")); + + phi::funcs::TransDataLayoutFromOneDNN( + kernel_type_for_var.data_layout_, + phi::OneDNNContext::tls().get_cur_paddle_data_layout(), + in, + &out, + expected_kernel_type.place_); } } else { - // Case3 - transfrom between Non-MKLDNN OPKernels + // Case3 - transfrom between Non-ONEDNN OPKernels TransDataLayout(kernel_type_for_var, expected_kernel_type, in, &out); } #else - // Case3 - transfrom between Non-MKLDNN OPKernels + // Case3 - transfrom between Non-ONEDNN OPKernels TransDataLayout(kernel_type_for_var, expected_kernel_type, in, &out); #endif transformed = true; diff --git a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc index b70242d228fa04..8f9209f6a91d58 100644 --- a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc +++ b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc @@ -494,8 +494,8 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key, if ((tensor_in->layout() == DataLayout::ONEDNN) && (var->IsType() == true) && (expected_kernel_key.data_layout_ != DataLayout::ONEDNN) && - (paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout() == DataLayout::kNHWC)) { + (phi::OneDNNContext::tls().get_cur_paddle_data_layout() == + DataLayout::kNHWC)) { VLOG(7) << "Created reshaped dummy input based on MKL-DNN " "phi::DenseTensor , " "but kNHWC layout" diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 3ad1b4c2579733..0784c8330686a3 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -2304,8 +2304,8 @@ Scope* OperatorWithKernel::PrepareData( if ((tensor_in->layout() == DataLayout::ONEDNN) && (var->IsType() == true) && (expected_kernel_key.data_layout_ != DataLayout::ONEDNN) && - (paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout() == DataLayout::kNHWC) && + (phi::OneDNNContext::tls().get_cur_paddle_data_layout() == + DataLayout::kNHWC) && (tensor_in->dims().size() >= 3)) { // Mixed execution : oneDNN and GPU is not supported! 
if (!new_scope) { @@ -2757,8 +2757,8 @@ OpKernelType OperatorWithKernel::GetKernelTypeForVar( // then we also need to rotate shape NHWC -> NCWH if ((expected_kernel_type.data_layout_ == phi::DataLayout::ONEDNN) && (tensor.layout() != phi::DataLayout::ONEDNN) && - paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout() == phi::DataLayout::kNHWC) { + phi::OneDNNContext::tls().get_cur_paddle_data_layout() == + phi::DataLayout::kNHWC) { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), phi::DataLayout::kNHWC); diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index c1ca6d8e9608ca..618de300fcb022 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -868,13 +868,12 @@ void AnalysisPredictor::MkldnnPreSet( const std::vector> &inputs_shape) { #ifdef PADDLE_WITH_MKLDNN VLOG(2) << "AnalysisPredictor::ZeroCopyRun get_cur_mkldnn_session_id=" - << platform::MKLDNNDeviceContext::tls().get_cur_mkldnn_session_id(); + << phi::OneDNNContext::tls().get_cur_mkldnn_session_id(); // In cache clearing mode. if (config_.mkldnn_cache_capacity_ > 0) { VLOG(2) << "In mkldnn cache clear mode."; - platform::MKLDNNDeviceContext::tls().set_cur_mkldnn_session_id( - platform::MKLDNNDeviceContextThreadLocals:: - kMKLDNNSessionID_CacheClearing); + phi::OneDNNContext::tls().set_cur_mkldnn_session_id( + phi::OneDNNContextThreadLocals::kMKLDNNSessionID_CacheClearing); // Set current_input_shape for caching dynamic shape. std::stringstream ss; for (size_t i = 0; i < inputs_shape.size(); ++i) { @@ -883,9 +882,9 @@ void AnalysisPredictor::MkldnnPreSet( } } VLOG(2) << "Set input shape=" << ss.str(); - platform::MKLDNNDeviceContext::tls().set_cur_input_shape_str(ss.str()); + phi::OneDNNContext::tls().set_cur_input_shape_str(ss.str()); } - platform::MKLDNNDeviceContext::tls().set_cur_input_shape_cache_capacity( + phi::OneDNNContext::tls().set_cur_input_shape_cache_capacity( config_.mkldnn_cache_capacity_); #endif @@ -895,11 +894,11 @@ void AnalysisPredictor::MkldnnPostReset() { #ifdef PADDLE_WITH_MKLDNN // In cache clearing mode. 
if (config_.mkldnn_cache_capacity_ > 0 && - static_cast( + static_cast( (&platform::DeviceContextPool::Instance())->Get(platform::CPUPlace())) ->GetCachedObjectsNumber() > 0) { if (VLOG_IS_ON(2)) { - auto shape_blob_size = static_cast( + auto shape_blob_size = static_cast( (&platform::DeviceContextPool::Instance()) ->Get(platform::CPUPlace())) ->GetShapeBlobSize(); diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index c72ef18cb0f9b8..b87d7b36113622 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -378,10 +378,9 @@ void Tensor::CopyToCpuImpl(T *data, if (paddle::platform::is_cpu_place(t_place)) { #ifdef PADDLE_WITH_MKLDNN if (tensor->layout() == phi::DataLayout::ONEDNN) - paddle::framework::innerTransDataLayoutFromMKLDNN( + phi::funcs::TransDataLayoutFromOneDNN( tensor->layout(), - paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout(), + phi::OneDNNContext::tls().get_cur_paddle_data_layout(), *tensor, &out, paddle::platform::CPUPlace(), @@ -661,12 +660,12 @@ std::vector Tensor::shape() const { tensor_, paddle::platform::errors::PreconditionNotMet( "Not found tensor called %s in the scope", name_)); -// mkldnn may does layout transform internally, so need to reorder before +// oneDNN may does layout transform internally, so need to reorder before // return #ifdef PADDLE_WITH_MKLDNN if (tensor->layout() == phi::DataLayout::ONEDNN) { - phi::DataLayout out_layout = paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout(); + phi::DataLayout out_layout = + phi::OneDNNContext::tls().get_cur_paddle_data_layout(); // Set default as NCHW in case not specified out_layout = out_layout == phi::DataLayout::kAnyLayout ? 
phi::DataLayout::kNCHW @@ -853,10 +852,9 @@ void InternalUtils::CopyToCpuWithIoStream(paddle_infer::Tensor *t, if (paddle::platform::is_cpu_place(t_place)) { #ifdef PADDLE_WITH_MKLDNN if (tensor->layout() == phi::DataLayout::ONEDNN) - paddle::framework::innerTransDataLayoutFromMKLDNN( + phi::funcs::TransDataLayoutFromOneDNN( tensor->layout(), - paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout(), + phi::OneDNNContext::tls().get_cur_paddle_data_layout(), *tensor, &out, paddle::platform::CPUPlace(), diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.cc b/paddle/fluid/inference/api/mkldnn_quantizer.cc index 69bb5b7ed8589a..7231559e199ce1 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer.cc +++ b/paddle/fluid/inference/api/mkldnn_quantizer.cc @@ -581,10 +581,9 @@ AnalysisPredictor::MkldnnQuantizer::Histogram( void AnalysisPredictor::MkldnnQuantizer::ClearDeviceContext() const { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - platform::MKLDNNDeviceContext* dev_ctx = - (platform::MKLDNNDeviceContext*)pool.Get(predictor_.place_); - dev_ctx->ResetBlobMap( - paddle::platform::MKLDNNDeviceContext::tls().get_curr_exec()); + phi::OneDNNContext* dev_ctx = + (phi::OneDNNContext*)pool.Get(predictor_.place_); + dev_ctx->ResetBlobMap(phi::OneDNNContext::tls().get_curr_exec()); } void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const { diff --git a/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc index 4f0c6ae6499f85..f73798faa49894 100644 --- a/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc @@ -79,8 +79,7 @@ void SetInput(std::vector> *inputs, int GetNumCachedObjects(void) { auto &pool = platform::DeviceContextPool::Instance(); platform::CPUPlace place; - auto onednn_dev_ctx = - dynamic_cast(pool.Get(place)); + auto onednn_dev_ctx = dynamic_cast(pool.Get(place)); return onednn_dev_ctx->GetCachedObjectsNumber(); } diff --git a/paddle/fluid/operators/controlflow/fetch_op.cc b/paddle/fluid/operators/controlflow/fetch_op.cc index 17b0f577f14687..596de7575142f3 100644 --- a/paddle/fluid/operators/controlflow/fetch_op.cc +++ b/paddle/fluid/operators/controlflow/fetch_op.cc @@ -33,13 +33,12 @@ static void DataCopy(const phi::DenseTensor &src_item, phi::DenseTensor out; // Convert to desired Paddle layout, apart from grads of filter // as params are not a subject to paddle's data_format - VLOG(4) << "innerTransDataLayoutFromMKLDNN"; - framework::innerTransDataLayoutFromMKLDNN( + VLOG(4) << "TransDataLayoutFromOneDNN"; + phi::funcs::TransDataLayoutFromOneDNN( src_item.layout(), fetch_var_name == framework::GradVarName("Filter") ? 
phi::DataLayout::kNCHW - : paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout(), + : phi::OneDNNContext::tls().get_cur_paddle_data_layout(), src_item, &out, platform::CPUPlace()); diff --git a/paddle/fluid/operators/controlflow/fetch_v2_op.cc b/paddle/fluid/operators/controlflow/fetch_v2_op.cc index 939e58ebbb39cf..b70211c1e16793 100644 --- a/paddle/fluid/operators/controlflow/fetch_v2_op.cc +++ b/paddle/fluid/operators/controlflow/fetch_v2_op.cc @@ -41,12 +41,11 @@ static void DeepCopy(const phi::DenseTensor &src_item, phi::DenseTensor out; // Convert to desired Paddle layout, apart from grads of filter // as params are not a subject to paddle's data_format - framework::innerTransDataLayoutFromMKLDNN( + phi::funcs::TransDataLayoutFromOneDNN( src_item.layout(), fetch_var_name == framework::GradVarName("Filter") ? phi::DataLayout::kNCHW - : paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout(), + : phi::OneDNNContext::tls().get_cur_paddle_data_layout(), src_item, &out, platform::CPUPlace()); diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h index 6ab782e07fdca1..6bc9c345fcd4e1 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_op.h @@ -115,7 +115,7 @@ class ElementwiseOp : public framework::OperatorWithKernel { // if model is using NHWC and any of shapes in at least 3D bool should_rotate = ctx->IsRunMKLDNNKernel() && - (platform::MKLDNNDeviceContext::tls().get_cur_paddle_data_layout() == + (phi::OneDNNContext::tls().get_cur_paddle_data_layout() == phi::DataLayout::kNHWC) && (x_dims.size() >= 3 || y_dims.size() >= 3); if (should_rotate) { @@ -177,8 +177,8 @@ class ElementwiseOp : public framework::OperatorWithKernel { // then we also need to rotate shape NHWC -> NCWH if ((expected_kernel_type.data_layout_ == phi::DataLayout::ONEDNN) && (tensor.layout() != phi::DataLayout::ONEDNN) && - paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout() == phi::DataLayout::kNHWC) { + phi::OneDNNContext::tls().get_cur_paddle_data_layout() == + phi::DataLayout::kNHWC) { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), phi::DataLayout::kNHWC); diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h index bb670363e79e3b..6c7a8a7a66cf51 100644 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h +++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h @@ -28,6 +28,7 @@ using dnnl::memory; using dnnl::primitive; using dnnl::stream; using phi::DataLayout; +using phi::OneDNNContext; using phi::funcs::BinaryOneDNNHandler; inline std::vector CalculateBroadcastedDims( @@ -63,9 +64,8 @@ inline void AddSubNonBroadcast( auto reorder_p = reorder_handler->AcquireReorder(dst_memory, src_memory, reorder_attr); - reorder_p->execute(platform::MKLDNNDeviceContext::tls().get_stream(), - *src_memory, - *dst_memory); + reorder_p->execute( + OneDNNContext::tls().get_stream(), *src_memory, *dst_memory); } template @@ -99,7 +99,7 @@ inline void BroadcastReduction(const framework::ExecutionContext& ctx, dst_memory = reduction_handler.AcquireDstMemory(grad_tensor); auto reduction_p = reduction_handler.AcquireForwardPrimitive(); - auto astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto astream = OneDNNContext::tls().get_stream(); reduction_p->execute(astream, { 
{DNNL_ARG_SRC, *src_memory}, @@ -126,8 +126,7 @@ class EltwiseMKLDNNKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const auto& dev_ctx = - ctx.template device_context(); + const auto& dev_ctx = ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); auto* x = ctx.Input("X"); @@ -188,7 +187,7 @@ class EltwiseMKLDNNKernel : public framework::OpKernel { const auto binary_prim = handler.AcquireForwardPrimitive(); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); const std::unordered_map args = { {DNNL_ARG_SRC_0, *src_x_memory}, @@ -217,8 +216,7 @@ class EltwiseMKLDNNGradKernel : public ElemwiseGradKernel { void Compute(const framework::ExecutionContext& ctx) const override { ElemwiseGradKernel::Compute(ctx); - auto& dev_ctx = - ctx.template device_context(); + auto& dev_ctx = ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); auto* x = ctx.Input("X"); @@ -257,7 +255,7 @@ class EltwiseMKLDNNGradKernel : public ElemwiseGradKernel { std::shared_ptr dst_memory; std::shared_ptr broadcast_src_memory = reorder_src_memory; - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); if (dx) { // elementwise_add & elementwise_sub if (BINARY_OP == dnnl::algorithm::binary_add || diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc index 31e74372cb9b87..00f0f94175e434 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc @@ -20,14 +20,16 @@ limitations under the License. 
*/ namespace paddle { namespace operators { +using phi::OneDNNContext; using phi::funcs::OneDNNGetDataType; using phi::funcs::OneDNNMemDesc; +using phi::funcs::RNNReorderType; template class GRUMKLDNNHandler : public RNNMKLDNNHandler { public: GRUMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, - const platform::MKLDNNDeviceContext& dev_ctx, + const OneDNNContext& dev_ctx, const dnnl::engine mkldnn_engine, platform::Place cpu_place, const phi::DenseTensor* input, @@ -142,7 +144,7 @@ class GRUMKLDNNHandler : public RNNMKLDNNHandler { memory_p = std::make_shared( this->fwd_pd_->weights_layer_desc(), this->engine_); - auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); dnnl::reorder(user_memory, *memory_p, this->attr_) .execute(astream, user_memory, *memory_p); @@ -196,7 +198,7 @@ class GRUMKLDNNHandler : public RNNMKLDNNHandler { memory_p = std::make_shared( this->fwd_pd_->weights_iter_desc(), this->engine_); - auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); dnnl::reorder(user_memory, *memory_p, this->attr_) .execute(astream, user_memory, *memory_p); @@ -253,8 +255,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel { template void RunKernel(const framework::ExecutionContext& ctx) const { - auto& dev_ctx = - ctx.template device_context(); + auto& dev_ctx = ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); // Get Tensors @@ -349,7 +350,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel { auto gru_forward_p = handler.AcquireForwardPrimitive(); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); gru_forward_p->execute(astream, gru_args); astream.wait(); @@ -361,13 +362,13 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel { hidden_data, input_lod, is_reverse, - platform::RNNReorderType::NTC_PP); + RNNReorderType::NTC_PP); } else { handler.reorderRNNdata(hidden_onednn_data, hidden_data, input_lod, is_reverse, - platform::RNNReorderType::TNC_PP); + RNNReorderType::TNC_PP); } } }; diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc index 1ce97637358d97..68ab4f55a5e56e 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc @@ -20,15 +20,17 @@ limitations under the License. 
*/ namespace paddle { namespace operators { +using phi::OneDNNContext; using phi::funcs::OneDNNGetDataType; using phi::funcs::OneDNNMemDesc; +using phi::funcs::RNNReorderType; template class LSTMMKLDNNHandler : public RNNMKLDNNHandler { public: LSTMMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, - const platform::MKLDNNDeviceContext& dev_ctx, + const OneDNNContext& dev_ctx, const dnnl::engine mkldnn_engine, platform::Place cpu_place, const phi::DenseTensor* input, @@ -186,7 +188,7 @@ class LSTMMKLDNNHandler memory_p = std::make_shared( this->fwd_pd_->weights_layer_desc(), this->engine_); - auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); dnnl::reorder(user_memory, *memory_p, this->attr_) .execute(astream, user_memory, *memory_p); @@ -218,7 +220,7 @@ class LSTMMKLDNNHandler memory_p = std::make_shared( this->fwd_pd_->weights_iter_desc(), this->engine_); - auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); dnnl::reorder(user_memory, *memory_p, this->attr_) .execute(astream, user_memory, *memory_p); @@ -308,7 +310,7 @@ class LSTMMKLDNNHandler memory_p = std::make_shared( this->fwd_pd_->src_iter_c_desc(), this->engine_); - auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); dnnl::reorder(user_c0_memory, *memory_p) .execute(astream, user_c0_memory, *memory_p); @@ -335,8 +337,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel { template void RunKernel(const framework::ExecutionContext& ctx) const { - auto& dev_ctx = - ctx.template device_context(); + auto& dev_ctx = ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); // Get Tensors @@ -444,7 +445,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel { auto lstm_forward_p = handler.AcquireForwardPrimitive(); - auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); lstm_forward_p->execute(astream, lstm_args); astream.wait(); @@ -456,13 +457,13 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel { hidden_data, input_lod, is_reverse, - platform::RNNReorderType::NTC_PP); + RNNReorderType::NTC_PP); } else { handler.reorderRNNdata(hidden_onednn_data, hidden_data, input_lod, is_reverse, - platform::RNNReorderType::TNC_PP); + RNNReorderType::TNC_PP); } } }; diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h b/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h index 2ed30d3c16af03..51b2d9c9956258 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h +++ b/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h @@ -19,14 +19,15 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using paddle::platform::CreateKey; +using phi::funcs::CreateKey; using phi::funcs::OneDNNGetDataType; +using phi::funcs::RNNReorderType; template class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT { public: RNNMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, - const platform::MKLDNNDeviceContext& dev_ctx, + const phi::OneDNNContext& dev_ctx, const dnnl::engine mkldnn_engine, platform::Place cpu_place, const phi::DenseTensor* input, @@ -51,7 +52,7 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT { G(G) { // Create memory key without Ti because weights, bias and h0 memories // do not depend on Ti size but primitive and input/output memory do - memory_key_ = platform::ExtendKeyWithThreadInfoIfNeeded( + memory_key_ = phi::funcs::ExtendKeyWithThreadInfoIfNeeded( dev_ctx, CreateKey(dev_ctx, unique_name, OneDNNGetDataType())); // Is it int8 kernel @@ -86,10 +87,10 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT { void* output_data, std::vector lod, const bool is_reverse, - platform::RNNReorderType reorder_type) { + RNNReorderType reorder_type) { switch (reorder_type) { // Reorder input memory [WORDS, C] + LoD -> [N, T, C] - case platform::RNNReorderType::PP_NTC: { + case RNNReorderType::PP_NTC: { auto* input_data_iter = reinterpret_cast(input_data); auto* output_data_iter = reinterpret_cast(output_data); for (int n = 0; n < N; ++n) { @@ -102,7 +103,7 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT { } } break; // Reorder input memory [WORDS, C] + LoD -> [T, N, C] - case platform::RNNReorderType::PP_TNC: { + case RNNReorderType::PP_TNC: { auto* input_data_iter = reinterpret_cast(input_data); auto* output_data_iter = reinterpret_cast(output_data); for (int n = 0; n < N; ++n) { @@ -117,7 +118,7 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT { } } break; // Reorder output values to PP format [N, T, C] -> [WORDS, C] - case platform::RNNReorderType::NTC_PP: { + case RNNReorderType::NTC_PP: { auto* input_data_iter = reinterpret_cast(input_data); auto* output_data_iter = reinterpret_cast(output_data); for (int n = 0; n < N; ++n) { @@ -130,7 +131,7 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT { } } break; // Reorder output values to PP format [T, N, C] -> [WORDS, C] - case platform::RNNReorderType::TNC_PP: { + case RNNReorderType::TNC_PP: { auto* input_data_iter = reinterpret_cast(input_data); auto* output_data_iter = reinterpret_cast(output_data); for (int n = 0; n < N; ++n) { @@ -166,17 +167,11 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT { memset(x_onednn_data, 0, sizeof(T) * N * Ti * IC); if (is_NTC(this->fwd_pd_->src_desc())) { - reorderRNNdata(x_data, - x_onednn_data, - input_lod, - is_reverse, - platform::RNNReorderType::PP_NTC); + reorderRNNdata( + x_data, x_onednn_data, input_lod, is_reverse, RNNReorderType::PP_NTC); } else { - reorderRNNdata(x_data, - x_onednn_data, - input_lod, - is_reverse, - platform::RNNReorderType::PP_TNC); + reorderRNNdata( + x_data, x_onednn_data, input_lod, is_reverse, RNNReorderType::PP_TNC); } return memory_p; } @@ -219,7 +214,7 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT { memory_p = std::make_shared(this->fwd_pd_->src_iter_desc(), this->engine_); - auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = phi::OneDNNContext::tls().get_stream(); dnnl::reorder(user_h0_memory, *memory_p, attr_) .execute(astream, user_h0_memory, *memory_p); diff --git 
a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc index c7acc1cf730ca9..a0cc21892597d1 100644 --- a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc @@ -26,11 +26,11 @@ limitations under the License. */ namespace paddle { namespace operators { -using paddle::platform::CreateKey; using phi::vectorize; using phi::funcs::OneDNNGetDataType; using phi::funcs::OneDNNMemDesc; using Direction = dnnl::rnn_direction; +using phi::OneDNNContext; namespace { @@ -52,7 +52,7 @@ template class MultiGRUHandler { public: MultiGRUHandler(const paddle::framework::ExecutionContext& ctx, - const platform::MKLDNNDeviceContext& dev_ctx) + const OneDNNContext& dev_ctx) : dev_ctx_(dev_ctx), engine_(dev_ctx.GetEngine()), place_(ctx.GetPlace()), @@ -112,8 +112,9 @@ class MultiGRUHandler { const std::string unique_name = ctx.OutputName("Hidden"); // Create memory key without Ti because weights, bias and h0 memories // do not depend on Ti size but primitive and input/output memory do - memory_key_ = platform::ExtendKeyWithThreadInfoIfNeeded( - dev_ctx, CreateKey(dev_ctx, unique_name, OneDNNGetDataType())); + memory_key_ = phi::funcs::ExtendKeyWithThreadInfoIfNeeded( + dev_ctx, + phi::funcs::CreateKey(dev_ctx, unique_name, OneDNNGetDataType())); key_ = memory_key_; key_.append("T").append(std::to_string(Ti_)); @@ -320,7 +321,7 @@ class MultiGRUHandler { auto gru_forward_p0 = AcquireGruPrimitive(layer, dir); - auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); gru_forward_p0->execute(astream, gru_args); astream.wait(); return out_mem; @@ -343,7 +344,7 @@ class MultiGRUHandler { memory_p = std::make_shared( gru_pds_[{layer, dir}]->src_iter_desc(), engine_); - auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); dnnl::reorder(user_h0_memory, *memory_p, attrs_[2 * layer + (dir == R2L)]) .execute(astream, user_h0_memory, *memory_p); @@ -383,7 +384,7 @@ class MultiGRUHandler { memory_p = std::make_shared( gru_pds_[{layer, dir}]->weights_layer_desc(), engine_); - auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); dnnl::reorder(user_memory, *memory_p, attrs_[2 * layer + (dir == R2L)]) .execute(astream, user_memory, *memory_p); @@ -440,7 +441,7 @@ class MultiGRUHandler { memory_p = std::make_shared( gru_pds_[{layer, dir}]->weights_iter_desc(), engine_); - auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); dnnl::reorder(user_memory, *memory_p, attrs_[2 * layer + (dir == R2L)]) .execute(astream, user_memory, *memory_p); @@ -547,7 +548,7 @@ class MultiGRUHandler { auto concat_p = AcquireConcatPrimitive(layer); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); concat_p->execute(astream, concat_args); astream.wait(); return out_mem; @@ -654,7 +655,7 @@ class MultiGRUHandler { int64_t N_, Ti_; std::vector ICs, OCs; - const platform::MKLDNNDeviceContext& dev_ctx_; + const OneDNNContext& dev_ctx_; const dnnl::engine engine_; const platform::Place place_; const bool origin_mode_; @@ -695,8 +696,7 @@ class MultiGRUMKLDNNKernel : public framework::OpKernel { template void RunKernel(const framework::ExecutionContext& ctx) 
const { - auto& dev_ctx = - ctx.template device_context(); + auto& dev_ctx = ctx.template device_context(); MultiGRUHandler handler(ctx, dev_ctx); int layers = handler.getLayers(); diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index 2f01aec1f7c48b..64db8598150c06 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -587,7 +587,7 @@ class MatMulOp : public framework::OperatorWithKernel { // to be computed like instead x*y we are to do y*x bool channelwise_onednn = context->IsRunMKLDNNKernel() && - (platform::MKLDNNDeviceContext::tls().get_cur_paddle_data_layout() == + (phi::OneDNNContext::tls().get_cur_paddle_data_layout() == phi::DataLayout::kNHWC); if (channelwise_onednn) { std::swap(dim_x, dim_y); @@ -717,8 +717,8 @@ class MatMulOp : public framework::OperatorWithKernel { // then we also need to rotate shape NHWC -> NCWH if ((expected_kernel_type.data_layout_ == phi::DataLayout::ONEDNN) && (tensor.layout() != phi::DataLayout::ONEDNN) && - paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout() == phi::DataLayout::kNHWC) { + phi::OneDNNContext::tls().get_cur_paddle_data_layout() == + phi::DataLayout::kNHWC) { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), phi::DataLayout::kNHWC); diff --git a/paddle/fluid/operators/matmul_v2_op.cc b/paddle/fluid/operators/matmul_v2_op.cc index 1412c9fe715b53..0a76f43175dc4f 100644 --- a/paddle/fluid/operators/matmul_v2_op.cc +++ b/paddle/fluid/operators/matmul_v2_op.cc @@ -154,8 +154,8 @@ class MatMulV2Op : public framework::OperatorWithKernel { // op previously) then we also need to rotate shape NHWC -> NCWH if ((expected_kernel_type.data_layout_ == phi::DataLayout::ONEDNN) && (tensor.layout() != phi::DataLayout::ONEDNN) && - paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout() == phi::DataLayout::kNHWC) { + phi::OneDNNContext::tls().get_cur_paddle_data_layout() == + phi::DataLayout::kNHWC) { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), phi::DataLayout::kNHWC); diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc index 38c5bd10290491..d27234344ff271 100644 --- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc @@ -49,8 +49,7 @@ class DeQuantOpKernel : public framework::OpKernel { "255 and greater or equal to 0, but got %f", quantization_shift)); - auto& dev_ctx = - ctx.template device_context(); + auto& dev_ctx = ctx.template device_context(); auto x_tz = phi::vectorize(x->dims()); auto x_type = phi::funcs::ToOneDNNDataType(x->dtype()); @@ -78,7 +77,7 @@ class DeQuantOpKernel : public framework::OpKernel { auto reorder_p = reorder_handler.AcquireReorder( reorder_dst_memory_p, reorder_src_memory_p, attrs); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = phi::OneDNNContext::tls().get_stream(); reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); astream.wait(); diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index b4ced90db807c2..fa376cd45e9d57 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -28,9 +28,9 @@ using dnnl::prop_kind; using dnnl::stream; using framework::DDim; using framework::ExecutionContext; +using phi::OneDNNContext; using 
phi::funcs::OneDNNGetDataType; using phi::funcs::to_void_cast; -using platform::MKLDNNDeviceContext; struct InnerProductCache { dnnl::inner_product_forward inner_product_p; @@ -45,7 +45,7 @@ class FCMKLDNNHandler dnnl::inner_product_forward> { public: FCMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, - const platform::MKLDNNDeviceContext& dev_ctx, + const OneDNNContext& dev_ctx, const phi::DenseTensor* x, const phi::DenseTensor* weights, const phi::DenseTensor* bias, @@ -220,7 +220,7 @@ class FCMKLDNNHandler auto reorder_p = std::make_shared( *user_memory_p, *target_memory_p, attrs); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); { platform::RecordEvent record_reorder( "int_reorder", @@ -237,7 +237,7 @@ class FCMKLDNNHandler } std::string memory_key_; - const platform::MKLDNNDeviceContext& dev_ctx_; + const OneDNNContext& dev_ctx_; public: std::shared_ptr AcquireSrcMemoryWithReorder( @@ -388,7 +388,7 @@ class FCMKLDNNKernel : public framework::OpKernel { dnnl::memory x_mem(x_md, engine, to_void_cast(x->data())); auto reorder_p = dnnl::reorder(x_mem, *src_mem); - auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); reorder_p.execute(astream, x_mem, *src_mem); astream.wait(); } else { @@ -398,8 +398,7 @@ class FCMKLDNNKernel : public framework::OpKernel { template void RunKernel(const framework::ExecutionContext& ctx) const { - const auto& dev_ctx = - ctx.template device_context(); + const auto& dev_ctx = ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto* x = ctx.Input("Input"); @@ -417,12 +416,12 @@ class FCMKLDNNKernel : public framework::OpKernel { std::string cache_key; cache_key.reserve(64); - cache_key = platform::ExtendKeyWithThreadInfoIfNeeded( + cache_key = phi::funcs::ExtendKeyWithThreadInfoIfNeeded( dev_ctx, - platform::CreateKey(dev_ctx, - ctx.InputName("Input"), - ctx.InputName("W"), - phi::vectorize(x->dims()))); + phi::funcs::CreateKey(dev_ctx, + ctx.InputName("Input"), + ctx.InputName("W"), + phi::vectorize(x->dims()))); auto inner_product_cache = std::static_pointer_cast(dev_ctx.GetBlob(cache_key)); @@ -479,7 +478,7 @@ class FCMKLDNNKernel : public framework::OpKernel { fc_p = handler.AcquireForwardPrimitive(); } - auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); std::unordered_map fc_args = { {DNNL_ARG_SRC, *src_memory_p}, diff --git a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc index ff3cbce546874a..a4f97c865414f2 100644 --- a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc @@ -130,8 +130,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const auto& dev_ctx = - ctx.template device_context(); + const auto& dev_ctx = ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto* x = ctx.Input("X"); @@ -155,7 +154,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel { auto resampling_prim = handler.AcquireForwardPrimitive(); const std::unordered_map args = { {DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = 
phi::OneDNNContext::tls().get_stream(); resampling_prim->execute(astream, args); astream.wait(); diff --git a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc index 1cee039640f8cf..df3804ab766ddc 100644 --- a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc @@ -98,8 +98,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { const auto begin_norm_axis = ctx.Attr("begin_norm_axis"); const bool is_test = ctx.Attr("is_test"); - auto& dev_ctx = - ctx.template device_context(); + auto& dev_ctx = ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); auto src_tz = phi::vectorize(x->dims()); @@ -125,7 +124,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { auto layer_norm_p = handler.AcquireForwardPrimitive(); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = phi::OneDNNContext::tls().get_stream(); std::unordered_map args = {{DNNL_ARG_SRC, *src_memory}, {DNNL_ARG_DST, *dst_memory}}; diff --git a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc index a163a20309a9b9..e10e868c468410 100644 --- a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc @@ -17,7 +17,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using paddle::platform::MKLDNNDeviceContext; +using phi::OneDNNContext; template class LRNOneDNNHandler @@ -124,8 +124,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel { true, paddle::platform::errors::PreconditionNotMet( "Operator DNNL LRN must use CPUPlace")); - auto& dev_ctx = - ctx.template device_context(); + auto& dev_ctx = ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); auto x = ctx.Input("X"); @@ -142,7 +141,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel { auto workspace_memory = handler.AcquireWorkspaceMemory(mid); mid->set_layout(phi::DataLayout::ONEDNN); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); if (!workspace_memory->get_desc().is_zero()) { mid->set_mem_desc(workspace_memory->get_desc()); lrn_p->execute(astream, @@ -179,7 +178,7 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel { auto out_grad = ctx.Input(framework::GradVarName("Out")); auto in_x_grad = ctx.Output(framework::GradVarName("X")); - auto& dev_ctx = ctx.template device_context(); + auto& dev_ctx = ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); LRNOneDNNHandler handler( @@ -192,7 +191,7 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel { auto lrn_bwd = handler.AcquireBackwardPrimitive(); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); lrn_bwd->execute(astream, {{DNNL_ARG_SRC, *src_memory}, {DNNL_ARG_DIFF_DST, *diff_dst_memory}, diff --git a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc index 810c0eaff18612..b4d782da78f024 100644 --- a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc @@ -21,12 +21,11 @@ namespace { using dnnl::memory; using paddle::framework::ExecutionContext; using paddle::platform::MatMulV2MKLDNNHandler; -using paddle::platform::MKLDNNDeviceContext; 
+using phi::OneDNNContext; using phi::vectorize; using phi::funcs::OneDNNGetDataType; using Tensor = phi::DenseTensor; using paddle::framework::GradVarName; -using phi::make_ddim; // Reshape a rank-3 tensor from P x M x N to (P * M) x N. // Identity op if the tensor is not of rank 3. @@ -43,7 +42,7 @@ static Tensor FoldOuterDims(const Tensor &input) { // (Warning: This requires transposing data and writes into new memory.) // Identity op if the tensor is not of rank 3. template -static Tensor FoldFirstAndLastDims(const MKLDNNDeviceContext &dev_ctx, +static Tensor FoldFirstAndLastDims(const OneDNNContext &dev_ctx, const Tensor *input) { auto input_dims = vectorize(input->dims()); if (input_dims.size() != 3) { @@ -55,8 +54,7 @@ static Tensor FoldFirstAndLastDims(const MKLDNNDeviceContext &dev_ctx, auto output_dims = vectorize(output.dims()); - memory::data_type input_type = paddle::framework::ToMKLDNNDataType( - paddle::framework::TransToProtoVarType(input->dtype())); + memory::data_type input_type = phi::funcs::ToOneDNNDataType(input->dtype()); phi::funcs::ReorderOneDNNHandler reorder_handler( output_dims, input->dtype(), input_type, dev_ctx.GetEngine()); @@ -67,7 +65,7 @@ static Tensor FoldFirstAndLastDims(const MKLDNNDeviceContext &dev_ctx, auto reorder_p = reorder_handler.AcquireReorder(reorder_src_memory_p, reorder_dst_memory_p); - auto &astream = MKLDNNDeviceContext::tls().get_stream(); + auto &astream = OneDNNContext::tls().get_stream(); reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); astream.wait(); @@ -153,7 +151,7 @@ class MatMulMKLDNNHandler {DNNL_ARG_WEIGHTS, *weights_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; - auto &astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); + auto &astream = OneDNNContext::tls().get_stream(); // Simulate batch matmul by processing in loop void *x_ptr = src_memory_p->get_data_handle(); @@ -366,7 +364,7 @@ void ExecuteMatMulV2(const ExecutionContext &ctx, *residual_data_memory_p}); } - auto &astream = MKLDNNDeviceContext::tls().get_stream(); + auto &astream = OneDNNContext::tls().get_stream(); matmul_p->execute(astream, matmul_args); astream.wait(); @@ -402,7 +400,7 @@ class MatMulV2MKLDNNKernel : public paddle::framework::OpKernel { : false; constexpr bool fuse_relu = false; // TODO(intel): Enable eltwise fuses - const auto &dev_ctx = ctx.template device_context(); + const auto &dev_ctx = ctx.template device_context(); const auto &onednn_engine = dev_ctx.GetEngine(); auto *x = ctx.Input("X"); @@ -531,8 +529,7 @@ class MatMulGradMKLDNNKernel : public paddle::framework::OpKernel { ctx.Attr("head_number"))); } - const auto &dev_ctx = - ctx.template device_context(); + const auto &dev_ctx = ctx.template device_context(); const auto &onednn_engine = dev_ctx.GetEngine(); auto x = *ctx.Input("X"); @@ -639,7 +636,7 @@ class MatMulGradMKLDNNKernel : public paddle::framework::OpKernel { private: void ExecuteMatMulGrad(const ExecutionContext &ctx, - const MKLDNNDeviceContext &dev_ctx, + const OneDNNContext &dev_ctx, const dnnl::engine &engine, phi::DenseTensor *x, bool trans_x, @@ -685,7 +682,7 @@ class MatMulGradMKLDNNKernel : public paddle::framework::OpKernel { {DNNL_ARG_WEIGHTS, *weights_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; - auto &astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); + auto &astream = OneDNNContext::tls().get_stream(); matmul_p->execute(astream, matmul_args); astream.wait(); diff --git a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc 
b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc index a0bc835a4cc29d..b8638ab17c7dbc 100644 --- a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc @@ -27,8 +27,8 @@ namespace operators { using framework::DDim; using framework::ExecutionContext; +using phi::OneDNNContext; using platform::MatMulV2MKLDNNHandler; -using platform::MKLDNNDeviceContext; using dnnl::inner_product_forward; using dnnl::memory; @@ -105,7 +105,7 @@ class MulPrimitiveFactory { auto reorder = dnnl::reorder(reorder_pd); - auto &astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto &astream = OneDNNContext::tls().get_stream(); { platform::RecordEvent record_reorder( "int_reorder", @@ -183,7 +183,7 @@ class MulPrimitiveFactory { } void Execute() { - auto &astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto &astream = OneDNNContext::tls().get_stream(); (*mul_).execute(astream, {{DNNL_ARG_SRC, *x_input_}, {DNNL_ARG_WEIGHTS, *y_input_}, @@ -278,7 +278,7 @@ class MulPrimitiveFactory { auto reorder = dnnl::reorder(src_mem, dst_mem); - auto &astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto &astream = OneDNNContext::tls().get_stream(); { platform::RecordEvent record_reorder( "int_reorder", @@ -313,19 +313,19 @@ class MulPrimitiveFactory { /* OT: output data type */ template std::shared_ptr> GetPrimitiveFactory( - const MKLDNNDeviceContext &dev_ctx, + const OneDNNContext &dev_ctx, const ExecutionContext &ctx, const Tensor *input_x, const Tensor *input_y, const dnnl::engine &mkldnn_engine) { std::string key = - platform::CreateKey(dev_ctx, - framework::TransToProtoVarType(input_x->dtype()), - phi::vectorize(input_x->dims()), - framework::TransToProtoVarType(input_y->dtype()), - phi::vectorize(input_y->dims()), - ctx.OutputName("Out")); - key = platform::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key); + phi::funcs::CreateKey(dev_ctx, + framework::TransToProtoVarType(input_x->dtype()), + phi::vectorize(input_x->dims()), + framework::TransToProtoVarType(input_y->dtype()), + phi::vectorize(input_y->dims()), + ctx.OutputName("Out")); + key = phi::funcs::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key); auto prim_creator = std::static_pointer_cast>( dev_ctx.GetBlob(key)); @@ -341,7 +341,7 @@ std::shared_ptr> GetPrimitiveFactory( /* XT: input x data type, YT: input y data type */ template -inner_product_forward GetMulPrimitive(const MKLDNNDeviceContext &dev_ctx, +inner_product_forward GetMulPrimitive(const OneDNNContext &dev_ctx, const ExecutionContext &ctx, const Tensor *input_x, const Tensor *input_y, @@ -372,8 +372,8 @@ class MulMKLDNNINT8Kernel : public framework::OpKernel { true, paddle::platform::errors::PreconditionNotMet( "Operator DNNL Mul must use CPUPlace")); - platform::MKLDNNDeviceContext::tls().log_lib_version(); - auto &dev_ctx = ctx.template device_context(); + OneDNNContext::tls().log_lib_version(); + auto &dev_ctx = ctx.template device_context(); auto &mkldnn_engine = dev_ctx.GetEngine(); const Tensor *x = ctx.Input("X"); @@ -401,7 +401,7 @@ class MulMKLDNNKernel : public framework::OpKernel { protected: void ExecuteMatMul(const ExecutionContext &ctx, - const MKLDNNDeviceContext &dev_ctx, + const OneDNNContext &dev_ctx, const dnnl::engine &onednn_engine, const platform::Place &cpu_place, const Tensor *x, @@ -434,7 +434,7 @@ class MulMKLDNNKernel : public framework::OpKernel { {DNNL_ARG_WEIGHTS, *weights_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; - auto &astream = MKLDNNDeviceContext::tls().get_stream(); + auto &astream = 
OneDNNContext::tls().get_stream(); matmul_p->execute(astream, matmul_args); astream.wait(); @@ -447,7 +447,7 @@ class MulMKLDNNKernel : public framework::OpKernel { private: void RunKernel(const ExecutionContext &ctx) const { - const auto &dev_ctx = ctx.template device_context(); + const auto &dev_ctx = ctx.template device_context(); const auto &onednn_engine = dev_ctx.GetEngine(); const auto *x = ctx.Input("X"); diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc index 047f7470cd6a6c..098623ea524664 100644 --- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc @@ -51,8 +51,7 @@ class QuantOpKernel : public framework::OpKernel { "255 and greater or equal to 0, but got %f", quantization_shift)); - auto& dev_ctx = - ctx.template device_context(); + auto& dev_ctx = ctx.template device_context(); auto x_tz = phi::vectorize(x->dims()); @@ -95,7 +94,7 @@ class QuantOpKernel : public framework::OpKernel { auto reorder_p = reorder_handler.AcquireReorder( reorder_dst_memory_p, reorder_src_memory_p, attrs); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = phi::OneDNNContext::tls().get_stream(); reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); astream.wait(); diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc index c9b80ba1e7a560..36498e60f4e54c 100644 --- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc @@ -63,8 +63,7 @@ class ReQuantOpKernel : public framework::OpKernel { "shift for signed input.")); } - auto& dev_ctx = - ctx.template device_context(); + auto& dev_ctx = ctx.template device_context(); auto src_tz = phi::vectorize(input->dims()); @@ -102,7 +101,7 @@ class ReQuantOpKernel : public framework::OpKernel { auto reorder_p = reorder_handler.AcquireReorder(dst_memory_p, src_memory_p, attrs); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = phi::OneDNNContext::tls().get_stream(); reorder_p->execute(astream, *src_memory_p, *dst_memory_p); astream.wait(); diff --git a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc index 902cd8509b4cfd..ff2484c7ced380 100644 --- a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc @@ -59,8 +59,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { private: void RunKernel(const framework::ExecutionContext& ctx) const { - const auto& dev_ctx = - ctx.template device_context(); + const auto& dev_ctx = ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); auto* x = ctx.Input("X"); @@ -84,7 +83,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { auto reorder_p = reorder_handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = phi::OneDNNContext::tls().get_stream(); reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); astream.wait(); @@ -304,8 +303,7 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel { private: void RunKernel(const framework::ExecutionContext& ctx) const { - const auto& dev_ctx = - ctx.template device_context(); + const auto& dev_ctx = ctx.template device_context(); const auto& onednn_engine = 
dev_ctx.GetEngine(); auto* dout = ctx.Input(framework::GradVarName("Out")); @@ -329,7 +327,7 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel { auto reorder_p = reorder_handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = phi::OneDNNContext::tls().get_stream(); reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); astream.wait(); diff --git a/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc index 424aa906eb22b5..36ea07dfdd710a 100644 --- a/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc @@ -37,8 +37,7 @@ template class ShuffleChannelMKLDNNKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const auto& dev_ctx = - ctx.template device_context(); + const auto& dev_ctx = ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto* x = ctx.Input("X"); @@ -55,7 +54,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel { auto shuffle_p = handler.AcquireForwardPrimitive(); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = phi::OneDNNContext::tls().get_stream(); shuffle_p->execute( astream, {{DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}); diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc index 63b7cfd51fb55a..e005683e242288 100644 --- a/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc +++ b/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc @@ -52,8 +52,7 @@ class CacheTester { // Clear oneDNN cache auto &pool = platform::DeviceContextPool::Instance(); platform::CPUPlace place; - onednn_dev_ctx_ = - dynamic_cast(pool.Get(place)); + onednn_dev_ctx_ = dynamic_cast(pool.Get(place)); onednn_dev_ctx_->ResetBlobMap(nullptr); } @@ -63,7 +62,7 @@ class CacheTester { } private: - platform::MKLDNNDeviceContext *onednn_dev_ctx_; + phi::OneDNNContext *onednn_dev_ctx_; }; template diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc index b3f02153f0d755..e59b901b6a38d5 100644 --- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc @@ -23,6 +23,7 @@ namespace operators { using Tensor = phi::DenseTensor; using phi::DataLayout; +using phi::OneDNNContext; template class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel { @@ -32,15 +33,14 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel { true, paddle::platform::errors::PreconditionNotMet( "Operator DNNL Transpose must use CPUPlace")); - auto& dev_ctx = - ctx.template device_context(); + auto& dev_ctx = ctx.template device_context(); const auto& dnnl_engine = dev_ctx.GetEngine(); std::vector transpose_axis = ctx.Attr>("axis"); int ndims = transpose_axis.size(); const phi::DenseTensor* x = ctx.Input("X"); auto* out = ctx.Output("Out"); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); platform::SetInMemDescWithLogicalLayoutFusesSupport( ctx, const_cast(x), x->mem_desc()); @@ -131,12 +131,11 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel { const auto* dout = ctx.Input(framework::GradVarName("Out")); 
auto* dx = ctx.Output(framework::GradVarName("X")); if (!dx) return; - auto& dev_ctx = - ctx.template device_context(); + auto& dev_ctx = ctx.template device_context(); const auto& dnnl_engine = dev_ctx.GetEngine(); std::vector transpose_axis = ctx.Attr>("axis"); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = OneDNNContext::tls().get_stream(); int ndims = transpose_axis.size(); if (ndims == 1) { diff --git a/paddle/fluid/operators/transfer_layout_op.h b/paddle/fluid/operators/transfer_layout_op.h index 3d5fe77afad579..72e1677646383b 100644 --- a/paddle/fluid/operators/transfer_layout_op.h +++ b/paddle/fluid/operators/transfer_layout_op.h @@ -78,52 +78,51 @@ class TransferLayoutFunctor { "No layout transform needed between two oneDNN OPKernels.")); if (in_layout != DataLayout::ONEDNN && out_layout == DataLayout::ONEDNN) { - // Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel + // Case1 - transform from Non-ONEDNN OPKernel to ONEDNN OPKernel // Just set layout/format. No real transform occur auto out_format = phi::funcs::OneDNNFormatForSize( - in_tensor.dims().size(), framework::ToOneDNNFormat(in_layout)); + in_tensor.dims().size(), phi::funcs::ToOneDNNFormat(in_layout)); out_tensor.ShareDataWith(in_tensor); // For NHWC data we need reshape of tensors as MKL-DNN // is expecting NHWC dims description order if (in_layout == DataLayout::kNHWC) { VLOG(4) << "kNHWC"; phi::funcs::MatchShapeToLayout(&out_tensor, in_layout, out_layout); - paddle::platform::MKLDNNDeviceContext::tls() - .set_cur_paddle_data_layout(in_layout); + phi::OneDNNContext::tls().set_cur_paddle_data_layout(in_layout); } auto out_tz = phi::vectorize(out_tensor.dims()); - dnnl::memory::data_type in_type = framework::ToMKLDNNDataType( - framework::TransToProtoVarType(in_tensor.dtype())); + dnnl::memory::data_type in_type = + phi::funcs::ToOneDNNDataType(in_tensor.dtype()); dnnl::memory::desc out_mem_desc(out_tz, in_type, out_format); out_tensor.set_mem_desc(out_mem_desc); } else { - auto target_layout = paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout(); + auto target_layout = + phi::OneDNNContext::tls().get_cur_paddle_data_layout(); // NOTE(zhiqiu): hot fix, follow the same logic in DataCopy() in // fetch_op.cc if (out_layout == DataLayout::kNCHW && in_name_ == framework::GradVarName("Filter")) { target_layout = out_layout; } - VLOG(4) << "innerTransDataLayoutFromMKLDNN: " << in_layout << "->" + VLOG(4) << "TransDataLayoutFromOneDNN: " << in_layout << "->" << target_layout; - // Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel - // Do transform via MKLDNN lib - paddle::framework::innerTransDataLayoutFromMKLDNN(in_layout, - target_layout, - in_tensor, - &out_tensor, - dev_ctx_.GetPlace()); + // Case2 - transfrom from ONEDNN OPKernel to Non-ONEDNN OPKernel + // Do transform via ONEDNN lib + phi::funcs::TransDataLayoutFromOneDNN(in_layout, + target_layout, + in_tensor, + &out_tensor, + dev_ctx_.GetPlace()); } } else { - // Case3 - transfrom between Non-MKLDNN OPKernels + // Case3 - transfrom between Non-ONEDNN OPKernels TransDataLayout(dev_ctx_, in_tensor, &out_tensor); } #else - // Case3 - transfrom between Non-MKLDNN OPKernels + // Case3 - transfrom between Non-ONEDNN OPKernels TransDataLayout(dev_ctx_, in_tensor, &out_tensor); #endif framework::SetTensorToVariable(*in_, out_tensor, out_); diff --git a/paddle/fluid/operators/transpose_op.cc b/paddle/fluid/operators/transpose_op.cc index 179339fae6b6ca..9ee0196d8c7dc3 100644 --- 
a/paddle/fluid/operators/transpose_op.cc +++ b/paddle/fluid/operators/transpose_op.cc @@ -82,8 +82,8 @@ class TransposeOp : public framework::OperatorWithKernel { // Here we need to match dims to paddle layout // as we are producing non-oneDNN result if (ctx->IsRunMKLDNNKernel() && (x_dims.size() >= 3) && - (paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout() == phi::DataLayout::kNHWC)) { + (phi::OneDNNContext::tls().get_cur_paddle_data_layout() == + phi::DataLayout::kNHWC)) { auto dims = phi::vectorize(x_dims); std::rotate(dims.begin() + 1, dims.begin() + 2, dims.end()); x_dims = x_dims.reshape(dims); diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index cafb7e1da0f82b..a09f438c505e80 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -224,7 +224,7 @@ void EmplaceDeviceContexts( for (auto& p : set) { if (platform::is_cpu_place(p)) { #ifdef PADDLE_WITH_MKLDNN - EmplaceDeviceContext( + EmplaceDeviceContext( place_to_device_context, p, disable_setting_default_stream_for_allocator); diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 0ad10cd8a7b3a2..2c90acd6100980 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -312,11 +312,6 @@ struct DefaultDeviceContextType { }; #endif -#ifdef PADDLE_WITH_MKLDNN -using MKLDNNDeviceContextThreadLocals = phi::OneDNNContextThreadLocals; -using MKLDNNDeviceContext = phi::OneDNNContext; -#endif - #ifdef PADDLE_WITH_CUSTOM_DEVICE class CustomDeviceContext : public phi::CustomContext { public: diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index eb2552434c2fed..a7f15dc1297928 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -24,27 +24,20 @@ limitations under the License. 
*/ #include "dnnl.hpp" // NOLINT #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/place.h" -#include "paddle/fluid/platform/profiler/event_tracing.h" #include "paddle/phi/backends/onednn/onednn_helper.h" namespace paddle { #ifdef PADDLE_WITH_MKLDNN using OneDNNMemoryFormat = dnnl::memory::format_tag; +using phi::OneDNNContext; #endif namespace platform { -template -using tf_desc = typename Type::desc; - -template -using tf_pd = typename Type::primitive_desc; - inline void ClearMKLDNNCache(const platform::Place& place, void* ptr = nullptr) { // Clear mkl-dnn cache, if (platform::is_cpu_place(place)) { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - platform::MKLDNNDeviceContext* dev_ctx = - (platform::MKLDNNDeviceContext*)pool.Get(place); + OneDNNContext* dev_ctx = reinterpret_cast(pool.Get(place)); dev_ctx->ResetBlobMap(ptr); } } @@ -53,71 +46,11 @@ inline void DontClearMKLDNNCache(const platform::Place& place) { // Clear mkl-dnn cache, if (platform::is_cpu_place(place)) { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - platform::MKLDNNDeviceContext* dev_ctx = - (platform::MKLDNNDeviceContext*)pool.Get(place); + OneDNNContext* dev_ctx = reinterpret_cast(pool.Get(place)); dev_ctx->BlockNextCacheClearing(); } } -inline void Reorder(dnnl::memory src, - dnnl::memory dst, - const dnnl::engine& engine) { - auto reorder_prim = dnnl::reorder(src, dst); - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); - platform::RecordEvent record_reorder("int_reorder", - platform::TracerEventType::UserDefined, - 2, - platform::EventRole::kUniqueOp); - reorder_prim.execute(astream, src, dst); - astream.wait(); -} - -inline std::string ThreadIDasStr(void) { - return std::to_string( - std::hash()(std::this_thread::get_id())); -} - -template -inline void AppendKey(std::string* key, const T& num) { - key->append(std::to_string(num)); -} - -template <> -inline void AppendKey(std::string* key, - const dnnl::memory::format_tag& format) { - key->append(std::to_string(static_cast(format))); -} - -template <> -inline void AppendKey(std::string* key, - const dnnl::memory::data_type& data_type) { - key->append(std::to_string(static_cast(data_type))); -} - -template <> -inline void AppendKey(std::string* key, const dnnl::algorithm& algorithm) { - key->append(std::to_string(static_cast(algorithm))); -} - -template <> -inline void AppendKey(std::string* key, - const dnnl::normalization_flags& flags) { - key->append(std::to_string(static_cast(flags))); -} - -inline void AppendKey(std::string* key, const std::string& str) { - key->append(str); -} - -inline void AppendKey(std::string* key, const char* str) { key->append(str); } - -template -inline void AppendKey(std::string* key, const std::vector& dims) { - for (size_t i = 0; i < dims.size(); i++) { - AppendKey(key, std::to_string(dims[i])); - } -} - // If MKLDNN build and CPU place then register suffix in DeviceContext inline void AttachPointerHashToMKLDNNKey(void* ptr, const platform::Place& place) { @@ -128,49 +61,30 @@ inline void AttachPointerHashToMKLDNNKey(void* ptr, static std::mutex static_vars_barrier; static_vars_barrier.lock(); static auto first_exec = ptr; - static auto first_thread = ThreadIDasStr(); + static auto first_thread = phi::funcs::ThreadIDasStr(); static_vars_barrier.unlock(); if (first_exec != ptr) { - paddle::platform::MKLDNNDeviceContext::tls().set_key_suffix( + OneDNNContext::tls().set_key_suffix( "E" + std::to_string(reinterpret_cast(ptr))); } 
// Let's register adress of current executor - paddle::platform::MKLDNNDeviceContext::tls().set_curr_exec(ptr); + OneDNNContext::tls().set_curr_exec(ptr); // For first thread - if (first_thread == ThreadIDasStr()) { - paddle::platform::MKLDNNDeviceContext::tls().disable_tid_in_key(); + if (first_thread == phi::funcs::ThreadIDasStr()) { + OneDNNContext::tls().disable_tid_in_key(); } } } -template -inline std::string CreateKey(const platform::MKLDNNDeviceContext& dev_ctx, - ArgTypes&&... args) { - std::string key; - key.reserve(64); - using expand_type = int[]; - expand_type{0, (AppendKey(&key, std::forward(args)), 0)...}; - key += paddle::platform::MKLDNNDeviceContext::tls().get_key_suffix(); - return key; -} - -inline std::string ExtendKeyWithThreadInfoIfNeeded( - const platform::MKLDNNDeviceContext& dev_ctx, const std::string& key) { - return (paddle::platform::MKLDNNDeviceContext::tls().is_tid_used_in_key() == - true) - ? key + "-t:" + ThreadIDasStr() - : key; -} - inline void RegisterModelLayout( std::vector>& ops, // NOLINT const platform::Place& place) { if (platform::is_cpu_place(place)) { // If there is already registered NHWC then quit this call // not to overwrite setting with analysis of internal "while" op block - if (platform::MKLDNNDeviceContext::tls().get_cur_paddle_data_layout() == + if (OneDNNContext::tls().get_cur_paddle_data_layout() == phi::DataLayout::kNHWC) return; @@ -179,7 +93,7 @@ inline void RegisterModelLayout( const std::string& attrib_name) -> bool { if (op->HasAttr(attrib_name)) { auto data_format = op->Attr(attrib_name); - platform::MKLDNNDeviceContext::tls().set_cur_paddle_data_layout( + OneDNNContext::tls().set_cur_paddle_data_layout( data_format.compare("NHWC") == 0 ? phi::DataLayout::kNHWC : phi::DataLayout::kNCHW); return true; @@ -208,8 +122,6 @@ inline bool HasOpBFLOAT16DataType(const paddle::framework::OpDesc* op) { return op->GetAttrIfExists("mkldnn_data_type") == "bfloat16"; } -enum class RNNReorderType { PP_NTC, PP_TNC, NTC_PP, TNC_PP }; - } // namespace platform inline std::string FindInputNameByVarName(framework::OpDesc* op, diff --git a/paddle/phi/backends/onednn/onednn_helper.h b/paddle/phi/backends/onednn/onednn_helper.h index f50aa112271582..040122b692da64 100644 --- a/paddle/phi/backends/onednn/onednn_helper.h +++ b/paddle/phi/backends/onednn/onednn_helper.h @@ -284,5 +284,7 @@ inline std::string ExtendKeyWithThreadInfoIfNeeded(const OneDNNContext& dev_ctx, : key; } +enum class RNNReorderType { PP_NTC, PP_TNC, NTC_PP, TNC_PP }; + } // namespace funcs } // namespace phi diff --git a/paddle/phi/kernels/funcs/data_layout_transform.cc b/paddle/phi/kernels/funcs/data_layout_transform.cc index 767566cef2ff94..b11e766e0e0488 100644 --- a/paddle/phi/kernels/funcs/data_layout_transform.cc +++ b/paddle/phi/kernels/funcs/data_layout_transform.cc @@ -48,16 +48,16 @@ void* GetDataFromTensor(const DenseTensor& tensor, case dnnl::memory::data_type::bf16: return to_void_cast(tensor.data()); default: - PADDLE_THROW(errors::InvalidArgument("Wrong mkldnn type provided.")); + PADDLE_THROW(errors::InvalidArgument("Wrong oneDNN type provided.")); } } -void innerTransDataLayoutFromOneDNN(DataLayout in_layout, - DataLayout out_layout, - const DenseTensor& in, - DenseTensor* out, - Place place, - bool always_copy) { +void TransDataLayoutFromOneDNN(DataLayout in_layout, + DataLayout out_layout, + const DenseTensor& in, + DenseTensor* out, + Place place, + bool always_copy) { // Set default as NCHW in case not specified out_layout = out_layout == DataLayout::ANY ? 
DataLayout::NCHW : out_layout; diff --git a/paddle/phi/kernels/funcs/data_layout_transform.h b/paddle/phi/kernels/funcs/data_layout_transform.h index 1bc665daa06c17..54e7d9b729f011 100644 --- a/paddle/phi/kernels/funcs/data_layout_transform.h +++ b/paddle/phi/kernels/funcs/data_layout_transform.h @@ -43,7 +43,7 @@ inline OneDNNMemoryFormat ToOneDNNFormat(const DataLayout& layout) { return OneDNNMemoryFormat::ndhwc; default: PADDLE_THROW( - errors::InvalidArgument("Fail to convert layout %s to MKLDNN format.", + errors::InvalidArgument("Fail to convert layout %s to oneDNN format.", ::phi::DataLayoutToString(layout))); } } @@ -77,12 +77,12 @@ inline OneDNNDataType ToOneDNNDataType(DataType type) { return OneDNNDataType::undef; } -void innerTransDataLayoutFromOneDNN(DataLayout in_layout, - DataLayout out_layout, - const DenseTensor& in, - DenseTensor* out, - Place place, - bool always_copy = false); +void TransDataLayoutFromOneDNN(DataLayout in_layout, + DataLayout out_layout, + const DenseTensor& in, + DenseTensor* out, + Place place, + bool always_copy = false); void* GetDataFromTensor(const DenseTensor& tensor, OneDNNDataType type); #endif diff --git a/paddle/phi/kernels/transfer_layout_kernel.cc b/paddle/phi/kernels/transfer_layout_kernel.cc index 300fd9b5cc65ed..d7b8d55707a6d4 100644 --- a/paddle/phi/kernels/transfer_layout_kernel.cc +++ b/paddle/phi/kernels/transfer_layout_kernel.cc @@ -130,7 +130,7 @@ void TransferLayoutMKLDNN(const Context& dev_ctx, dst_layout != DataLayout::ONEDNN) { // Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel // Do transform via MKLDNN lib - funcs::innerTransDataLayoutFromOneDNN( + funcs::TransDataLayoutFromOneDNN( src_layout, dst_layout, x, out, dev_ctx.GetPlace()); } else if (src_layout == DataLayout::ONEDNN && dst_layout == DataLayout::ONEDNN) { From 57e097ac005e8078be43d73fa028b73f208b3adc Mon Sep 17 00:00:00 2001 From: LiYuRio <63526175+LiYuRio@users.noreply.github.com> Date: Tue, 29 Nov 2022 12:25:27 +0800 Subject: [PATCH 016/154] [Fluid API] Move instance_norm, group_norm, data_norm from fluid to static (#48448) * move instance_norm from fluid to static * move group_norm, data_norm to static --- python/paddle/fluid/layers/nn.py | 456 ----------------- .../unittests/ipu/test_groupnorm_op_ipu.py | 4 +- .../unittests/ipu/test_instancenorm_op_ipu.py | 4 +- .../ir/inference/test_trt_instance_norm_op.py | 3 +- .../ir/inference/test_trt_subgraph_pass.py | 2 +- .../unittests/npu/test_group_norm_op_npu.py | 6 +- .../tests/unittests/test_data_norm_op.py | 3 +- .../tests/unittests/test_dist_fleet_ps2.py | 2 +- .../fluid/tests/unittests/test_fleet.py | 5 +- .../tests/unittests/test_group_norm_op.py | 11 +- .../unittests/test_imperative_double_grad.py | 2 +- .../test_imperative_load_static_param.py | 8 +- ...perative_star_gan_with_gradient_penalty.py | 2 +- .../tests/unittests/test_instance_norm_op.py | 4 +- .../fluid/tests/unittests/test_layers.py | 4 +- .../tests/unittests/test_norm_nn_grad.py | 4 +- python/paddle/static/nn/__init__.py | 6 +- python/paddle/static/nn/common.py | 457 +++++++++++++++++- 18 files changed, 496 insertions(+), 487 deletions(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index e760b357e091da..ca0d3cd721d1b7 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -73,8 +73,6 @@ 'pool2d', 'pool3d', 'batch_norm', - 'instance_norm', - 'data_norm', 'reduce_mean', 'reduce_all', 'reduce_any', @@ -88,7 +86,6 @@ 'row_conv', 'multiplex', 'layer_norm', - 
'group_norm', 'spectral_norm', 'smooth_l1', 'one_hot', @@ -2462,349 +2459,6 @@ def batch_norm( return helper.append_activation(batch_norm_out) -def instance_norm( - input, epsilon=1e-05, param_attr=None, bias_attr=None, name=None -): - r""" - :api_attr: Static Graph - - **Instance Normalization Layer** - - Can be used as a normalizer function for convolution or fully_connected operations. - The required data format for this layer is one of the following: - - DataLayout: NCHW `[batch, in_channels, in_height, in_width]` - - Refer to `Instance Normalization: The Missing Ingredient for - Fast Stylization `_ - for more details. - - :math:`input` is the input features over a mini-batch. - - .. math:: - - \\mu_{\\beta} &\\gets \\frac{1}{HW} \\sum_{i=1}^{HW} x_i \\qquad &//\\ - \\ mean\ of\ one\ feature\ map\ in\ mini-batch \\\\ - \\sigma_{\\beta}^{2} &\\gets \\frac{1}{HW} \\sum_{i=1}^{HW}(x_i - \\ - \\mu_{\\beta})^2 \\qquad &//\ variance\ of\ one\ feature\ map\ in\ mini-batch \\\\ - \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\ - \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\ - y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift - - Note: - `H` means height of feature map, `W` means width of feature map. - - Args: - input(Tensor): The rank of input tensor can be 2, 3, 4, 5. - The data type is float32 or float64. - epsilon(float, Default 1e-05): A value added to the denominator for - numerical stability. Default is 1e-5. - param_attr(ParamAttr|None|bool, optional): The parameter attribute for Parameter `scale` - of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm - will create ParamAttr as param_attr, the name of scale can be set in ParamAttr. - If the Initializer of the param_attr is not set, the parameter is initialized - with Xavier. If the param_attr is set to False, instance_norm will not create param_attr. - Default: None. - bias_attr(ParamAttr|None|bool, optional): The parameter attribute for the bias of instance_norm. - If it is set to None or one attribute of ParamAttr, instance_norm - will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. - If the Initializer of the bias_attr is not set, the bias is initialized zero. - If the bias_attr is set to False, instance_norm will not create bias_attr. - Default: None. - name(string, Default None): A name for this layer(optional). If set None, the layer - will be named automatically. - - Returns: - A Tensor which is the result after applying instance normalization on the input, - has same shape and data type with input. - - Examples: - - .. 
code-block:: python - - import paddle - paddle.enable_static() - x = paddle.static.data(name='x', shape=[3, 7, 3, 7], dtype='float32') - hidden1 = paddle.static.nn.fc(x, size=200) - hidden2 = paddle.static.nn.instance_norm(hidden1) - """ - check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'instance_norm' - ) - if param_attr is False: - assert ( - bias_attr is False - ), "param_attr and bias_attr must be set to False at the same time in instance_norm" - - helper = LayerHelper('instance_norm', **locals()) - dtype = helper.input_dtype() - - # use fp32 for in parameter - if dtype == core.VarDesc.VarType.FP16: - dtype = core.VarDesc.VarType.FP32 - - input_shape = input.shape - if len(input.shape) < 2 or len(input.shape) > 5: - raise ValueError( - 'expected 2D or 3D or 4D or 5D input (got {}D input, input shape is: {})'.format( - len(input.shape), input_shape - ) - ) - channel_num = input_shape[1] - - param_shape = [channel_num] - - if param_attr != False and bias_attr != False: - # create parameter - scale = helper.create_parameter( - attr=helper.param_attr, - shape=param_shape, - dtype=dtype, - default_initializer=Constant(1.0), - ) - bias = helper.create_parameter( - attr=helper.bias_attr, - shape=param_shape, - dtype=dtype, - is_bias=True, - default_initializer=Constant(0.0), - ) - - # create output - saved_mean = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True - ) - saved_variance = helper.create_variable_for_type_inference( - dtype=dtype, stop_gradient=True - ) - - instance_norm_out = helper.create_variable_for_type_inference(dtype) - - inputs = {"X": input} - if param_attr != False and bias_attr != False: - inputs["Scale"] = scale - inputs["Bias"] = bias - - helper.append_op( - type="instance_norm", - inputs=inputs, - outputs={ - "Y": instance_norm_out, - "SavedMean": saved_mean, - "SavedVariance": saved_variance, - }, - attrs={ - "epsilon": epsilon, - }, - ) - - return instance_norm_out - - -@static_only -def data_norm( - input, - act=None, - epsilon=1e-05, - param_attr=None, - data_layout='NCHW', - in_place=False, - name=None, - moving_mean_name=None, - moving_variance_name=None, - do_model_average_for_mean_and_var=True, - slot_dim=-1, - sync_stats=False, - summary_decay_rate=0.9999999, - enable_scale_and_shift=False, -): - r""" - :api_attr: Static Graph - - **Data Normalization Layer** - - This op can be used as a normalizer function for conv2d and fully_connected operations. - The required data format for this layer is one of the following: - - 1. NHWC `[batch, in_height, in_width, in_channels]` - - 2. NCHW `[batch, in_channels, in_height, in_width]` - - :math:`input` is the input features over a mini-batch. - - .. math:: - - \\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\ - \ mini-batch\ mean \\\\ - \\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m}(x_i - \\ - \\mu_{\\beta})^2 \\qquad &//\ mini-batch\ variance \\\\ - \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\ - \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\ - y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift - - Args: - input(Tensor): The input Tensor. - act(string, Default None): Activation type, linear|relu|prelu|... - epsilon(float, Default 1e-05): - param_attr(ParamAttr): The parameter attribute for Parameter `scale`. - data_layout (str, optional): Specify the data format of the input, and the data format of the output - will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. 
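
The normalization helpers removed from fluid.layers here are re-exposed under paddle.static.nn by this same patch. A minimal sketch of the new call sites, reusing the shapes from the docstring examples; the variable names and shapes are illustrative only:

    import paddle
    paddle.enable_static()

    x = paddle.static.data(name='x', shape=[3, 7, 3, 7], dtype='float32')
    y1 = paddle.static.nn.instance_norm(x)               # previously fluid.layers.instance_norm
    d = paddle.static.data(name='d', shape=[2, 8, 32, 32], dtype='float32')
    y2 = paddle.static.nn.group_norm(input=d, groups=4)  # previously fluid.layers.group_norm
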
- The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_height, input_width]`. - in_place(bool, Default False): Make the input and output of batch norm reuse memory. - name(string, Default None): A name for this layer(optional). If set None, the layer - will be named automatically. - moving_mean_name(string, Default None): The name of moving_mean which store the global Mean. - moving_variance_name(string, Default None): The name of the moving_variance which store the global Variance. - do_model_average_for_mean_and_var(bool, Default True): Whether parameter mean and variance - should do model average when model average is enabled. - slot_dim(int): The embedding dimension of one slot. Slot is a set of one specific feature. In pslib mode, we - distinguish feature ids by slot and pull their embeddings from parameter server (pslib). The first - place of the embedding is the historical show number (occurence time of this feature id with a label 0). - If the input of this op is concated by slot-wise embeddings, and the show number is zero when this slot - is new or empty, the normalization result may be impractical. To avoid this, we add slot_dim to locate - the show number and judge if the show number is zero. If so, we choose to skip normalization on this - embedding. - sync_stats(bool, Default False): When running with multiple GPU cards, using allreduce to sync the - summary messages. - summary_decay_rate(float, Default 0.9999999): The decay rate when updating summary. - enable_scale_and_shift(bool, Default False): do scale&shift after normalization. - - Returns: - Tensor: A tensor which is the result after applying data normalization on the input. - - Examples: - - .. code-block:: python - - import paddle - paddle.enable_static() - - x = paddle.randn(shape=[32,100]) - hidden2 = paddle.static.nn.data_norm(input=x) - """ - helper = LayerHelper('data_norm', **locals()) - dtype = helper.input_dtype() - - input_shape = input.shape - if data_layout == 'NCHW': - channel_num = input_shape[1] - else: - if data_layout == 'NHWC': - channel_num = input_shape[-1] - else: - raise ValueError("unsupported data layout:" + data_layout) - - param_shape = [channel_num] - - batch_size_default = 1e4 - batch_sum_default = 0.0 - batch_square_sum_default = 1e4 - scale_w_default = 1.0 - bias_default = 0.0 - - if param_attr and isinstance(param_attr, dict): - batch_size_default = param_attr.get("batch_size", 1e4) - batch_sum_default = param_attr.get("batch_sum", 0.0) - batch_square_sum_default = param_attr.get("batch_square", 1e4) - if enable_scale_and_shift: - scale_w_default = param_attr.get("scale_w", 1.0) - bias_default = param_attr.get("bias", 0.0) - - # create scale and shift(bias) when enable_scale_and_shift is True - if name is None: - name = "dn" - if enable_scale_and_shift: - scale_w = helper.create_parameter( - attr=ParamAttr( - name=name + '.scale_w', - initializer=Constant(value=float(scale_w_default)), - trainable=True, - ), - shape=param_shape, - dtype=input.dtype, - ) - bias = helper.create_parameter( - attr=ParamAttr( - name=name + '.bias', - initializer=Constant(value=float(bias_default)), - trainable=True, - ), - shape=param_shape, - dtype=input.dtype, - ) - # create parameter - batch_size = helper.create_parameter( - attr=ParamAttr( - name=name + '.batch_size', - initializer=Constant(value=float(batch_size_default)), - trainable=True, - ), - shape=param_shape, - dtype=input.dtype, - ) - - batch_sum = helper.create_parameter( - 
attr=ParamAttr( - name=name + '.batch_sum', - initializer=Constant(value=float(batch_sum_default)), - trainable=True, - ), - shape=param_shape, - dtype=input.dtype, - ) - - batch_square_sum = helper.create_parameter( - attr=ParamAttr( - name=name + '.batch_square_sum', - initializer=Constant(value=float(batch_square_sum_default)), - trainable=True, - ), - shape=param_shape, - dtype=input.dtype, - ) - - means = helper.create_variable(dtype=dtype, stop_gradient=True) - scales = helper.create_variable(dtype=dtype, stop_gradient=True) - - data_norm_out = input if in_place else helper.create_variable(dtype=dtype) - - inputs = { - "X": input, - "BatchSize": batch_size, - "BatchSum": batch_sum, - "BatchSquareSum": batch_square_sum, - } - attrs = { - "epsilon": epsilon, - "data_layout": data_layout, - "sync_stats": sync_stats, - "summary_decay_rate": summary_decay_rate, - } - if slot_dim > 0: - attrs["slot_dim"] = slot_dim - if enable_scale_and_shift: - attrs["enable_scale_and_shift"] = enable_scale_and_shift - if enable_scale_and_shift: - inputs["scale_w"] = scale_w - inputs["bias"] = bias - helper.append_op( - type="data_norm", - inputs=inputs, - outputs={ - "Y": data_norm_out, - "Means": means, - "Scales": scales, - "BatchSize": batch_size, - "BatchSum": batch_sum, - "BatchSquareSum": batch_square_sum, - }, - attrs=attrs, - ) - - return helper.append_activation(data_norm_out) - - @templatedoc() def layer_norm( input, @@ -2941,116 +2595,6 @@ def layer_norm( return helper.append_activation(layer_norm_out) -@templatedoc() -def group_norm( - input, - groups, - epsilon=1e-05, - param_attr=None, - bias_attr=None, - act=None, - data_layout='NCHW', - name=None, -): - """ - :api_attr: Static Graph - - **Group Normalization Layer** - - Refer to `Group Normalization `_ . - - Parameters: - input(Tensor): Tensor with dimension greater than 1, the data type is float32 or float64. - groups(int): The number of groups that divided from channels, the data type - is int32. - epsilon(float, optional): The small value added to the variance to prevent - division by zero, the data type is float32. Default: 1e-05. - param_attr(ParamAttr|bool, optional): ParamAttr object that specifies weight parameter - attribute. If a bool type, only False is supported, which means there is no weight parameter. - Default: None, the default weight parameter attribute is used. For more information, please - refer to :ref:`api_guide_ParamAttr` . - bias_attr(ParamAttr|bool, optional): ParamAttr object that specifies bias parameter - attribute. If a bool type, only False is supported, which means there is no bias parameter. - Default: None, the default bias parameter attribute is used. For more information, please - refer to :ref:`api_guide_ParamAttr` . - act(str, optional): Activation to be applied to the output of group normalization. - data_layout(str, optional): Specify the data format of the input, and the data format of the output - will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. - The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: - `[batch_size, input_channels, *]`. - name (str, optional): The default value is None. Normally there is no need for user to set this - property. For more information, please refer to :ref:`api_guide_Name` . - - Returns: - Tensor: A Tensor has same data type and data format with `input`. - - Examples: - .. 
code-block:: python - - import paddle - paddle.enable_static() - - data = paddle.static.data(name='data', shape=[2, 8, 32, 32], dtype='float32') - x = paddle.static.nn.group_norm(input=data, groups=4) - print(x.shape) # [2, 8, 32, 32] - """ - helper = LayerHelper('group_norm', **locals()) - dtype = helper.input_dtype() - check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'group_norm' - ) - # create intput and parameters - inputs = {'X': input} - input_shape = input.shape - if len(input_shape) < 2: - raise ValueError( - f"The dimensions of Op(fluid.layers.group_norm)'s input should be more than 1. But received {len(input_shape)}" - ) - if data_layout != 'NCHW' and data_layout != 'NHWC': - raise ValueError( - "Param(data_layout) of Op(fluid.layers.group_norm) got wrong value: received " - + data_layout - + " but only NCHW or NHWC supported." - ) - channel_num = input_shape[1] if data_layout == 'NCHW' else input_shape[-1] - param_shape = [channel_num] - if param_attr: - scale = helper.create_parameter( - attr=helper.param_attr, - shape=param_shape, - dtype=dtype, - default_initializer=Constant(1.0), - ) - inputs['Scale'] = scale - if bias_attr: - bias = helper.create_parameter( - attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True - ) - inputs['Bias'] = bias - - # create output - mean_out = helper.create_variable(dtype=dtype, stop_gradient=True) - variance_out = helper.create_variable(dtype=dtype, stop_gradient=True) - group_norm_out = helper.create_variable(dtype=dtype) - - helper.append_op( - type="group_norm", - inputs=inputs, - outputs={ - "Y": group_norm_out, - "Mean": mean_out, - "Variance": variance_out, - }, - attrs={ - "epsilon": epsilon, - "groups": groups, - "data_layout": data_layout, - }, - ) - - return helper.append_activation(group_norm_out) - - @templatedoc() def spectral_norm(weight, dim=0, power_iters=1, eps=1e-12, name=None): r""" diff --git a/python/paddle/fluid/tests/unittests/ipu/test_groupnorm_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_groupnorm_op_ipu.py index 609b212e03af3f..457858ac08c13c 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_groupnorm_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_groupnorm_op_ipu.py @@ -63,7 +63,7 @@ def build_model(self): ) scale = paddle.ParamAttr(trainable=True) bias = paddle.ParamAttr(trainable=True) - out = paddle.fluid.layers.nn.group_norm( + out = paddle.static.nn.group_norm( conv1, param_attr=scale, bias_attr=bias, **self.attrs ) loss = paddle.mean(out) @@ -71,7 +71,7 @@ def build_model(self): adam.minimize(loss) self.fetch_list = [loss.name] else: - out = paddle.fluid.layers.nn.group_norm( + out = paddle.static.nn.group_norm( x, param_attr=True, bias_attr=True, **self.attrs ) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_instancenorm_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_instancenorm_op_ipu.py index b2fb872e369ebd..14210c69e4f602 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_instancenorm_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_instancenorm_op_ipu.py @@ -60,7 +60,7 @@ def build_model(self): ) scale = paddle.ParamAttr(trainable=True) bias = paddle.ParamAttr(trainable=True) - out = paddle.fluid.layers.nn.instance_norm( + out = paddle.static.nn.instance_norm( conv1, param_attr=scale, bias_attr=bias, **self.attrs ) loss = paddle.mean(out) @@ -68,7 +68,7 @@ def build_model(self): adam.minimize(loss) self.fetch_list = [loss.name] else: - out = 
paddle.fluid.layers.nn.instance_norm( + out = paddle.static.nn.instance_norm( x, param_attr=True, bias_attr=True, **self.attrs ) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_instance_norm_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_instance_norm_op.py index 695bf42b3db29b..2901238ffe4a4a 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_instance_norm_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_instance_norm_op.py @@ -20,6 +20,7 @@ import numpy as np from inference_pass_test import InferencePassTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import AnalysisConfig, PassVersionChecker @@ -43,7 +44,7 @@ def build(self): with fluid.program_guard(self.main_program, self.startup_program): shape = [-1, self.channel, self.height, self.width] data = fluid.data(name='in', shape=shape, dtype='float32') - instance_norm_out = fluid.layers.instance_norm(data) + instance_norm_out = paddle.static.nn.instance_norm(data) out = fluid.layers.batch_norm(instance_norm_out, is_test=True) shape[0] = self.bs diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py index f6136d77f2d3bc..235f2446cb1490 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py @@ -186,7 +186,7 @@ def setUp(self): name='instance_norm_b', initializer=fluid.initializer.Constant(value=0.0), ) - out = fluid.layers.instance_norm( + out = paddle.static.nn.instance_norm( input=data, param_attr=param_attr, bias_attr=bias_attr ) self.feeds = { diff --git a/python/paddle/fluid/tests/unittests/npu/test_group_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_group_norm_op_npu.py index 06d0f5dd1d029c..7f95e2b55c66df 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_group_norm_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_group_norm_op_npu.py @@ -52,7 +52,7 @@ def test_errors(self): def test_x_type(): input = np.random.random(2, 100, 3, 5).astype('float32') groups = 2 - fluid.layers.group_norm(input, groups) + paddle.static.nn.group_norm(input, groups) self.assertRaises(TypeError, test_x_type) @@ -61,7 +61,7 @@ def test_x_dtype(): name='x2', shape=[2, 100, 3, 5], dtype='int32' ) groups = 2 - fluid.layers.group_norm(x2, groups) + paddle.static.nn.group_norm(x2, groups) self.assertRaises(TypeError, test_x_dtype) @@ -219,7 +219,7 @@ def test_exception(self): data = fluid.data(name='data', shape=[None, 3, 3, 4], dtype="float64") def attr_data_format(): - out = fluid.layers.group_norm( + out = paddle.static.nn.group_norm( input=data, groups=2, data_layout="NDHW" ) diff --git a/python/paddle/fluid/tests/unittests/test_data_norm_op.py b/python/paddle/fluid/tests/unittests/test_data_norm_op.py index 1f32feb35276ea..b5a2e76fe87bf9 100644 --- a/python/paddle/fluid/tests/unittests/test_data_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_data_norm_op.py @@ -15,6 +15,7 @@ import unittest import numpy as np +import paddle import paddle.fluid.core as core from paddle.fluid.op import Operator import paddle.fluid as fluid @@ -517,7 +518,7 @@ def test_errors(self): with program_guard(Program(), Program()): x2 = fluid.layers.data(name='x2', shape=[3, 4], dtype="int32") # self.assertRaises(TypeError, fluid.data_norm, x2) - 
fluid.layers.data_norm( + paddle.static.nn.data_norm( input=x2, param_attr={}, enable_scale_and_shift=True ) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py index 3bc478a00851c3..30f3f8134889c0 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py @@ -89,7 +89,7 @@ def get_loss(cos_q_pt, cos_q_nt): # vsum q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum') q_ss = paddle.nn.functional.softsign(q_sum) - q_ss = fluid.layers.data_norm(input=q_ss) + q_ss = paddle.static.nn.data_norm(input=q_ss) # fc layer after conv q_fc = fluid.layers.fc( input=q_ss, diff --git a/python/paddle/fluid/tests/unittests/test_fleet.py b/python/paddle/fluid/tests/unittests/test_fleet.py index a9a75868ee3969..75d6ab31754831 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet.py +++ b/python/paddle/fluid/tests/unittests/test_fleet.py @@ -32,6 +32,7 @@ def setUp(self): def test_pslib_1(self): """Test cases for pslib.""" + import paddle import paddle.fluid as fluid from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker @@ -66,7 +67,9 @@ def test_pslib_1(self): param_attr=fluid.ParamAttr(name="embedding"), ) bow = fluid.layers.sequence_pool(input=emb, pool_type='sum') - bow = fluid.layers.data_norm(input=bow, epsilon=1e-4, name="norm") + bow = paddle.static.nn.data_norm( + input=bow, epsilon=1e-4, name="norm" + ) fc = fluid.layers.fc(input=bow, size=1, act=None) label = fluid.layers.data( name="click", diff --git a/python/paddle/fluid/tests/unittests/test_group_norm_op.py b/python/paddle/fluid/tests/unittests/test_group_norm_op.py index df5c832e2f21c8..2b74636939e993 100644 --- a/python/paddle/fluid/tests/unittests/test_group_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_group_norm_op.py @@ -15,6 +15,7 @@ import unittest import numpy as np +import paddle import paddle.fluid.core as core import paddle.fluid as fluid from op_test import OpTest, skip_check_grad_ci @@ -46,7 +47,7 @@ def test_errors(self): def test_x_type(): input = np.random.random(2, 100, 3, 5).astype('float32') groups = 2 - fluid.layers.group_norm(input, groups) + paddle.static.nn.group_norm(input, groups) self.assertRaises(TypeError, test_x_type) @@ -55,7 +56,7 @@ def test_x_dtype(): name='x2', shape=[2, 100, 3, 5], dtype='int32' ) groups = 2 - fluid.layers.group_norm(x2, groups) + paddle.static.nn.group_norm(x2, groups) self.assertRaises(TypeError, test_x_dtype) @@ -245,11 +246,11 @@ def init_test_case(self): class TestGroupNormAPI_With_NHWC(unittest.TestCase): def test_case1(self): data1 = fluid.data(name='data1', shape=[None, 3, 3, 4], dtype='float64') - out1 = fluid.layers.group_norm( + out1 = paddle.static.nn.group_norm( input=data1, groups=2, data_layout="NHWC" ) data2 = fluid.data(name='data2', shape=[None, 4, 3, 3], dtype='float64') - out2 = fluid.layers.group_norm( + out2 = paddle.static.nn.group_norm( input=data2, groups=2, data_layout="NCHW" ) @@ -282,7 +283,7 @@ def test_exception(self): data = fluid.data(name='data', shape=[None, 3, 3, 4], dtype="float64") def attr_data_format(): - out = fluid.layers.group_norm( + out = paddle.static.nn.group_norm( input=data, groups=2, data_layout="NDHW" ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py index 
c38caf69e086a7..4b5e008cb74828 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py @@ -636,7 +636,7 @@ def raise_no_grad_op(self): with fluid.dygraph.guard(): x = fluid.layers.ones(shape=[2, 3, 2, 2], dtype='float32') x.stop_gradient = False - y = paddle.fluid.layers.group_norm(x, groups=1) + y = paddle.static.nn.group_norm(x, groups=1) dx = fluid.dygraph.grad( outputs=[y], inputs=[x], create_graph=True, retain_graph=True diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py index 3ee24ec9821039..0fb5f40470a092 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -123,8 +123,12 @@ def testLoadStaticModel(self): groupnorm_in = fluid.data( name='groupnorm_in', shape=[None, 8, 32, 32], dtype='float32' ) - groupnorm_out1 = fluid.layers.group_norm(input=groupnorm_in, groups=4) - groupnorm_out2 = fluid.layers.group_norm(input=groupnorm_in, groups=4) + groupnorm_out1 = paddle.static.nn.group_norm( + input=groupnorm_in, groups=4 + ) + groupnorm_out2 = paddle.static.nn.group_norm( + input=groupnorm_in, groups=4 + ) ''' spec_norm = fluid.data(name='spec_norm', shape=[2, 8, 32, 32], dtype='float32') spe_norm_out_1 = fluid.layers.spectral_norm(weight=spec_norm, dim=1, power_iters=2) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py index 92279d501e464e..f9034aa45f6f93 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py @@ -120,7 +120,7 @@ def forward(self, input): ) return out else: - return fluid.layers.instance_norm( + return paddle.static.nn.instance_norm( input, epsilon=self.epsilon, param_attr=fluid.ParamAttr(self.scale.name), diff --git a/python/paddle/fluid/tests/unittests/test_instance_norm_op.py b/python/paddle/fluid/tests/unittests/test_instance_norm_op.py index ed9e01259e6c54..c5cf210f340b07 100644 --- a/python/paddle/fluid/tests/unittests/test_instance_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_instance_norm_op.py @@ -239,11 +239,11 @@ def test_errors(self): x1 = fluid.create_lod_tensor( np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace() ) - self.assertRaises(TypeError, fluid.layers.instance_norm, x1) + self.assertRaises(TypeError, paddle.static.nn.instance_norm, x1) # the input dtype of instance_norm must be float32 or float64 x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32") - self.assertRaises(TypeError, fluid.layers.instance_norm, x2) + self.assertRaises(TypeError, paddle.static.nn.instance_norm, x2) class TestInstanceNormOpErrorCase1(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 30e50294448f50..1d5521f4bdc43f 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -1880,7 +1880,7 @@ def func_group_norm(self): lod_level=1, append_batch_size=False, ) - ret = layers.group_norm( + ret = paddle.static.nn.group_norm( input=X, groups=2, param_attr=fluid.initializer.Uniform(low=-0.5, high=0.5), @@ 
-1953,7 +1953,7 @@ def test_instance_norm(self): X = fluid.layers.data( name='X', shape=shape, dtype='float32', append_batch_size=False ) - ret = layers.instance_norm(input=X) + ret = paddle.static.nn.instance_norm(input=X) static_ret = self.get_static_graph_result( feed={'X': input}, fetch_list=[ret] )[0] diff --git a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py index 26dea91aecce41..ed6b94432a491c 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py @@ -35,7 +35,7 @@ def func(self, place): eps = 0.005 atol = 1e-4 x = layers.create_parameter(dtype=dtype, shape=shape, name='x') - z = fluid.layers.instance_norm(input=x) + z = paddle.static.nn.instance_norm(input=x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) gradient_checker.double_grad_check( [x], z, x_init=x_arr, atol=atol, place=place, eps=eps @@ -63,7 +63,7 @@ def func(self, place): eps = 0.005 atol = 1e-4 x = layers.create_parameter(dtype=dtype, shape=shape, name='x') - z = fluid.layers.instance_norm( + z = paddle.static.nn.instance_norm( input=x, param_attr=False, bias_attr=False ) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py index 6f27289efc7ae6..5dfae6c98092f2 100755 --- a/python/paddle/static/nn/__init__.py +++ b/python/paddle/static/nn/__init__.py @@ -13,6 +13,9 @@ # limitations under the License. from .common import fc # noqa: F401 +from .common import instance_norm # noqa: F401 +from .common import data_norm # noqa: F401 +from .common import group_norm # noqa: F401 from .common import deform_conv2d # noqa: F401 from .common import conv3d # noqa: F401 from .common import conv2d_transpose # noqa: F401 @@ -25,9 +28,6 @@ from ...fluid.layers import conv2d # noqa: F401 from ...fluid.layers import create_parameter # noqa: F401 from ...fluid.layers import crf_decoding # noqa: F401 -from ...fluid.layers import data_norm # noqa: F401 -from ...fluid.layers import group_norm # noqa: F401 -from ...fluid.layers import instance_norm # noqa: F401 from ...fluid.layers import layer_norm # noqa: F401 from ...fluid.layers import multi_box_head # noqa: F401 from .loss import nce # noqa: F401 diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index a7470f2fb2e03f..da3b58bb18205a 100755 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -13,8 +13,10 @@ # limitations under the License. import paddle -from paddle.fluid.initializer import Normal +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Normal, Constant from paddle.fluid.framework import static_only, Variable, _non_static_mode +from paddle.fluid.layers.layer_function_generator import templatedoc from paddle.fluid.data_feeder import check_dtype @@ -177,6 +179,459 @@ def fc( ) +def instance_norm( + input, epsilon=1e-05, param_attr=None, bias_attr=None, name=None +): + r""" + :api_attr: Static Graph + + **Instance Normalization Layer** + + Can be used as a normalizer function for convolution or fully_connected operations. + The required data format for this layer is one of the following: + + DataLayout: NCHW `[batch, in_channels, in_height, in_width]` + + Refer to `Instance Normalization: The Missing Ingredient for + Fast Stylization `_ + for more details. + + :math:`input` is the input features over a mini-batch. + + .. 
math:: + + \\mu_{\\beta} &\\gets \\frac{1}{HW} \\sum_{i=1}^{HW} x_i \\qquad &//\\ + \\ mean\ of\ one\ feature\ map\ in\ mini-batch \\\\ + \\sigma_{\\beta}^{2} &\\gets \\frac{1}{HW} \\sum_{i=1}^{HW}(x_i - \\ + \\mu_{\\beta})^2 \\qquad &//\ variance\ of\ one\ feature\ map\ in\ mini-batch \\\\ + \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\ + \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\ + y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift + + Note: + `H` means height of feature map, `W` means width of feature map. + + Args: + input(Tensor): The rank of input tensor can be 2, 3, 4, 5. + The data type is float32 or float64. + epsilon(float, Default 1e-05): A value added to the denominator for + numerical stability. Default is 1e-5. + param_attr(ParamAttr|None|bool, optional): The parameter attribute for Parameter `scale` + of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm + will create ParamAttr as param_attr, the name of scale can be set in ParamAttr. + If the Initializer of the param_attr is not set, the parameter is initialized + with Xavier. If the param_attr is set to False, instance_norm will not create param_attr. + Default: None. + bias_attr(ParamAttr|None|bool, optional): The parameter attribute for the bias of instance_norm. + If it is set to None or one attribute of ParamAttr, instance_norm + will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. + If the Initializer of the bias_attr is not set, the bias is initialized zero. + If the bias_attr is set to False, instance_norm will not create bias_attr. + Default: None. + name(string, Default None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + A Tensor which is the result after applying instance normalization on the input, + has same shape and data type with input. + + Examples: + + .. 
code-block:: python + + import paddle + paddle.enable_static() + x = paddle.static.data(name='x', shape=[3, 7, 3, 7], dtype='float32') + hidden1 = paddle.static.nn.fc(x, size=200) + hidden2 = paddle.static.nn.instance_norm(hidden1) + """ + check_variable_and_dtype( + input, 'input', ['float32', 'float64'], 'instance_norm' + ) + if param_attr is False: + assert ( + bias_attr is False + ), "param_attr and bias_attr must be set to False at the same time in instance_norm" + + helper = LayerHelper('instance_norm', **locals()) + dtype = helper.input_dtype() + + # use fp32 for in parameter + if dtype == paddle.framework.core.VarDesc.VarType.FP16: + dtype = paddle.framework.core.VarDesc.VarType.FP32 + + input_shape = input.shape + if len(input.shape) < 2 or len(input.shape) > 5: + raise ValueError( + 'expected 2D or 3D or 4D or 5D input (got {}D input, input shape is: {})'.format( + len(input.shape), input_shape + ) + ) + channel_num = input_shape[1] + + param_shape = [channel_num] + + if param_attr and bias_attr: + # create parameter + scale = helper.create_parameter( + attr=helper.param_attr, + shape=param_shape, + dtype=dtype, + default_initializer=Constant(1.0), + ) + bias = helper.create_parameter( + attr=helper.bias_attr, + shape=param_shape, + dtype=dtype, + is_bias=True, + default_initializer=Constant(0.0), + ) + + # create output + saved_mean = helper.create_variable_for_type_inference( + dtype=dtype, stop_gradient=True + ) + saved_variance = helper.create_variable_for_type_inference( + dtype=dtype, stop_gradient=True + ) + + instance_norm_out = helper.create_variable_for_type_inference(dtype) + + inputs = {"X": input} + if param_attr and bias_attr: + inputs["Scale"] = scale + inputs["Bias"] = bias + + helper.append_op( + type="instance_norm", + inputs=inputs, + outputs={ + "Y": instance_norm_out, + "SavedMean": saved_mean, + "SavedVariance": saved_variance, + }, + attrs={ + "epsilon": epsilon, + }, + ) + + return instance_norm_out + + +@static_only +def data_norm( + input, + act=None, + epsilon=1e-05, + param_attr=None, + data_layout='NCHW', + in_place=False, + name=None, + moving_mean_name=None, + moving_variance_name=None, + do_model_average_for_mean_and_var=True, + slot_dim=-1, + sync_stats=False, + summary_decay_rate=0.9999999, + enable_scale_and_shift=False, +): + r""" + :api_attr: Static Graph + + **Data Normalization Layer** + + This op can be used as a normalizer function for conv2d and fully_connected operations. + The required data format for this layer is one of the following: + + 1. NHWC `[batch, in_height, in_width, in_channels]` + + 2. NCHW `[batch, in_channels, in_height, in_width]` + + :math:`input` is the input features over a mini-batch. + + .. math:: + + \\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\ + \ mini-batch\ mean \\\\ + \\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m}(x_i - \\ + \\mu_{\\beta})^2 \\qquad &//\ mini-batch\ variance \\\\ + \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\ + \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\ + y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift + + Args: + input(Tensor): The input Tensor. + act(string, Default None): Activation type, linear|relu|prelu|... + epsilon(float, Default 1e-05): + param_attr(ParamAttr): The parameter attribute for Parameter `scale`. + data_layout (str, optional): Specify the data format of the input, and the data format of the output + will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. 
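
A minimal sketch of calling the relocated data_norm under static graph mode; the epsilon and name values simply mirror the updated test_fleet case in this patch and are otherwise arbitrary:

    import paddle
    paddle.enable_static()

    x = paddle.static.data(name='x', shape=[32, 100], dtype='float32')
    # previously fluid.layers.data_norm
    out = paddle.static.nn.data_norm(input=x, epsilon=1e-4, name="norm")
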
+ The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_height, input_width]`. + in_place(bool, Default False): Make the input and output of batch norm reuse memory. + name(string, Default None): A name for this layer(optional). If set None, the layer + will be named automatically. + moving_mean_name(string, Default None): The name of moving_mean which store the global Mean. + moving_variance_name(string, Default None): The name of the moving_variance which store the global Variance. + do_model_average_for_mean_and_var(bool, Default True): Whether parameter mean and variance + should do model average when model average is enabled. + slot_dim(int): The embedding dimension of one slot. Slot is a set of one specific feature. In pslib mode, we + distinguish feature ids by slot and pull their embeddings from parameter server (pslib). The first + place of the embedding is the historical show number (occurence time of this feature id with a label 0). + If the input of this op is concated by slot-wise embeddings, and the show number is zero when this slot + is new or empty, the normalization result may be impractical. To avoid this, we add slot_dim to locate + the show number and judge if the show number is zero. If so, we choose to skip normalization on this + embedding. + sync_stats(bool, Default False): When running with multiple GPU cards, using allreduce to sync the + summary messages. + summary_decay_rate(float, Default 0.9999999): The decay rate when updating summary. + enable_scale_and_shift(bool, Default False): do scale&shift after normalization. + + Returns: + Tensor: A tensor which is the result after applying data normalization on the input. + + Examples: + + .. code-block:: python + + import paddle + paddle.enable_static() + + x = paddle.randn(shape=[32,100]) + hidden2 = paddle.static.nn.data_norm(input=x) + """ + helper = LayerHelper('data_norm', **locals()) + dtype = helper.input_dtype() + + input_shape = input.shape + if data_layout == 'NCHW': + channel_num = input_shape[1] + else: + if data_layout == 'NHWC': + channel_num = input_shape[-1] + else: + raise ValueError("unsupported data layout:" + data_layout) + + param_shape = [channel_num] + + batch_size_default = 1e4 + batch_sum_default = 0.0 + batch_square_sum_default = 1e4 + scale_w_default = 1.0 + bias_default = 0.0 + + if param_attr and isinstance(param_attr, dict): + batch_size_default = param_attr.get("batch_size", 1e4) + batch_sum_default = param_attr.get("batch_sum", 0.0) + batch_square_sum_default = param_attr.get("batch_square", 1e4) + if enable_scale_and_shift: + scale_w_default = param_attr.get("scale_w", 1.0) + bias_default = param_attr.get("bias", 0.0) + + # create scale and shift(bias) when enable_scale_and_shift is True + if name is None: + name = "dn" + if enable_scale_and_shift: + scale_w = helper.create_parameter( + attr=ParamAttr( + name=name + '.scale_w', + initializer=Constant(value=float(scale_w_default)), + trainable=True, + ), + shape=param_shape, + dtype=input.dtype, + ) + bias = helper.create_parameter( + attr=ParamAttr( + name=name + '.bias', + initializer=Constant(value=float(bias_default)), + trainable=True, + ), + shape=param_shape, + dtype=input.dtype, + ) + # create parameter + batch_size = helper.create_parameter( + attr=ParamAttr( + name=name + '.batch_size', + initializer=Constant(value=float(batch_size_default)), + trainable=True, + ), + shape=param_shape, + dtype=input.dtype, + ) + + batch_sum = helper.create_parameter( + 
attr=ParamAttr( + name=name + '.batch_sum', + initializer=Constant(value=float(batch_sum_default)), + trainable=True, + ), + shape=param_shape, + dtype=input.dtype, + ) + + batch_square_sum = helper.create_parameter( + attr=ParamAttr( + name=name + '.batch_square_sum', + initializer=Constant(value=float(batch_square_sum_default)), + trainable=True, + ), + shape=param_shape, + dtype=input.dtype, + ) + + means = helper.create_variable(dtype=dtype, stop_gradient=True) + scales = helper.create_variable(dtype=dtype, stop_gradient=True) + + data_norm_out = input if in_place else helper.create_variable(dtype=dtype) + + inputs = { + "X": input, + "BatchSize": batch_size, + "BatchSum": batch_sum, + "BatchSquareSum": batch_square_sum, + } + attrs = { + "epsilon": epsilon, + "data_layout": data_layout, + "sync_stats": sync_stats, + "summary_decay_rate": summary_decay_rate, + } + if slot_dim > 0: + attrs["slot_dim"] = slot_dim + if enable_scale_and_shift: + attrs["enable_scale_and_shift"] = enable_scale_and_shift + if enable_scale_and_shift: + inputs["scale_w"] = scale_w + inputs["bias"] = bias + helper.append_op( + type="data_norm", + inputs=inputs, + outputs={ + "Y": data_norm_out, + "Means": means, + "Scales": scales, + "BatchSize": batch_size, + "BatchSum": batch_sum, + "BatchSquareSum": batch_square_sum, + }, + attrs=attrs, + ) + + return helper.append_activation(data_norm_out) + + +@templatedoc() +def group_norm( + input, + groups, + epsilon=1e-05, + param_attr=None, + bias_attr=None, + act=None, + data_layout='NCHW', + name=None, +): + """ + :api_attr: Static Graph + + **Group Normalization Layer** + + Refer to `Group Normalization `_ . + + Parameters: + input(Tensor): Tensor with dimension greater than 1, the data type is float32 or float64. + groups(int): The number of groups that divided from channels, the data type + is int32. + epsilon(float, optional): The small value added to the variance to prevent + division by zero, the data type is float32. Default: 1e-05. + param_attr(ParamAttr|bool, optional): ParamAttr object that specifies weight parameter + attribute. If a bool type, only False is supported, which means there is no weight parameter. + Default: None, the default weight parameter attribute is used. For more information, please + refer to :ref:`api_guide_ParamAttr` . + bias_attr(ParamAttr|bool, optional): ParamAttr object that specifies bias parameter + attribute. If a bool type, only False is supported, which means there is no bias parameter. + Default: None, the default bias parameter attribute is used. For more information, please + refer to :ref:`api_guide_ParamAttr` . + act(str, optional): Activation to be applied to the output of group normalization. + data_layout(str, optional): Specify the data format of the input, and the data format of the output + will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. + The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: + `[batch_size, input_channels, *]`. + name (str, optional): The default value is None. Normally there is no need for user to set this + property. For more information, please refer to :ref:`api_guide_Name` . + + Returns: + Tensor: A Tensor has same data type and data format with `input`. + + Examples: + .. 
code-block:: python + + import paddle + paddle.enable_static() + + data = paddle.static.data(name='data', shape=[2, 8, 32, 32], dtype='float32') + x = paddle.static.nn.group_norm(input=data, groups=4) + print(x.shape) # [2, 8, 32, 32] + """ + helper = LayerHelper('group_norm', **locals()) + dtype = helper.input_dtype() + check_variable_and_dtype( + input, 'input', ['float32', 'float64'], 'group_norm' + ) + # create intput and parameters + inputs = {'X': input} + input_shape = input.shape + if len(input_shape) < 2: + raise ValueError( + f"The dimensions of Op(static.nn.group_norm)'s input should be more than 1. But received {len(input_shape)}" + ) + if data_layout != 'NCHW' and data_layout != 'NHWC': + raise ValueError( + "Param(data_layout) of Op(static.nn.group_norm) got wrong value: received " + + data_layout + + " but only NCHW or NHWC supported." + ) + channel_num = input_shape[1] if data_layout == 'NCHW' else input_shape[-1] + param_shape = [channel_num] + if param_attr: + scale = helper.create_parameter( + attr=helper.param_attr, + shape=param_shape, + dtype=dtype, + default_initializer=Constant(1.0), + ) + inputs['Scale'] = scale + if bias_attr: + bias = helper.create_parameter( + attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True + ) + inputs['Bias'] = bias + + # create output + mean_out = helper.create_variable(dtype=dtype, stop_gradient=True) + variance_out = helper.create_variable(dtype=dtype, stop_gradient=True) + group_norm_out = helper.create_variable(dtype=dtype) + + helper.append_op( + type="group_norm", + inputs=inputs, + outputs={ + "Y": group_norm_out, + "Mean": mean_out, + "Variance": variance_out, + }, + attrs={ + "epsilon": epsilon, + "groups": groups, + "data_layout": data_layout, + }, + ) + + return helper.append_activation(group_norm_out) + + def conv3d( input, num_filters, From 0707c0afbdce070c45cc24d31393465144771305 Mon Sep 17 00:00:00 2001 From: 201716010711 <87008376+201716010711@users.noreply.github.com> Date: Tue, 29 Nov 2022 12:26:53 +0800 Subject: [PATCH 017/154] delete slice api (#48399) --- python/paddle/fluid/contrib/layers/nn.py | 7 +- .../dygraph_to_static/convert_operators.py | 3 +- python/paddle/fluid/layers/detection.py | 2 +- python/paddle/fluid/layers/nn.py | 256 ------------------ python/paddle/fluid/layers/rnn.py | 2 +- .../dygraph_to_static/bert_dygraph_model.py | 2 +- .../unittests/dygraph_to_static/test_bmn.py | 4 +- .../unittests/dygraph_to_static/test_lac.py | 4 +- .../unittests/dygraph_to_static/yolov3.py | 4 +- .../tests/unittests/ipu/test_slice_op_ipu.py | 6 +- .../test_trt_slice_dynamic_plugin.py | 5 +- .../ir/inference/test_trt_slice_plugin.py | 13 +- .../test_eager_deletion_padding_rnn.py | 20 +- .../tests/unittests/test_imperative_basic.py | 4 +- .../test_imperative_ocr_attention_model.py | 8 +- .../unittests/test_imperative_ptb_rnn.py | 6 +- .../unittests/test_imperative_save_load.py | 6 +- .../unittests/test_imperative_save_load_v2.py | 6 +- .../fluid/tests/unittests/test_layers.py | 2 +- .../fluid/tests/unittests/test_nn_grad.py | 2 +- .../tests/unittests/test_static_save_load.py | 6 +- 21 files changed, 47 insertions(+), 321 deletions(-) diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py index 4bfbe7538617c2..02c5a7bfe4f87b 100644 --- a/python/paddle/fluid/contrib/layers/nn.py +++ b/python/paddle/fluid/contrib/layers/nn.py @@ -36,7 +36,6 @@ from paddle.fluid.param_attr import ParamAttr from paddle.fluid.framework import Variable, convert_np_dtype_to_dtype_ -from 
paddle.fluid.layers import slice import paddle import warnings from paddle import _C_ops, _legacy_C_ops @@ -1540,13 +1539,13 @@ def tdm_sampler( for layer_sample_num in neg_samples_num_list: end_offset = start_offset + layer_sample_num + positive_flag - layer_samples = slice( + layer_samples = paddle.slice( out, axes=[1], starts=[start_offset], ends=[end_offset] ) - layer_labels = slice( + layer_labels = paddle.slice( labels, axes=[1], starts=[start_offset], ends=[end_offset] ) - layer_mask = slice( + layer_mask = paddle.slice( mask, axes=[1], starts=[start_offset], ends=[end_offset] ) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py b/python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py index 91084b3ff40be3..ee8fc9e4c7242b 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py @@ -29,7 +29,6 @@ from paddle.fluid.layers import ( assign, fill_constant, - slice, reduce_all, reduce_any, ) @@ -819,7 +818,7 @@ def true_fn(): return null_array def false_fn(array, start, end): - new_array = slice(array, starts=[start], ends=[end], axes=[0]) + new_array = paddle.slice(array, starts=[start], ends=[end], axes=[0]) return new_array new_array = cond(start == end, true_fn, lambda: false_fn(array, start, end)) diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 543f63b639ea7c..fa00813146862f 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -1768,7 +1768,7 @@ def __reshape_to_2d(var): target_label.stop_gradient = True conf_loss = softmax_with_cross_entropy(confidence, target_label) # 3. Mining hard examples - actual_shape = nn.slice(conf_shape, axes=[0], starts=[0], ends=[2]) + actual_shape = paddle.slice(conf_shape, axes=[0], starts=[0], ends=[2]) actual_shape.stop_gradient = True # shape=(-1, 0) is set for compile-time, the correct shape is set by # actual_shape in runtime. diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index ca0d3cd721d1b7..00da0deea20c7c 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -110,7 +110,6 @@ 'gaussian_random', 'sampling_id', 'sum', - 'slice', 'shape', 'clip', 'clip_by_norm', @@ -6007,261 +6006,6 @@ def sum(x): return paddle.add_n(x) -@templatedoc() -def slice(input, axes, starts, ends): - """ - This operator produces a slice of ``input`` along multiple axes. Similar to numpy: - https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html - Slice uses ``axes``, ``starts`` and ``ends`` attributes to specify the start and - end dimension for each axis in the list of axes and Slice uses this information - to slice the input data tensor. If a negative value is passed to - ``starts`` or ``ends`` such as :math:`-i`, it represents the reverse position of the - axis :math:`i-1` (here 0 is the initial position). - If the value passed to ``starts`` or ``ends`` is greater than n - (the number of elements in this dimension), it represents n. - For slicing to the end of a dimension with unknown size, it is recommended - to pass in INT_MAX. The size of ``axes`` must be equal to ``starts`` and ``ends``. - Following examples will explain how slice works: - - .. 
code-block:: text - - Case1: - Given: - data = [ [1, 2, 3, 4], [5, 6, 7, 8], ] - axes = [0, 1] - starts = [1, 0] - ends = [2, 3] - Then: - result = [ [5, 6, 7], ] - - Case2: - Given: - data = [ [1, 2, 3, 4], [5, 6, 7, 8], ] - axes = [0, 1] - starts = [0, 1] - ends = [-1, 1000] # -1 denotes the reverse 0th position of dimension 0. - Then: - result = [ [2, 3, 4], ] # result = data[0:1, 1:4] - - Args: - input (Tensor): A ``Tensor`` . The data type is ``float16``, ``float32``, ``float64``, ``int32`` or ``int64``. - axes (list|tuple): The data type is ``int32`` . Axes that `starts` and `ends` apply to . - starts (list|tuple|Tensor): The data type is ``int32`` . If ``starts`` is a list or tuple, the elements of - it should be integers or Tensors with shape [1]. If ``starts`` is an Tensor, it should be an 1-D Tensor. - It represents starting indices of corresponding axis in ``axes``. - ends (list|tuple|Tensor): The data type is ``int32`` . If ``ends`` is a list or tuple, the elements of - it should be integers or Tensors with shape [1]. If ``ends`` is an Tensor, it should be an 1-D Tensor . - It represents ending indices of corresponding axis in ``axes``. - - Returns: - Tensor: A ``Tensor``. The data type is same as ``input``. - - Raises: - TypeError: The type of ``starts`` must be list, tuple or Tensor. - TypeError: The type of ``ends`` must be list, tuple or Tensor. - - Examples: - .. code-block:: python - - import paddle - - input = paddle.rand(shape=[4, 5, 6], dtype='float32') - # example 1: - # attr starts is a list which doesn't contain tensor. - axes = [0, 1, 2] - starts = [-3, 0, 2] - ends = [3, 2, 4] - sliced_1 = paddle.slice(input, axes=axes, starts=starts, ends=ends) - # sliced_1 is input[0:3, 0:2, 2:4]. - - # example 2: - # attr starts is a list which contain tensor. - minus_3 = paddle.full([1], -3, "int32") - sliced_2 = paddle.slice(input, axes=axes, starts=[minus_3, 0, 2], ends=ends) - # sliced_2 is input[0:3, 0:2, 2:4]. - """ - if in_dygraph_mode(): - attrs = () - starts_tensor = None - ends_tensor = None - - if isinstance(axes, (list, tuple)): - axes = list(axes) - if len(axes) == 0: - raise ValueError( - "Input axes should not be an empty list/tuple." - ) - for i in range(len(axes)): - if axes[i] < 0: - axes[i] = max(0, axes[i] + len(input.shape)) - else: - axes[i] = min(len(input.shape) - 1, axes[i]) - - else: - raise ValueError( - "Input axes must be a python list or tuple, but reveived {}".format( - type(axes) - ) - ) - - infer_flags = list(1 for i in range(len(axes))) - - tmp_tensor_type = core.eager.Tensor - if isinstance(starts, (list, tuple)): - starts = [ - item.numpy().item(0) - if isinstance(item, tmp_tensor_type) - else item - for item in starts - ] - elif isinstance(starts, tmp_tensor_type): - tensor_t = starts.numpy() - starts = [ele for ele in tensor_t] - - if isinstance(ends, (list, tuple)): - ends = [ - item.numpy().item(0) - if isinstance(item, tmp_tensor_type) - else item - for item in ends - ] - attrs += ('ends', ends) - elif isinstance(ends, tmp_tensor_type): - tensor_t = ends.numpy() - ends = [ele for ele in tensor_t] - - return _C_ops.slice(input, axes, starts, ends, infer_flags, []) - else: - if _in_legacy_dygraph(): - attrs = () - starts_tensor = None - ends_tensor = None - - if isinstance(axes, (list, tuple)): - axes = list(axes) - if len(axes) == 0: - raise ValueError( - "Input axes should not be an empty list/tuple." 
- ) - for i in range(len(axes)): - if axes[i] < 0: - axes[i] = max(0, axes[i] + len(input.shape)) - else: - axes[i] = min(len(input.shape) - 1, axes[i]) - - else: - raise ValueError( - "Input axes must be a python list or tuple, but reveived {}".format( - type(axes) - ) - ) - - infer_flags = list(1 for i in range(len(axes))) - - tmp_tensor_type = Variable - - if isinstance(starts, (list, tuple)): - starts = [ - item.numpy().item(0) - if isinstance(item, tmp_tensor_type) - else item - for item in starts - ] - attrs += ('starts', starts) - elif isinstance(starts, tmp_tensor_type): - starts_tensor = starts - starts.stop_gradient = True - infer_flags = list(-1 for i in range(len(axes))) - - if isinstance(ends, (list, tuple)): - ends = [ - item.numpy().item(0) - if isinstance(item, tmp_tensor_type) - else item - for item in ends - ] - attrs += ('ends', ends) - elif isinstance(ends, tmp_tensor_type): - ends_tensor = ends - ends_tensor.stop_gradient = True - infer_flags = list(-1 for i in range(len(axes))) - - return _legacy_C_ops.slice( - input, - starts_tensor, - ends_tensor, - None, - None, - 'axes', - axes, - 'infer_flags', - infer_flags, - *attrs, - ) - - if not isinstance(starts, (list, tuple, Variable)): - raise ValueError( - "Input starts must be an Variable, python list or tuple." - ) - if not isinstance(ends, (list, tuple, Variable)): - raise ValueError( - "Input ends must be an Variable, python list or tuple." - ) - - helper = LayerHelper('slice', **locals()) - - inputs = {'Input': input} - attrs = {'axes': axes} - infer_flags = list(1 for i in range(len(axes))) - - # starts - if isinstance(starts, Variable): - starts.stop_gradient = True - inputs['StartsTensor'] = starts - infer_flags = list(-1 for i in range(len(axes))) - elif isinstance(starts, (list, tuple)): - attrs['starts'] = [] - if utils._contain_var(starts): - inputs['StartsTensorList'] = utils._convert_to_tensor_list(starts) - for i, dim in enumerate(starts): - if isinstance(dim, Variable): - attrs['starts'].append(-1) - infer_flags[i] = -1 - else: - attrs['starts'].append(dim) - else: - attrs['starts'] = starts - - # ends - if isinstance(ends, Variable): - ends.stop_gradient = True - inputs['EndsTensor'] = ends - infer_flags = list(-1 for i in range(len(axes))) - elif isinstance(ends, (list, tuple)): - attrs['ends'] = [] - if utils._contain_var(ends): - inputs['EndsTensorList'] = utils._convert_to_tensor_list(ends) - for i, dim in enumerate(ends): - if isinstance(dim, Variable): - attrs['ends'].append(-1) - infer_flags[i] = -1 - else: - attrs['ends'].append(dim) - else: - attrs['ends'] = ends - - # infer_flags - attrs['infer_flags'] = infer_flags - out = helper.create_variable_for_type_inference( - dtype=helper.input_dtype('input') - ) - helper.append_op( - type='slice', inputs=inputs, attrs=attrs, outputs={'Out': out} - ) - - return out - - def shape(input): """ :alias_main: paddle.shape diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index b82a965e84fc87..01f22238406fa0 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -2089,7 +2089,7 @@ def next_inputs(self, time, outputs, states, sample_ids): def _slice(x): # TODO: use Variable.__getitem__ axes = [0 if self.time_major else 1] return paddle.squeeze( - nn.slice( + paddle.slice( x, axes=axes, starts=[next_time], ends=[next_time + 1] ), axis=axes, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py 
b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py index 0fbe99cce9e329..a2c1a73864bbb5 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py @@ -291,7 +291,7 @@ def forward(self, src_ids, position_ids, sentence_ids, input_mask): # # if not self.return_pooled_out: # return enc_output - next_sent_feat = fluid.layers.slice( + next_sent_feat = paddle.slice( input=enc_output, axes=[1], starts=[0], ends=[1] ) next_sent_feat = self.pooled_fc(next_sent_feat) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py index e34cce5d647349..5949903fc9936e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py @@ -412,10 +412,10 @@ def pem_cls_loss_func(pred_score, gt_iou_map, mask): return loss pred_bm_reg = paddle.squeeze( - fluid.layers.slice(pred_bm, axes=[1], starts=[0], ends=[1]), axis=[1] + paddle.slice(pred_bm, axes=[1], starts=[0], ends=[1]), axis=[1] ) pred_bm_cls = paddle.squeeze( - fluid.layers.slice(pred_bm, axes=[1], starts=[1], ends=[2]), axis=[1] + paddle.slice(pred_bm, axes=[1], starts=[1], ends=[2]), axis=[1] ) bm_mask = _get_mask(cfg) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py index 91926f259a7f60..3f8c9ad251fa31 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py @@ -85,9 +85,7 @@ def forward(self, inputs): j = i # input_ = inputs[:, j:j+1, :] # original code - input_ = fluid.layers.slice( - inputs, axes=[1], starts=[j], ends=[j + 1] - ) + input_ = paddle.slice(inputs, axes=[1], starts=[j], ends=[j + 1]) input_ = paddle.reshape(input_, [-1, input_.shape[2]]) hidden, reset, gate = self.gru_unit(input_, hidden) hidden_ = paddle.reshape(hidden, [-1, 1, hidden.shape[1]]) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py index c0ef4408ff8ea4..c005b9e99be169 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py @@ -206,9 +206,7 @@ def __init__(self, scale=2): def forward(self, inputs): # get dynamic upsample output shape shape_nchw = fluid.layers.shape(inputs) - shape_hw = fluid.layers.slice( - shape_nchw, axes=[0], starts=[2], ends=[4] - ) + shape_hw = paddle.slice(shape_nchw, axes=[0], starts=[2], ends=[4]) shape_hw.stop_gradient = True in_shape = fluid.layers.cast(shape_hw, dtype='int32') out_shape = in_shape * self.scale diff --git a/python/paddle/fluid/tests/unittests/ipu/test_slice_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_slice_op_ipu.py index cdcc97fb3ec708..2a4e7f4e538db6 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_slice_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_slice_op_ipu.py @@ -51,7 +51,7 @@ def build_model(self): x = paddle.static.data( name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32' ) - out = paddle.fluid.layers.slice(x, **self.attrs) + out = paddle.slice(x, **self.attrs) self.fetch_list = [out.name] def run_model(self, exec_mode): @@ -105,9 +105,7 @@ def build_model(self): ends = paddle.static.data( 
name=self.feed_list[2], shape=self.feed_shape[2], dtype='int32' ) - out = paddle.fluid.layers.slice( - x, starts=starts, ends=ends, **self.attrs - ) + out = paddle.slice(x, starts=starts, ends=ends, **self.attrs) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_dynamic_plugin.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_dynamic_plugin.py index 02d76fb9f25363..d3c242c8d83834 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_dynamic_plugin.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_dynamic_plugin.py @@ -17,6 +17,7 @@ import numpy as np from inference_pass_test import InferencePassTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import AnalysisConfig @@ -49,9 +50,7 @@ def setUp(self): axes = self.params_axes starts = self.params_starts ends = self.params_ends - slice_out = fluid.layers.slice( - data, axes=axes, starts=starts, ends=ends - ) + slice_out = paddle.slice(data, axes=axes, starts=starts, ends=ends) self.feeds = { "data": np.random.random((3, 3, 3, 3)).astype("float32"), diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_plugin.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_plugin.py index 1504a2a8ee6dc4..b8b0e6a55033a0 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_plugin.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_plugin.py @@ -17,6 +17,7 @@ import numpy as np from inference_pass_test import InferencePassTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import AnalysisConfig @@ -43,9 +44,7 @@ def setUp(self): axes = self.params_axes starts = self.params_starts ends = self.params_ends - slice_out = fluid.layers.slice( - data, axes=axes, starts=starts, ends=ends - ) + slice_out = paddle.slice(data, axes=axes, starts=starts, ends=ends) out = fluid.layers.batch_norm(slice_out, is_test=True) self.feeds = { @@ -114,9 +113,7 @@ def setUp(self): axes = self.params_axes starts = self.params_starts ends = self.params_ends - slice_out = fluid.layers.slice( - data, axes=axes, starts=starts, ends=ends - ) + slice_out = paddle.slice(data, axes=axes, starts=starts, ends=ends) cast_out = fluid.layers.cast(slice_out, 'float32') out = fluid.layers.batch_norm(cast_out, is_test=True) @@ -141,9 +138,7 @@ def setUp(self): axes = self.params_axes starts = self.params_starts ends = self.params_ends - slice_out = fluid.layers.slice( - data, axes=axes, starts=starts, ends=ends - ) + slice_out = paddle.slice(data, axes=axes, starts=starts, ends=ends) cast_out = fluid.layers.cast(slice_out, 'float32') out = fluid.layers.batch_norm(cast_out, is_test=True) diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py index 84f19e01bf2f97..c0a6ad983b1c73 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -143,10 +143,10 @@ def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None): ) bias_arr.append(bias_1) - pre_hidden = layers.slice( + pre_hidden = paddle.slice( init_hidden, axes=[0], starts=[i], ends=[i + 1] ) - pre_cell = layers.slice( + pre_cell = paddle.slice( init_cell, axes=[0], starts=[i], ends=[i + 1] ) pre_hidden = 
paddle.reshape(pre_hidden, shape=[-1, hidden_size]) @@ -169,22 +169,22 @@ def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None): gate_input = layers.matmul(x=nn, y=weight_1) gate_input = layers.elementwise_add(gate_input, bias) - i = layers.slice( + i = paddle.slice( gate_input, axes=[1], starts=[0], ends=[hidden_size] ) - j = layers.slice( + j = paddle.slice( gate_input, axes=[1], starts=[hidden_size], ends=[hidden_size * 2], ) - f = layers.slice( + f = paddle.slice( gate_input, axes=[1], starts=[hidden_size * 2], ends=[hidden_size * 3], ) - o = layers.slice( + o = paddle.slice( gate_input, axes=[1], starts=[hidden_size * 3], @@ -222,11 +222,11 @@ def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None): c = rnnout[i * 2 + 1] m.stop_gradient = True c.stop_gradient = True - last_h = layers.slice( + last_h = paddle.slice( m, axes=[0], starts=[num_steps - 1], ends=[num_steps] ) last_hidden_array.append(last_h) - last_c = layers.slice( + last_c = paddle.slice( c, axes=[0], starts=[num_steps - 1], ends=[num_steps] ) last_cell_array.append(last_c) @@ -264,10 +264,10 @@ def encoder_static( ) bias_arr.append(bias_1) - pre_hidden = layers.slice( + pre_hidden = paddle.slice( init_hidden, axes=[0], starts=[i], ends=[i + 1] ) - pre_cell = layers.slice( + pre_cell = paddle.slice( init_cell, axes=[0], starts=[i], ends=[i + 1] ) pre_hidden = paddle.reshape(pre_hidden, shape=[-1, hidden_size]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index ff4e3995740049..25c7eed19b87da 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -137,9 +137,7 @@ def forward(self, inputs): ) pre_hidden = init_hidden for i in range(self.seq_len): - input = fluid.layers.slice( - inputs, axes=[1], starts=[i], ends=[i + 1] - ) + input = paddle.slice(inputs, axes=[1], starts=[i], ends=[i + 1]) input = paddle.reshape(input, shape=[1, 3]) out_softmax, pre_hidden = self._cell(input, pre_hidden) outs.append(out_softmax) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index d58a67cd671d09..6205bb7f58b630 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -192,9 +192,7 @@ def forward(self, inputs): for i in range(inputs.shape[1]): if self.is_reverse: i = inputs.shape[1] - 1 - i - input_ = fluid.layers.slice( - inputs, axes=[1], starts=[i], ends=[i + 1] - ) + input_ = paddle.slice(inputs, axes=[1], starts=[i], ends=[i + 1]) input_ = paddle.reshape(input_, [-1, input_.shape[2]]) hidden, reset, gate = self.gru_unit(input_, hidden) hidden_ = paddle.reshape(hidden, [-1, 1, hidden.shape[1]]) @@ -356,7 +354,7 @@ def forward( res = [] hidden_mem = decoder_boot for i in range(target_embedding.shape[1]): - current_word = fluid.layers.slice( + current_word = paddle.slice( target_embedding, axes=[1], starts=[i], ends=[i + 1] ) current_word = paddle.reshape( @@ -399,7 +397,7 @@ def __init__(self): def forward(self, inputs, label_in): gru_backward, encoded_vector, encoded_proj = self.encoder_net(inputs) - backward_first = fluid.layers.slice( + backward_first = paddle.slice( gru_backward, axes=[1], starts=[0], ends=[1] ) backward_first = paddle.reshape( diff --git 
a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index 4bc3ec36535f6d..2630e3ce689f53 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -79,10 +79,10 @@ def forward(self, input_embedding, init_hidden=None, init_cell=None): self.hidden_array = [] for i in range(self._num_layers): - pre_hidden = fluid.layers.slice( + pre_hidden = paddle.slice( init_hidden, axes=[0], starts=[i], ends=[i + 1] ) - pre_cell = fluid.layers.slice( + pre_cell = paddle.slice( init_cell, axes=[0], starts=[i], ends=[i + 1] ) pre_hidden = paddle.reshape( @@ -94,7 +94,7 @@ def forward(self, input_embedding, init_hidden=None, init_cell=None): res = [] for index in range(self._num_steps): - self._input = fluid.layers.slice( + self._input = paddle.slice( input_embedding, axes=[1], starts=[index], ends=[index + 1] ) self._input = paddle.reshape( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index 920187f7eb5d94..82084cfd27ba53 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -74,10 +74,10 @@ def forward(self, input_embedding, init_hidden=None, init_cell=None): self.hidden_array = [] for i in range(self._num_layers): - pre_hidden = fluid.layers.slice( + pre_hidden = paddle.slice( init_hidden, axes=[0], starts=[i], ends=[i + 1] ) - pre_cell = fluid.layers.slice( + pre_cell = paddle.slice( init_cell, axes=[0], starts=[i], ends=[i + 1] ) pre_hidden = paddle.reshape( @@ -89,7 +89,7 @@ def forward(self, input_embedding, init_hidden=None, init_cell=None): res = [] for index in range(self._num_steps): - self._input = fluid.layers.slice( + self._input = paddle.slice( input_embedding, axes=[1], starts=[index], ends=[index + 1] ) self._input = paddle.reshape( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index 17ad62c660d917..2f81d59a60916d 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -77,10 +77,10 @@ def forward(self, input_embedding, init_hidden=None, init_cell=None): self.hidden_array = [] for i in range(self._num_layers): - pre_hidden = fluid.layers.slice( + pre_hidden = paddle.slice( init_hidden, axes=[0], starts=[i], ends=[i + 1] ) - pre_cell = fluid.layers.slice( + pre_cell = paddle.slice( init_cell, axes=[0], starts=[i], ends=[i + 1] ) pre_hidden = paddle.reshape( @@ -92,7 +92,7 @@ def forward(self, input_embedding, init_hidden=None, init_cell=None): res = [] for index in range(self._num_steps): - self._input = fluid.layers.slice( + self._input = paddle.slice( input_embedding, axes=[1], starts=[index], ends=[index + 1] ) self._input = paddle.reshape( diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 1d5521f4bdc43f..0fc87dd03b6e31 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -3584,7 +3584,7 @@ def make_slice(self): name="input", shape=[3, 4, 5, 6], dtype='float32' ) - out = layers.slice(input, axes=axes, starts=starts, ends=ends) + out = paddle.slice(input, axes=axes, starts=starts, 
ends=ends) return out def make_scale_variable(self): diff --git a/python/paddle/fluid/tests/unittests/test_nn_grad.py b/python/paddle/fluid/tests/unittests/test_nn_grad.py index b30aa34ad992d4..c774351db998f1 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_nn_grad.py @@ -30,7 +30,7 @@ class TestSliceOpDoubleGradCheck(unittest.TestCase): def func(self, place): self.config() - out = fluid.layers.slice( + out = paddle.slice( self.inputs, axes=self.axes, starts=self.starts, ends=self.ends ) gradient_checker.double_grad_check( diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index 27bc86259f7d87..c0472b16515a01 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -85,10 +85,10 @@ def forward(self, input_embedding, init_hidden=None, init_cell=None): self.hidden_array = [] for i in range(self._num_layers): - pre_hidden = fluid.layers.slice( + pre_hidden = paddle.slice( init_hidden, axes=[0], starts=[i], ends=[i + 1] ) - pre_cell = fluid.layers.slice( + pre_cell = paddle.slice( init_cell, axes=[0], starts=[i], ends=[i + 1] ) pre_hidden = paddle.reshape( @@ -100,7 +100,7 @@ def forward(self, input_embedding, init_hidden=None, init_cell=None): res = [] for index in range(self._num_steps): - self._input = fluid.layers.slice( + self._input = paddle.slice( input_embedding, axes=[1], starts=[index], ends=[index + 1] ) self._input = paddle.reshape( From 23e5b25cb2cbb7b9785027ba1544865bf18b769c Mon Sep 17 00:00:00 2001 From: JZ-LIANG Date: Tue, 29 Nov 2022 14:25:45 +0800 Subject: [PATCH 018/154] [Auto Parallel Performance] Optimizing data parallel Fuse-Allreduce-Overlapping (#48092) * add depend * add origin amp files * fp16 distinguish None & False * engine log * dp add deps for graph exe * add dep for grad clip * dep ops in comm stream * unitest --- .../paddle/distributed/auto_parallel/utils.py | 70 ++++++-- ...uto_parallel_data_parallel_optimization.py | 168 ++++++++++++++++-- .../passes/auto_parallel_grad_clip.py | 43 +++++ .../passes/auto_parallel_recompute.py | 2 + ...arallel_data_parallel_optimization_pass.py | 62 +++++++ 5 files changed, 325 insertions(+), 20 deletions(-) diff --git a/python/paddle/distributed/auto_parallel/utils.py b/python/paddle/distributed/auto_parallel/utils.py index c31642a9e2af31..280868773cdc3c 100644 --- a/python/paddle/distributed/auto_parallel/utils.py +++ b/python/paddle/distributed/auto_parallel/utils.py @@ -1410,6 +1410,9 @@ def naive_set_dist_op_attr_for_program_by_mesh_and_mapping( def naive_set_dist_op_attr_for_program_by_mesh( new_op, process_mesh, ctx, is_recompute=False ): + # hack to skip coalesce var for dist attr + if not is_recompute: + return assert process_mesh is not None new_op_dist_attr = OperatorDistributedAttribute() @@ -2129,13 +2132,13 @@ def insert_dependencies_for_two_ops( block, idx, prior_op, - posterior, + posterior_op, dist_context, is_recompute=False, sync=False, ): """ - dependency: prior_op should be run before posterior + dependency: prior_op should be run before posterior_op """ assert ( @@ -2144,15 +2147,15 @@ def insert_dependencies_for_two_ops( str(prior_op) ) assert ( - len(posterior.input_arg_names) >= 1 + len(posterior_op.input_arg_names) >= 1 ), "second op of dependency should at least have one input. 
[{}]".format( - str(posterior) + str(posterior_op) ) prior_op_mesh = dist_context.get_op_dist_attr_for_program( prior_op ).process_mesh posterior_mesh = dist_context.get_op_dist_attr_for_program( - posterior + posterior_op ).process_mesh assert ( prior_op_mesh == posterior_mesh @@ -2171,25 +2174,72 @@ def _select_best_depend_var(vars): [block.var(name) for name in prior_op.output_arg_names] ) second_var = _select_best_depend_var( - [block.var(name) for name in posterior.input_arg_names] + [block.var(name) for name in posterior_op.input_arg_names] ) + return insert_dependencies_for_two_vars( + block, + idx, + first_var, + second_var, + dist_context, + OpRole.Backward, + prior_op_mesh, + is_recompute, + sync, + ) + + +def insert_dependencies_for_two_vars( + block, + idx, + prior_var, + post_var, + dist_context, + oprole, + process_mesh=None, + is_recompute=False, + sync=False, +): + """ + dependency: op that generates prior_var should be run before op that generates post_var + """ + assert block.has_var(prior_var.name) + assert block.has_var(post_var.name) + if process_mesh is None: + process_mesh = dist_context.get_tensor_dist_attr_for_program( + post_var + ).process_mesh + assert process_mesh is not None + depend_op = block._insert_op_without_sync( idx, type='nop', inputs={ - "X": first_var, + "X": prior_var, }, - outputs={"Out": second_var}, + outputs={"Out": post_var}, ) # depend_op.desc.set_type("depend") - depend_op._set_attr(OP_ROLE_KEY, OpRole.Backward) + depend_op._set_attr(OP_ROLE_KEY, oprole) # depend_op.desc.set_input("Dep", [first_var.name]) # self.desc.set_output(out_proto.name, out_arg_names) naive_set_dist_op_attr_for_program_by_mesh( - depend_op, prior_op_mesh, dist_context, is_recompute + depend_op, process_mesh, dist_context, is_recompute ) if sync: block._sync_with_cpp() + + return depend_op + + +def use_standalone_executor(): + return os.environ.get('FLAGS_CONVERT_GRAPH_TO_PROGRAM', None) in [ + 1, + '1', + True, + 'True', + 'true', + ] diff --git a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py index 601cd31948b3fc..47759484a66ee2 100644 --- a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py +++ b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py @@ -27,8 +27,11 @@ find_higher_order_backward_op, is_loss_grad_op, is_optimize_op, + is_forward_op, ring_id_to_process_group, get_var_numel, + use_standalone_executor, + insert_dependencies_for_two_vars, ) # add new optimizers supporting rescale_grad here @@ -87,16 +90,20 @@ def _apply_single_impl(self, main_program, startup_program, context): self.dist_context = self.get_attr("dist_context") self.global_rank = int(self.get_attr("global_rank")) self.use_sharding = self.get_attr("use_sharding") + self.coalesce_prefix = 'coalesce_grad' + if use_standalone_executor(): + self.gradient_sync_stream = "gradient_sync_stream" with paddle.static.program_guard(main_program, startup_program): self._analyze_program() + # TODO refactor here to first fuse then overlap if self.is_data_parallel_applied(): self._prune_grad_scaling() self._calc_comm_overlap() grad_group = self._fuse_allreduce() - - # self.summary(grad_group) + self._add_dependencies(grad_group) + self.summary(grad_group) def _prune_grad_scaling(self): @@ -284,7 +291,6 @@ def _comms_overlap_calc(self): # InterpreterCore has a different logic for overlapping # which is different from use_calc_stream block = 
default_main_program().global_block() - ops = block.ops # comm wait calc to finish for idx, op in reversed(list(enumerate(block.ops))): @@ -294,7 +300,6 @@ def _comms_overlap_calc(self): op._set_attr('use_calc_stream', False) ring_id = op.attr("ring_id") - block._insert_op_without_sync( idx, type='c_wait_compute', @@ -307,8 +312,10 @@ def _comms_overlap_calc(self): def _calc_wait_comms(self): + if use_standalone_executor(): + return + block = default_main_program().global_block() - ops = block.ops # NOTE the naive overlap implement in static hybird parallel only sync comm stream # at the end of Backward phase, based on a strong constraint that @@ -325,7 +332,7 @@ def _calc_wait_comms(self): ring_id_to_un_sync_grad_map[group.id] = [] # analyze the where need to sync - for i, op in enumerate(ops): + for i, op in enumerate(block.ops): if is_data_parallel_reduce_op(op): ring_id = op.attr("ring_id") grad_name = op.output_arg_names[0] @@ -365,6 +372,7 @@ def _calc_wait_comms(self): outputs={'Out': []}, attrs={'op_role': OpRole.Backward, 'ring_id': ring_id}, ) + block._sync_with_cpp() def _could_be_fuse(self): # TODO support gradient fuse higher order gradient. @@ -404,8 +412,6 @@ def _group_grads(self): def collect_group(cur_group, grad_var, ring_id, i): if len(cur_group.gradients) == 0: cur_group = None - elif len(cur_group.gradients) == 1: - grouped_grad_names.remove(cur_group.gradients[0].name) else: cur_group.finalize() grad_groups.append(cur_group) @@ -451,9 +457,16 @@ def _update_program(self, grad_groups): for i, group in enumerate(grad_groups[::-1]): + # skip unfused big tensor + if len(group.gradients) <= 1: + group.coalesce_var = group.gradients[0] + continue + # create coalecse tensor group.coalesce_var = block.create_var( - name=unique_name.generate('coalecse_grad_{}'.format(i)), + name=unique_name.generate( + self.coalesce_prefix + '_{}'.format(i) + ), dtype=group.dtype, persistable=False, stop_gradient=True, @@ -497,7 +510,7 @@ def _update_program(self, grad_groups): ), "Unexception: try to remove op {}".format( str(block.ops[idx]) ) - block._remove_op(idx) + block._remove_op(idx, False) # insert coalecse op concated_shapes = [] @@ -529,6 +542,141 @@ def _update_program(self, grad_groups): block._sync_with_cpp() # TODO update dist attr + def _add_dependencies(self, grad_groups): + # NOTE Currently, auto_parallel need to adopt for two executors: Sequential executor (old exe) and Graph based + # multiple stream executor(standalone exe). This function just for standalone exe. Refactor here + # in future when only one executor stay. 
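[Editor's note, not part of the patch] A condensed sketch of what this method emits around each fused gradient once the standalone executor is enabled; the variable names below are illustrative, but the call mirrors the pass's own use of the insert_dependencies_for_two_vars helper added to auto_parallel/utils.py in this same commit:

    # prior dep: the last grad written into the fused buffer must precede the coalesced var
    dep = insert_dependencies_for_two_vars(
        block,
        idx,
        prior_var,            # e.g. the last sliced grad produced before c_allreduce_sum
        post_var,             # the coalesce_grad_* variable
        self.dist_context,
        OpRole.Backward,
        process_mesh=[-1],    # placeholder; dist attr init is skipped for the coalesced var
        is_recompute=False,
        sync=False,
    )
    dep.dist_attr.execution_stream = self.gradient_sync_stream
    # a mirrored post dep is inserted after the allreduce, so the executor keeps
    # the write -> allreduce -> read ordering on the fused buffer

The c_allreduce_sum op itself is later switched to use_calc_stream=True and assigned the same gradient_sync_stream, which is what the new unit test at the end of this commit asserts (a nop op immediately before and after the allreduce).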
+ + if not use_standalone_executor() or len(grad_groups) == 0: + return + block = default_main_program().global_block() + + # Build maps + vars_to_coalesce_map = {} + coalesce_to_vars_map = {} + + for group in grad_groups: + grad_names = [] + coalesce_name = group.coalesce_var.name + for grad in group.gradients: + vars_to_coalesce_map[grad.name] = coalesce_name + grad_names.append(grad.name) + coalesce_to_vars_map[coalesce_name] = grad_names + + # analyze dependencies + # Record ONLY the last grad that generated before allreduce + # NOTE need to be update when we allow multiple calc stream for backward calc + not_sync_coalesces = [] + prior_allreduce_deps = {} + for idx, op in reversed(list(enumerate(block.ops))): + if is_forward_op(op): + break + if is_optimize_op(op): + continue + + if is_data_parallel_reduce_op(op): + coalesce_var_name = op.output_arg_names[0] + + # NOTE only add extra deps for fused tensor, other tensor rely on + # data flow analysis of executor. + if self.coalesce_prefix in coalesce_var_name: + prior_allreduce_deps[coalesce_var_name] = [ + idx, + None, + coalesce_var_name, + ] + not_sync_coalesces.append(coalesce_var_name) + continue + + for out_name in op.output_arg_names: + var_name = vars_to_coalesce_map.get(out_name, None) + if var_name in not_sync_coalesces: + prior_allreduce_deps[var_name][1] = out_name + not_sync_coalesces.remove(var_name) + assert ( + len(not_sync_coalesces) == 0 + ), "Unexception: {} has NOT been add prior Dep before allreduce.".format( + not_sync_coalesces + ) + + # Record ONLY the first grad that used after allreduce + # NOTE need to be update when we allow multiple calc stream for backward calc + not_sync_coalesces = [] + post_allreduce_deps = {} + for idx, op in enumerate(block.ops): + if is_forward_op(op): + continue + + if is_data_parallel_reduce_op(op): + coalesce_var_name = op.input_arg_names[0] + if self.coalesce_prefix in coalesce_var_name: + post_allreduce_deps[coalesce_var_name] = [ + None, + coalesce_var_name, + None, + ] + not_sync_coalesces.append(coalesce_var_name) + continue + + for out_name in op.input_arg_names: + var_name = vars_to_coalesce_map.get(out_name, None) + if var_name in not_sync_coalesces: + post_allreduce_deps[var_name][0] = idx + post_allreduce_deps[var_name][2] = out_name + not_sync_coalesces.remove(var_name) + + assert ( + len(not_sync_coalesces) == 0 + ), "Unexception: {} has NOT been add post Dep after allreduce.".format( + not_sync_coalesces + ) + + # Update program IR insert dependencise op + dep_var_pairs = [] + for deps in [prior_allreduce_deps, post_allreduce_deps]: + for pair in deps.values(): + dep_var_pairs.append(pair) + + dep_var_pairs.sort(key=lambda x: x[0], reverse=True) + for idx, prior_name, post_name in dep_var_pairs: + prior_var = block.var(prior_name) + post_var = block.var(post_name) + depend_op = insert_dependencies_for_two_vars( + block, + idx, + prior_var, + post_var, + self.dist_context, + OpRole.Backward, + process_mesh=[ + -1 + ], # hack to avoid initialize the dist attr for coalesc var + is_recompute=False, + sync=False, + ) + depend_op.dist_attr.execution_stream = self.gradient_sync_stream + block._sync_with_cpp() + + # remove naive synchronization & assign allreduce stream + def remove_cond(op): + if op.type != "c_wait_compute": + return False + if len(op.input_arg_names) != 0: + return False + if len(op.output_arg_names) != 0: + return False + return True + + for idx, op in reversed(list(enumerate(block.ops))): + if is_data_parallel_reduce_op(op): + 
op._set_attr('use_calc_stream', True) + op.dist_attr.execution_stream = self.gradient_sync_stream + + if remove_cond(op): + block._remove_op(idx, sync=False) + + block._sync_with_cpp() + def summary(self, grad_groups=[]): # TODO: add logger module import logging diff --git a/python/paddle/distributed/passes/auto_parallel_grad_clip.py b/python/paddle/distributed/passes/auto_parallel_grad_clip.py index 73432baa1d3c3d..a475f8e0ac317e 100644 --- a/python/paddle/distributed/passes/auto_parallel_grad_clip.py +++ b/python/paddle/distributed/passes/auto_parallel_grad_clip.py @@ -26,6 +26,8 @@ OP_ROLE_KEY, OpRole, _get_comm_group, + insert_dependencies_for_two_vars, + use_standalone_executor, ) from ..auto_parallel.dist_attribute import ( TensorDistributedAttribute, @@ -334,6 +336,7 @@ def _remove_no_need_ops_vars(self, block): if op.type == 'sqrt': input_name = op.input("X")[0] input_var = block.vars[input_name] + insert_leaf_fill_constant_node = False if paddle.distributed.get_world_size() > 1: offset = 0 if input_name in removed_tmp_var: @@ -356,6 +359,7 @@ def _remove_no_need_ops_vars(self, block): ) offset += 1 self.clip_helper._init_dist_attr(fill_constant_op) + insert_leaf_fill_constant_node = True allreduce_op = block._insert_op( idx + offset, @@ -373,6 +377,45 @@ def _remove_no_need_ops_vars(self, block): ) self.clip_helper._init_dist_attr(allreduce_op) + if ( + use_standalone_executor + and insert_leaf_fill_constant_node + ): + + # NOTE add naive deps for global norm sync in graph exe + j = idx - 1 + prior_op = None + while j > 0: + op_type = block.ops[j].type + if op_type in [ + 'update_loss_scaling', + 'check_finite_and_unscale', + ] or op_type.endswith("_grad"): + prior_op = block.ops[j] + break + j -= 1 + print("here: ", block.ops[j]) + assert ( + prior_op is not None + ), "Unexception: ClipByGlobalNorm could not find priory depend op" + prior_var = block.vars[prior_op.output_arg_names[0]] + assert ( + prior_var is not None + ), "Unexception: ClipByGlobalNorm could not find priory depend var" + insert_dependencies_for_two_vars( + block, + idx, + prior_var, + input_var, + self.clip_helper.dist_context, + OpRole.Optimize, + process_mesh=[ + -1 + ], # hack to avoid initialize the dist attr for coalesc var + is_recompute=False, + sync=False, + ) + for varname in removed_tmp_var: block._remove_var(varname, sync=False) diff --git a/python/paddle/distributed/passes/auto_parallel_recompute.py b/python/paddle/distributed/passes/auto_parallel_recompute.py index 72a116a5eb3afe..23fb73f10eff71 100644 --- a/python/paddle/distributed/passes/auto_parallel_recompute.py +++ b/python/paddle/distributed/passes/auto_parallel_recompute.py @@ -203,6 +203,7 @@ def modify_forward_desc_for_recompute(self, dist_context): if cur_op.attr("fix_seed") is False else int(cur_op.attr("seed")) ) + # TODO add dependency for seed op to ensure it be issued just before recompute. 
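            # [Editor's note, not part of the patch] One possible shape for the TODO
            # above, reusing the insert_dependencies_for_two_ops helper that this same
            # series extends. Which op should play the prior role, and where the
            # dependency should be inserted, are left open by the TODO, so the sketch
            # below (written as if seed_op from the next statement already exists) is
            # an assumption rather than the author's plan:
            #
            #     insert_dependencies_for_two_ops(
            #         self._block,
            #         seed_op.idx,        # assumed insertion point
            #         prior_op,           # assumed: the op the seed op must wait for
            #         seed_op,
            #         dist_context,
            #         is_recompute=True,  # as in this pass's other call site
            #         sync=False,
            #     )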
seed_op = self._block._insert_op_without_sync( index=cur_op.idx, type="seed", @@ -490,6 +491,7 @@ def _apply_single_impl(self, main_program, startup_program, context): prior_op, posterior_op, self._dist_context, + is_recompute=True, sync=False, ) main_program._sync_with_cpp() diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py index ee61a156757ba4..5a6486991dc9da 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import random import sys import unittest @@ -24,6 +25,9 @@ from paddle.distributed.auto_parallel.dist_context import ( get_default_distributed_context, ) +from paddle.distributed.auto_parallel.operators.common import ( + is_data_parallel_reduce_op, +) from paddle.distributed.passes import PassContext, new_pass sys.path.append("..") @@ -116,5 +120,63 @@ def get_model(self, place, batch_size, sequence_len, vocab_size): return dist_main_prog, dist_startup_prog, data_holder, [loss], gen_data +class TestDataParallelPassWithStandaloneEXE(TestDataParallelPassWithScale1): + def init(self): + if paddle.is_compiled_with_cuda(): + os.environ['FLAGS_CONVERT_GRAPH_TO_PROGRAM'] = "1" + paddle.set_flags({'FLAGS_cudnn_deterministic': 1}) + self.rtol = 1e-5 + self.atol = 1e-8 + # NOTE a hack to compare pass apply or not, since there is no + # setting of this pass in dist_strategy + self._apply_pass = False + + rank = paddle.distributed.get_rank() + paddle.seed(rank + 2021) + random.seed(rank + 2021) + np.random.seed(rank + 2021) + + # test scaling with optimizer rescale_grad + def get_model(self, place, batch_size, sequence_len, vocab_size): + + ( + dist_main_prog, + dist_startup_prog, + data_holder, + [loss], + gen_data, + ) = self.get_gpt_model( + 'dp', + place, + batch_size, + sequence_len, + vocab_size, + optimizer='LarsMomentum', + ) + if self._apply_pass: + config = {} + config["dist_context"] = get_default_distributed_context() + config["global_rank"] = paddle.distributed.get_rank() + dp_pass = new_pass( + "auto_parallel_data_parallel_optimization", config + ) + dp_pass.apply([dist_main_prog], [dist_startup_prog], PassContext()) + + ops = dist_main_prog.global_block().ops + allreduce_op_idx = -1 + for idx in range(len(ops)): + if is_data_parallel_reduce_op(ops[idx]): + allreduce_op_idx = idx + break + assert allreduce_op_idx > 0 + allreduce_op = ops[allreduce_op_idx] + assert allreduce_op.attr('use_calc_stream') is True + assert allreduce_op.dist_attr.execution_stream is not None + assert ops[allreduce_op_idx - 1].type == "nop" + assert ops[allreduce_op_idx + 1].type == "nop" + + return dist_main_prog, dist_startup_prog, data_holder, [loss], gen_data + + if __name__ == "__main__": unittest.main() From 7078c1e16222798e4dd741c72921290f42f6ed0f Mon Sep 17 00:00:00 2001 From: Vvsmile <450864116@qq.com> Date: Tue, 29 Nov 2022 14:47:49 +0800 Subject: [PATCH 019/154] [Clean Fluid API]Remove API: lod_append (remove directly) (#47941) * Remove API: lod_append remove lod_append which is not used in Paddle 2.0 * remove lod_append test file --- python/paddle/fluid/layers/nn.py | 66 ------------------ 
.../tests/unittests/test_lod_append_op.py | 69 ------------------- tools/parallel_UT_rule.py | 2 - tools/static_mode_white_list.py | 1 - 4 files changed, 138 deletions(-) delete mode 100644 python/paddle/fluid/tests/unittests/test_lod_append_op.py diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 00da0deea20c7c..60371974007e51 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -92,7 +92,6 @@ 'autoincreased_step_counter', 'unsqueeze', 'lod_reset', - 'lod_append', 'pad', 'image_resize', 'resize_bilinear', @@ -4360,71 +4359,6 @@ def lod_reset(x, y=None, target_lod=None): return out -def lod_append(x, level): - """ - Append level to LoD of :attr:`x`. - - .. code-block:: text - - * Example 1: - - given a 1-level LoDTensor x: - x.lod = [[ 2, 3, 1 ]] - x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] - x.dims = [6, 1] - - level: [1, 1, 1, 1, 1, 1, 1] - - then we get a 2-level LoDTensor: - x.lod = [[ 2, 3, 1 ], [1, 1, 1, 1, 1, 1]] - x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] - x.dims = [6, 1] - - Args: - x (Variable): Input variable which could be a tensor or LoDTensor. - The data type should be int32, int64, float32 or float64. - level (list|tuple|Variable, optional): The LoD level to be appended into LoD of x. - If level is variable and its lod level>0, the data type can be any type. - If level is variable and its lod level=0, the data type should be int32. - Returns: - Variable: Output variable with new LoD level. - - Raises: - ValueError: If :attr:`y` is None or and :attr:`level` is not Iterator. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - x = fluid.layers.data(name='x', shape=[6, 10], lod_level=1) - out = fluid.layers.lod_append(x, [1,1,1,1,1,1]) - """ - if x is None: - raise ValueError("Input(x) can't be None.") - if (not isinstance(level, Iterable)) and (not isinstance(level, Variable)): - raise ValueError("Input(level) must be list, tuple or Variable.") - - check_variable_and_dtype( - x, 'x', ['float32', 'float64', 'int32', 'int64'], 'lod_append' - ) - - helper = LayerHelper("lod_append", **locals()) - out = helper.create_variable_for_type_inference(dtype=x.dtype) - - inputs = {'X': x} - attrs = {'append': True} - - if isinstance(level, Variable): - inputs['Y'] = level - # TODO: check y.lod_level = 0 dtype - else: - attrs['target_lod'] = level - helper.append_op( - type="lod_reset", inputs=inputs, attrs=attrs, outputs={'Out': out} - ) - return out - - def pad(x, paddings, pad_value=0.0, name=None): r""" :alias_main: paddle.nn.functional.pad diff --git a/python/paddle/fluid/tests/unittests/test_lod_append_op.py b/python/paddle/fluid/tests/unittests/test_lod_append_op.py deleted file mode 100644 index 721a247d5d6009..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_lod_append_op.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest -import numpy as np -import paddle.fluid as fluid -from paddle.fluid import Program - - -class TestLoDAppendAPI(unittest.TestCase): - def test_api(self, use_cuda=False): - main_program = Program() - with fluid.program_guard(main_program): - x = fluid.layers.data(name='x', shape=[6], dtype='float32') - level = fluid.layers.data( - name='level', shape=[3], dtype='int32', lod_level=0 - ) - result = fluid.layers.lod_append(x, level) - - x_i = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0]).astype("float32") - level_i = np.array([0, 2, 6]).astype("int32") - - for use_cuda in [False, True]: - if use_cuda and not fluid.core.is_compiled_with_cuda(): - return - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - exe = fluid.Executor(place) - [out] = exe.run( - fluid.default_main_program(), - feed={'x': x_i, 'level': level_i}, - fetch_list=[result], - return_numpy=False, - ) - self.assertEqual(out.recursive_sequence_lengths(), [[2, 4]]) - - -class TestLodAppendOpError(unittest.TestCase): - def test_error(self): - # The input(x) must be Variable. - x1 = np.array([0.9383, 0.1983, 3.2, 1.2]).astype("float64") - level1 = [0, 2, 4] - self.assertRaises(TypeError, fluid.layers.lod_append, x1, level1) - - # The input(level) must be Variable or list. - x2 = fluid.layers.data(name='x2', shape=[4], dtype='float32') - self.assertRaises(ValueError, fluid.layers.lod_append, x2, 2) - - # Input(x) dtype must be float32 or float64 or int32 or int64 - for dtype in ["bool", "float16"]: - x3 = fluid.layers.data(name='x3_' + dtype, shape=[4], dtype=dtype) - level3 = fluid.layers.data( - name='level3' + dtype, shape=[4], dtype='int32', lod_level=2 - ) - self.assertRaises(TypeError, fluid.layers.lod_append, x3, level3) - - -if __name__ == "__main__": - unittest.main() diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index 30d91d9685dbfa..b2cda25fc93418 100755 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -1124,7 +1124,6 @@ 'test_tree_conv_op', 'test_share_data_op', 'test_ir_memory_optimize_transformer', - 'test_lod_append_op', 'test_math_op_patch', 'test_base_layer', 'test_dequantize_log_op', @@ -2467,7 +2466,6 @@ 'test_merged_momentum_op', 'test_median', 'test_math_op_patch_var_base', - 'test_lod_append_op', 'test_layer_norm_op_v2', 'test_label_smooth_functional', 'test_instance_norm_op', diff --git a/tools/static_mode_white_list.py b/tools/static_mode_white_list.py index aec18068c9a5f6..ed5fb31009a959 100755 --- a/tools/static_mode_white_list.py +++ b/tools/static_mode_white_list.py @@ -314,7 +314,6 @@ 'test_load_op', 'test_load_vars_shape_check', 'test_locality_aware_nms_op', - 'test_lod_append_op', 'test_lod_array_length_op', 'test_lod_rank_table', 'test_lod_tensor_array_ops', From f080521289400153c9501f469024d10859a8000e Mon Sep 17 00:00:00 2001 From: ccrrong <101700995+ccrrong@users.noreply.github.com> Date: Tue, 29 Nov 2022 15:01:59 +0800 Subject: [PATCH 020/154] remove pool3d from fluid (#48455) * remove pool3d --- python/paddle/fluid/layers/nn.py | 241 --------------- .../ir/inference/test_trt_pool3d_op.py | 84 ++--- .../fluid/tests/unittests/test_layers.py | 14 - .../fluid/tests/unittests/test_pool3d_op.py | 288 ------------------ 4 files changed, 17 insertions(+), 610 deletions(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 60371974007e51..4743e4b49f2164 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -71,7 +71,6 @@ 'conv2d', 'softmax', 'pool2d', - 'pool3d', 
'batch_norm', 'reduce_mean', 'reduce_all', @@ -1895,246 +1894,6 @@ def is_list_or_tuple(ele): return pool_out -@templatedoc() -def pool3d( - input, - pool_size=-1, - pool_type="max", - pool_stride=1, - pool_padding=0, - global_pooling=False, - use_cudnn=True, - ceil_mode=False, - name=None, - exclusive=True, - data_format="NCDHW", -): - """ - - ${comment} - - Args: - input (Variable): The input tensor of pooling operator, which is a 5-D tensor with - shape [N, C, D, H, W]. The format of - input tensor is `"NCDHW"` or `"NDHWC"`, where `N` is batch size, `C` is - the number of channels, `D` is the depth of the feature, - `H` is the height of the feature, and `W` is the width - of the feature. - pool_size (int|list|tuple): The pool kernel size. If pool kernel size - is a tuple or list, it must contain three integers, - (pool_size_Depth, pool_size_Height, pool_size_Width). - Otherwise, the pool kernel size will be the cube of an int. - pool_type (string): ${pooling_type_comment} - pool_stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool stride size is a tuple or list, - it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`. - Otherwise, the pool stride size will be a cube of an int. - pool_padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list, - it could be in three forms: `[pad_depth, pad_height, pad_width]` or - `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, - and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form - `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NDHWC"`, `pool_padding` can be in the form - `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. - global_pooling (bool): ${global_pooling_comment} - use_cudnn (bool): ${use_cudnn_comment} - ceil_mode (bool): ${ceil_mode_comment} - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - exclusive (bool): Whether to exclude padding points in average pooling - mode, default is true. - data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. - The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_depth, input_height, input_width]`. - - Returns: - Variable: The output tensor of pooling result. The data type is same as input tensor. - - Raises: - ValueError: If `pool_type` is not "max" nor "avg". - ValueError: If `global_pooling` is False and `pool_size` is -1. - TypeError: If `use_cudnn` is not a bool value. - ValueError: If `data_format` is not "NCDHW" or "NDHWC". - ValueError: If `pool_padding` is a string, but not "SAME" or "VALID". - ValueError: If `pool_padding` is "VALID", but `ceil_mode` is True. - ValueError: If `pool_padding` is a list or tuple, but the elements in the batch or channel dimensions are non-zero. - ShapeError: If the input is not a 4-D or 5-D Tensor. - ShapeError: If the dimension of input minus the size of `pool_stride` is not 2. - ShapeError: If the size of `pool_size` and `pool_stride` is not equal. - ShapeError: If the output's shape calculated is not greater than 0. - - Examples: - - .. 
code-block:: python - - import paddle.fluid as fluid - import paddle - - paddle.enable_static() - - data = fluid.data(name='data', shape=[None, 3, 32, 32, 32], dtype='float32') - - # max pool3d - pool3d = fluid.layers.pool3d( - input = data, - pool_size = 2, - pool_type = "max", - pool_stride = 1, - global_pooling=False) - - # average pool3d - pool3d = fluid.layers.pool3d( - input = data, - pool_size = 2, - pool_type = "avg", - pool_stride = 1, - global_pooling=False) - - # global average pool3d - pool3d = fluid.layers.pool3d( - input = data, - pool_size = 2, - pool_type = "avg", - pool_stride = 1, - global_pooling=True) - - # example 1: - # Attr(pool_padding) is a list with 6 elements, Attr(data_format) is "NCDHW". - out_1 = fluid.layers.pool3d( - input = data, - pool_size = 2, - pool_type = "avg", - pool_stride = 1, - pool_padding = [1, 2, 1, 0, 1, 2], - global_pooling = False, - data_format = "NCDHW") - - # example 2: - # Attr(pool_padding) is a string, Attr(data_format) is "NCDHW". - out_2 = fluid.layers.pool3d( - input = data, - pool_size = 3, - pool_type = "avg", - pool_stride = 1, - pool_padding = "VALID", - global_pooling = False, - data_format = "NCDHW") - - """ - if pool_type not in ["max", "avg"]: - raise ValueError( - "Unknown Attr(pool_type): '%s'. It can only be 'max' or 'avg'.", - str(pool_type), - ) - - if global_pooling is False and pool_size == -1: - raise ValueError( - "When Attr(global_pooling) is False, Attr(pool_size) must be passed " - "and be a valid value. Received Attr(pool_size): %s." - % str(pool_size) - ) - - if not isinstance(use_cudnn, bool): - raise TypeError( - "Attr(use_cudnn) should be True or False. Received " - "Attr(use_cudnn): %s. " % str(use_cudnn) - ) - - if data_format not in ["NCDHW", "NDHWC"]: - raise ValueError( - "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received " - "Attr(data_format): %s" % str(data_format) - ) - - pool_size = utils.convert_to_list(pool_size, 3, 'pool_size') - pool_stride = utils.convert_to_list(pool_stride, 3, 'pool_stride') - - def update_padding(padding, data_format): - def is_list_or_tuple(ele): - if isinstance(ele, (list, tuple)): - return True - return False - - if is_list_or_tuple(padding) and len(padding) == 5: - if is_list_or_tuple(padding[0]) and (data_format == "NCDHW"): - if not (padding[0] == [0, 0] and padding[1] == [0, 0]): - raise ValueError( - "Non-zero pool_padding(%s) in the batch or channel dimensions " - "is not supported." % str(padding) - ) - padding = padding[2:5] - padding = [ele for a_list in padding for ele in a_list] - elif is_list_or_tuple(padding[0]) and (data_format == "NDHWC"): - if not (padding[0] == [0, 0] and padding[4] == [0, 0]): - raise ValueError( - "Non-zero pool_padding(%s) in the batch or channel dimensions " - "is not supported." % str(padding) - ) - padding = padding[1:4] - padding = [ele for a_list in padding for ele in a_list] - padding = utils.convert_to_list(padding, 6, 'padding') - if utils._is_symmetric_padding(padding, 3): - padding = [padding[0], padding[2], padding[4]] - - elif is_list_or_tuple(padding) and len(padding) == 6: - padding = utils.convert_to_list(padding, 6, 'padding') - if utils._is_symmetric_padding(padding, 3): - padding = [padding[0], padding[2], padding[4]] - else: - padding = utils.convert_to_list(padding, 3, 'padding') - - return padding - - padding_algorithm = "EXPLICIT" - if isinstance(pool_padding, str): - pool_padding = pool_padding.upper() - if pool_padding not in ["SAME", "VALID"]: - raise ValueError( - "Unknown Attr(pool_padding): '%s'. 
It can only be 'SAME' or 'VALID'." - % str(pool_padding) - ) - if pool_padding == "VALID": - padding_algorithm = "VALID" - pool_padding = [0, 0, 0] - if ceil_mode != False: - raise ValueError( - "When Attr(pool_padding) is \"VALID\", ceil_mode must be False. " - "Received ceil_mode: True." - ) - elif pool_padding == "SAME": - padding_algorithm = "SAME" - pool_padding = [0, 0, 0] - - pool_padding = update_padding(pool_padding, data_format) - - op_type = "pool3d" - helper = LayerHelper(op_type, **locals()) - dtype = helper.input_dtype() - pool_out = helper.create_variable_for_type_inference(dtype) - - helper.append_op( - type=op_type, - inputs={"X": input}, - outputs={"Out": pool_out}, - attrs={ - "pooling_type": pool_type, - "ksize": pool_size, - "global_pooling": global_pooling, - "strides": pool_stride, - "paddings": pool_padding, - "padding_algorithm": padding_algorithm, - "use_cudnn": use_cudnn, - "ceil_mode": ceil_mode, - "use_mkldnn": False, - "exclusive": exclusive, - "data_format": data_format, - }, - ) - - return pool_out - - def batch_norm( input, act=None, diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py index 087622f4f7ec31..0362d96fc2a912 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py @@ -37,7 +37,6 @@ def setUp(self): self.pool_type = 'max' self.pool_stride = 1 self.pool_padding = 0 - self.global_pooling = False self.ceil_mode = False self.exclusive = False self.enable_trt = True @@ -64,16 +63,23 @@ def build_network(self): shape=[-1, self.channel, self.depth, self.height, self.width], dtype='float32', ) - pool_out = fluid.layers.pool3d( - input=data, - pool_size=self.pool_size, - pool_type=self.pool_type, - pool_stride=self.pool_stride, - pool_padding=self.pool_padding, - global_pooling=self.global_pooling, - ceil_mode=self.ceil_mode, - exclusive=self.exclusive, - ) + if self.pool_type == "max": + pool_out = paddle.nn.functional.max_pool3d( + x=data, + kernel_size=self.pool_size, + stride=self.pool_stride, + padding=self.pool_padding, + ceil_mode=self.ceil_mode, + ) + else: + pool_out = paddle.nn.functional.avg_pool3d( + x=data, + kernel_size=self.pool_size, + stride=self.pool_stride, + padding=self.pool_padding, + ceil_mode=self.ceil_mode, + exclusive=self.exclusive, + ) # out = fluid.layers.batch_norm(pool_out, is_test=True) self.fetch_list = [pool_out] @@ -158,62 +164,6 @@ def set_extra_config(self): self.pool_type = 'avg' self.pool_stride = 1 self.pool_padding = 0 - self.global_pooling = False - self.ceil_mode = False - self.exclusive = False - - -class TensorRTGlobalPool3dTest(TensorRTPool3dTest): - def set_extra_config(self): - self.pool_size = 2 - self.pool_type = 'max' - self.pool_stride = 1 - self.pool_padding = 0 - self.global_pooling = True - self.ceil_mode = False - self.exclusive = False - - -class TensorRTCeilPool3dTest(TensorRTPool3dTest): - def set_extra_config(self): - self.pool_size = 2 - self.pool_type = 'max' - self.pool_stride = 1 - self.pool_padding = 0 - self.global_pooling = False - self.ceil_mode = True - self.exclusive = False - - -class TensorRTExclusivePool3dTest(TensorRTPool3dTest): - def set_extra_config(self): - self.pool_size = 2 - self.pool_type = 'max' - self.pool_stride = 1 - self.pool_padding = 0 - self.global_pooling = False - self.ceil_mode = False - self.exclusive = True - - -class 
TensorRTSamePaddingPool3dTest(InferencePassTest): - def set_extra_config(self): - self.pool_size = 2 - self.pool_type = 'max' - self.pool_stride = 1 - self.pool_padding = 'SAME' - self.global_pooling = False - self.ceil_mode = False - self.exclusive = False - - -class TensorRTValidPaddingPool3dTest(InferencePassTest): - def set_extra_config(self): - self.pool_size = 2 - self.pool_type = 'max' - self.pool_stride = 1 - self.pool_padding = 'VALID' - self.global_pooling = False self.ceil_mode = False self.exclusive = False diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 0fc87dd03b6e31..bac33f3e65f398 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -3178,20 +3178,6 @@ def make_pool2d_infershape(self): x, pool_size=[5, 3], pool_stride=[1, 2], pool_padding=(2, 1) ) - def make_pool3d(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data( - name='x', shape=[3, 244, 244, 244], dtype='float32' - ) - return layers.pool3d( - x, - pool_size=[5, 3, 2], - pool_stride=[1, 2, 3], - pool_padding=(2, 1, 1), - ) - def make_lstm_unit(self): with program_guard( fluid.default_main_program(), fluid.default_startup_program() diff --git a/python/paddle/fluid/tests/unittests/test_pool3d_op.py b/python/paddle/fluid/tests/unittests/test_pool3d_op.py index 09222e99c3622c..0c62aeb257ad36 100644 --- a/python/paddle/fluid/tests/unittests/test_pool3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pool3d_op.py @@ -18,7 +18,6 @@ import paddle import paddle.fluid.core as core from op_test import OpTest -import paddle.fluid as fluid def adaptive_start_index(index, input_size, output_size): @@ -1027,292 +1026,5 @@ def init_paddings(self): create_test_cudnn_padding_VALID_class(TestCase5_channel_last) -# test API -class TestPool3DAPI(unittest.TestCase): - def test_api(self): - x_NDHWC = np.random.random([2, 5, 5, 5, 3]).astype("float32") - x_NCDHW = np.random.random([2, 3, 5, 5, 5]).astype("float32") - - input_NDHWC = fluid.layers.data( - name="input_NDHWC", - shape=[2, 5, 5, 5, 3], - append_batch_size=False, - dtype="float32", - ) - - input_NCDHW = fluid.layers.data( - name="input_NCDHW", - shape=[2, 3, 5, 5, 5], - append_batch_size=False, - dtype="float32", - ) - - ksize = [3, 3, 3] - out_1 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="max", - pool_padding=[1, 1, 1], - use_cudnn=False, - data_format="NDHWC", - ) - - out_2 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="avg", - pool_padding=[[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]], - use_cudnn=False, - data_format="NDHWC", - ) - - out_3 = fluid.layers.pool3d( - input=input_NCDHW, - pool_size=ksize, - pool_type="avg", - pool_padding=[[0, 0], [0, 0], [1, 1], [1, 1], [1, 1]], - use_cudnn=False, - data_format="NCDHW", - ) - - out_4 = fluid.layers.pool3d( - input=input_NCDHW, - pool_size=ksize, - pool_type="avg", - pool_padding=[1, 2, 1, 0, 0, 1], - use_cudnn=False, - data_format="NCDHW", - ) - # test VALID - out_5 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="avg", - pool_padding="VALID", - use_cudnn=False, - data_format="NDHWC", - ) - - out_6 = fluid.layers.pool3d( - input=input_NCDHW, - pool_size=ksize, - pool_type="avg", - pool_padding="VALID", - use_cudnn=False, - data_format="NCDHW", - ) - - # test SAME - out_7 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - 
pool_stride=[1, 1, 2], - pool_type="avg", - pool_padding="SAME", - use_cudnn=False, - data_format="NDHWC", - ) - - out_8 = fluid.layers.pool3d( - input=input_NCDHW, - pool_size=[4, 4, 4], - pool_type="avg", - pool_padding="SAME", - use_cudnn=False, - data_format="NCDHW", - ) - - exe = fluid.Executor(place=fluid.CPUPlace()) - [res_1, res_2, res_3, res_4, res_5, res_6, res_7, res_8] = exe.run( - fluid.default_main_program(), - feed={"input_NDHWC": x_NDHWC, "input_NCDHW": x_NCDHW}, - fetch_list=[out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8], - ) - - assert np.allclose( - res_1, - pool3D_forward_naive( - x=x_NDHWC, - ksize=ksize, - pool_type="max", - strides=[1, 1, 1], - paddings=[1, 1, 1], - data_format="NDHWC", - ), - ) - - assert np.allclose( - res_2, - pool3D_forward_naive( - x=x_NDHWC, - ksize=ksize, - pool_type="avg", - strides=[1, 1, 1], - paddings=[1, 1, 1, 1, 1, 1], - data_format="NDHWC", - ), - ) - assert np.allclose( - res_3, - pool3D_forward_naive( - x=x_NCDHW, - ksize=ksize, - pool_type="avg", - strides=[1, 1, 1], - paddings=[1, 1, 1, 1, 1, 1], - data_format="NCDHW", - ), - rtol=0.07, - atol=1e-05, - ) - - assert np.allclose( - res_4, - pool3D_forward_naive( - x=x_NCDHW, - ksize=ksize, - pool_type="avg", - strides=[1, 1, 1], - paddings=[1, 2, 1, 0, 0, 1], - data_format="NCDHW", - ), - rtol=0.07, - atol=1e-05, - ) - # VALID - assert np.allclose( - res_5, - pool3D_forward_naive( - x=x_NDHWC, - ksize=ksize, - pool_type="avg", - strides=[1, 1, 1], - paddings=[10, 20], - padding_algorithm="VALID", - data_format="NDHWC", - ), - ) - - assert np.allclose( - res_6, - pool3D_forward_naive( - x=x_NCDHW, - ksize=ksize, - pool_type="avg", - strides=[1, 1, 1], - paddings=[10, 20], - padding_algorithm="VALID", - data_format="NCDHW", - ), - rtol=0.07, - atol=1e-05, - ) - # SAME - assert np.allclose( - res_7, - pool3D_forward_naive( - x=x_NDHWC, - ksize=ksize, - pool_type="avg", - strides=[1, 1, 2], - paddings=[10, 20], - padding_algorithm="SAME", - data_format="NDHWC", - ), - ) - - assert np.allclose( - res_8, - pool3D_forward_naive( - x=x_NCDHW, - ksize=[4, 4, 4], - pool_type="avg", - strides=[1, 1, 1], - paddings=[10, 20], - padding_algorithm="SAME", - data_format="NCDHW", - ), - rtol=0.07, - atol=1e-05, - ) - - -class TestPool3DAPI_Error(unittest.TestCase): - def test_api(self): - input_NDHWC = fluid.layers.data( - name="input_NDHWC", - shape=[2, 5, 5, 5, 3], - append_batch_size=False, - dtype="float32", - ) - ksize = [3, 3, 3] - - # cudnn type error - def run_1(): - out_1 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="max", - pool_padding=[1, 1, 1], - use_cudnn=[0], - data_format="NDHWC", - ) - - self.assertRaises(TypeError, run_1) - - # data_format value error - def run_2(): - out_2 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="max", - pool_padding=[1, 1, 1], - use_cudnn=False, - data_format="NDHWCC", - ) - - self.assertRaises(ValueError, run_2) - - # padding str value error - def run_3(): - out_3 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="max", - pool_padding="VALIDSAME", - use_cudnn=False, - data_format="NDHWC", - ) - - self.assertRaises(ValueError, run_3) - - # padding str valid and ceil_mode value error - def run_4(): - out_4 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="max", - pool_padding="VALID", - use_cudnn=False, - ceil_mode=True, - data_format="NDHWC", - ) - - self.assertRaises(ValueError, run_4) - - # padding with 8 ele. 
value error - def run_5(): - out_5 = fluid.layers.pool3d( - input=input_NDHWC, - pool_size=ksize, - pool_type="max", - pool_padding=[[1, 1], [0, 0], [0, 0], [1, 1], [1, 1]], - use_cudnn=False, - data_format="NDHWC", - ) - - self.assertRaises(ValueError, run_5) - - if __name__ == '__main__': unittest.main() From c0d31dac1a0326160b74fbb6e3630ffc5bd9f44e Mon Sep 17 00:00:00 2001 From: LiYuRio <63526175+LiYuRio@users.noreply.github.com> Date: Tue, 29 Nov 2022 15:05:20 +0800 Subject: [PATCH 021/154] [Fluid API]Remove multiple APIs in control_flow (#48279) * remove lod_tensor_to_array, array_to_lod_tensor, DynamicRNN * remove less_equal, greater_than, greater_equal, equal, not_equal --- paddle/fluid/operators/CMakeLists.txt | 2 - python/paddle/fluid/layers/control_flow.py | 1101 +---------------- python/paddle/fluid/layers/rnn.py | 10 +- python/paddle/fluid/tests/book/CMakeLists.txt | 2 - .../tests/book/notest_understand_sentiment.py | 65 - .../tests/book/test_machine_translation.py | 384 ------ .../tests/book/test_rnn_encoder_decoder.py | 319 ----- .../fluid/tests/unittests/CMakeLists.txt | 1 - .../seq2seq_dygraph_model.py | 2 +- .../transformer_dygraph_model.py | 2 +- .../tests/unittests/ipu/test_equal_op_ipu.py | 2 +- .../unittests/ipu/test_not_equal_op_ipu.py | 2 +- .../paddle/fluid/tests/unittests/test_case.py | 4 +- .../fluid/tests/unittests/test_compare_op.py | 4 +- .../fluid/tests/unittests/test_desc_clone.py | 4 +- .../fluid/tests/unittests/test_dyn_rnn.py | 370 ------ .../unittests/test_dynrnn_gradient_check.py | 402 ------ .../unittests/test_dynrnn_static_input.py | 229 ---- .../fluid/tests/unittests/test_layers.py | 36 +- .../unittests/test_lod_tensor_array_ops.py | 301 ----- .../unittests/test_uniform_random_bf16_op.py | 2 +- .../tests/unittests/test_uniform_random_op.py | 2 +- 22 files changed, 36 insertions(+), 3210 deletions(-) delete mode 100644 python/paddle/fluid/tests/book/test_machine_translation.py delete mode 100644 python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py delete mode 100644 python/paddle/fluid/tests/unittests/test_dyn_rnn.py delete mode 100644 python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py delete mode 100644 python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py delete mode 100644 python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 2823db516010ea..3f1bde61aa6444 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -181,8 +181,6 @@ if (WITH_ASCEND_CL) endif() # FIXME(typhoonzero): operator deps may not needed. 
-# op_library(lod_tensor_to_array_op DEPS lod_rank_table_op) -# op_library(array_to_lod_tensor_op DEPS lod_rank_table_op) # op_library(unsqueeze_op DEPS reshape_op) # op_library(squeeze_op DEPS reshape_op) # op_library(flatten_op DEPS reshape_op) diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 2e456e7aa3122e..6e7a53db2d36ba 100755 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -58,16 +58,10 @@ 'array_write', 'create_array', 'less_than', - 'less_equal', - 'greater_than', - 'greater_equal', - 'equal', - 'not_equal', 'array_read', 'array_length', 'cond', 'IfElse', - 'DynamicRNN', 'StaticRNN', 'reorder_lod_tensor_by_rank', 'Print', @@ -1615,118 +1609,6 @@ def max_sequence_len(rank_table): return res -def lod_tensor_to_array(x, table): - """ - Convert a LoDTensor to a LoDTensorArray. - - This function split a LoDTesnor to a LoDTensorArray according to its LoD - information. LoDTensorArray is an alias of C++ std::vector in - PaddlePaddle. The generated LoDTensorArray of this function can be further read - or written by `read_from_array()` and `write_to_array()` operators. However, - this function is generally an internal component of PaddlePaddle `DynamicRNN`. - Users should not use it directly. - - Args: - x (Variable|list): The LoDTensor to be converted to a LoDTensorArray. - table (ParamAttr|list): The variable that stores the level of lod - which is ordered by sequence length in - descending order. It is generally generated - by `layers.lod_rank_table()` API. - - Returns: - Variable: The LoDTensorArray that has been converted from the input tensor. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - x = fluid.layers.data(name='x', shape=[10]) - table = fluid.layers.lod_rank_table(x, level=0) - array = fluid.layers.lod_tensor_to_array(x, table) - """ - check_type(x, 'x', (Variable, list), 'lod_tensor_to_array') - if isinstance(x, (list)): - for i, input_x in enumerate(x): - check_type( - input_x, - 'input[' + str(i) + ']', - Variable, - 'lod_tensor_to_array', - ) - check_type(table, 'table', (Variable, list), 'lod_tensor_to_array') - if isinstance(table, (list)): - for i, table_x in enumerate(table): - check_type( - table_x, - 'table[' + str(i) + ']', - Variable, - 'lod_tensor_to_array', - ) - helper = LayerHelper("lod_tensor_to_array", **locals()) - array = helper.create_variable( - name=unique_name.generate("lod_tensor_to_array"), - type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, - dtype=x.dtype, - ) - helper.append_op( - type='lod_tensor_to_array', - inputs={'X': x, 'RankTable': table}, - outputs={'Out': array}, - ) - return array - - -def array_to_lod_tensor(x, table): - """Convert a LoD_Tensor_Aarry to an LoDTensor. - - Args: - x (Variable|list): The lod tensor array to be converted to a tensor. - table (ParamAttr|list): The variable that stores the level of lod - which is ordered by sequence length in - descending order. - - Returns: - Variable: The variable of type tensor that has been converted - from an array. - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - x = fluid.layers.data(name='x', shape=[10]) - table = fluid.layers.lod_rank_table(x, level=0) - array = fluid.layers.lod_tensor_to_array(x, table) - lod_tensor = fluid.layers.array_to_lod_tensor(array, table) - """ - check_type(x, 'x', (Variable, list), 'array_to_lod_tensor') - if isinstance(x, (list)): - for i, input_x in enumerate(x): - check_type( - input_x, - 'input[' + str(i) + ']', - Variable, - 'array_to_lod_tensor', - ) - check_type(table, 'table', (Variable, list), 'array_to_lod_tensor') - if isinstance(table, (list)): - for i, table_x in enumerate(table): - check_type( - table_x, - 'table[' + str(i) + ']', - Variable, - 'array_to_lod_tensor', - ) - - helper = LayerHelper("array_to_lod_tensor", **locals()) - tmp = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type="array_to_lod_tensor", - inputs={'X': x, 'RankTable': table}, - outputs={'Out': tmp}, - ) - return tmp - - def increment(x, value=1.0, in_place=True): """ The OP is usually used for control flow to increment the data of :attr:`x` by an amount :attr:`value`. @@ -1984,277 +1866,6 @@ def less_than(x, y, force_cpu=None, cond=None, name=None): return cond -@templatedoc() -def less_equal(x, y, cond=None, name=None): - """ - :alias_main: paddle.less_equal - :alias: paddle.less_equal,paddle.tensor.less_equal,paddle.tensor.logic.less_equal - :old_api: paddle.fluid.layers.less_equal - - This OP returns the truth value of :math:`x <= y` elementwise, which is equivalent function to the overloaded operator `<=`. - - Args: - x(Variable): First input to compare which is N-D tensor. The input data type should be float32, float64, int32, int64. - y(Variable): Second input to compare which is N-D tensor. The input data type should be float32, float64, int32, int64. - cond(Variable, optional): Optional output which can be any created Variable that meets the requirements to store the result of *less_equal*. - if cond is None, a new Varibale will be created to store the result. - name(str, optional): The default value is None. Normally there is no need for - user to set this property. For more information, please refer to :ref:`api_guide_Name`. - - Returns: - Variable, the output data type is bool: The tensor variable storing the output, the output shape is same as input :attr:`x`. - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - import numpy as np - label = fluid.layers.assign(np.array([1, 3], dtype='int32')) - limit = fluid.layers.assign(np.array([1, 2], dtype='int32')) - out = fluid.layers.less_equal(x=label, y=limit) #out=[True, False] - out1 = label<= limit #out1=[True, False] - - """ - check_variable_and_dtype( - x, "x", ["float32", "float64", "int32", "int64"], "less_equal" - ) - check_variable_and_dtype( - y, "y", ["float32", "float64", "int32", "int64"], "less_equal" - ) - if cond is not None: - check_type(cond, "cond", Variable, "less_equal") - - helper = LayerHelper("less_equal", **locals()) - if cond is None: - cond = helper.create_variable_for_type_inference(dtype='bool') - cond.stop_gradient = True - - attrs = dict() - - helper.append_op( - type='less_equal', - inputs={'X': [x], 'Y': [y]}, - outputs={'Out': [cond]}, - attrs=attrs, - ) - return cond - - -@templatedoc() -def greater_than(x, y, cond=None, name=None): - """ - :alias_main: paddle.greater_than - :alias: paddle.greater_than,paddle.tensor.greater_than,paddle.tensor.logic.greater_than - :old_api: paddle.fluid.layers.greater_than - - This OP returns the truth value of :math:`x > y` elementwise, which is equivalent function to the overloaded operator `>`. - - Args: - x(Variable): First input to compare which is N-D tensor. The input data type should be float32, float64, int32, int64. - y(Variable): Second input to compare which is N-D tensor. The input data type should be float32, float64, int32, int64. - cond(Variable, optional): Optional output which can be any created Variable that meets the requirements to store the result of *greater_than*. - if cond is None, a new Varibale will be created to store the result. - name(str, optional): The default value is None. Normally there is no need for - user to set this property. For more information, please refer to :ref:`api_guide_Name`. - - Returns: - Variable, the output data type is bool: The tensor variable storing the output, the output shape is same as input :attr:`x` . - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import numpy as np - label = fluid.layers.assign(np.array([2, 3], dtype='int32')) - limit = fluid.layers.assign(np.array([3, 2], dtype='int32')) - out = fluid.layers.greater_than(x=label, y=limit) #out=[False, True] - out1 = label > limit #out1=[False, True] - """ - check_variable_and_dtype( - x, "x", ["float32", "float64", "int32", "int64"], "greater_than" - ) - check_variable_and_dtype( - y, "y", ["float32", "float64", "int32", "int64"], "greater_than" - ) - if cond is not None: - check_type(cond, "cond", Variable, "greater_than") - - helper = LayerHelper("greater_than", **locals()) - if cond is None: - cond = helper.create_variable_for_type_inference(dtype='bool') - cond.stop_gradient = True - - attrs = dict() - - if in_dygraph_mode(): - return _C_ops.greater_than(x, y) - else: - helper.append_op( - type='greater_than', - inputs={'X': [x], 'Y': [y]}, - outputs={'Out': [cond]}, - attrs=attrs, - ) - return cond - - -@templatedoc() -def greater_equal(x, y, cond=None, name=None): - """ - :alias_main: paddle.greater_equal - :alias: paddle.greater_equal,paddle.tensor.greater_equal,paddle.tensor.logic.greater_equal - :old_api: paddle.fluid.layers.greater_equal - - This OP returns the truth value of :math:`x >= y` elementwise, which is equivalent function to the overloaded operator `>=`. - - Args: - x(Variable): First input to compare which is N-D tensor. 
The input data type should be float32, float64, int32, int64. - y(Variable): Second input to compare which is N-D tensor. The input data type should be float32, float64, int32, int64. - cond(Variable, optional): Optional output which can be any created Variable that meets the requirements to store the result of *greater_equal*. - if cond is None, a new Varibale will be created to store the result. - name(str, optional): The default value is None. Normally there is no need for - user to set this property. For more information, please refer to :ref:`api_guide_Name`. - - Returns: - Variable, the output data type is bool: The tensor variable storing the output, the output shape is same as input :attr:`x`. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import numpy as np - - label = fluid.layers.assign(np.array([2, 2], dtype='int32')) - limit = fluid.layers.assign(np.array([2, 3], dtype='int32')) - out = fluid.layers.greater_equal(x=label, y=limit) #out=[True, False] - out_1 = label >= limit #out1=[True, False] - - """ - check_variable_and_dtype( - x, "x", ["float32", "float64", "int32", "int64"], "greater_equal" - ) - check_variable_and_dtype( - y, "y", ["float32", "float64", "int32", "int64"], "greater_equal" - ) - if cond is not None: - check_type(cond, "cond", Variable, "greater_equal") - - helper = LayerHelper("greater_equal", **locals()) - if cond is None: - cond = helper.create_variable_for_type_inference(dtype='bool') - cond.stop_gradient = True - - attrs = dict() - - helper.append_op( - type='greater_equal', - inputs={'X': [x], 'Y': [y]}, - outputs={'Out': [cond]}, - attrs=attrs, - ) - return cond - - -def equal(x, y, cond=None, name=None): - """ - This layer returns the truth value of :math:`x == y` elementwise. - - Args: - x(Variable): Tensor, data type is float32, float64, int32, int64. - y(Variable): Tensor, data type is float32, float64, int32, int64. - cond(Variable, optional): Optional output which can be any created - Variable that meets the requirements to store the result of *equal*. - if cond is None, a new Varibale will be created to store the result. - name(str, optional): The default value is None. Normally there is no need for - user to set this property. For more information, please refer to :ref:`api_guide_Name`. - - Returns: - Variable: output Tensor, it's shape is the same as the input's Tensor, - and the data type is bool. - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - import numpy as np - out_cond =fluid.data(name="input1", shape=[2], dtype='bool') - label = fluid.layers.assign(np.array([3, 3], dtype="int32")) - limit = fluid.layers.assign(np.array([3, 2], dtype="int32")) - label_cond = fluid.layers.assign(np.array([1, 2], dtype="int32")) - out1 = fluid.layers.equal(x=label,y=limit) #out1=[True, False] - out2 = fluid.layers.equal(x=label_cond,y=limit, cond=out_cond) #out2=[False, True] out_cond=[False, True] - """ - if in_dygraph_mode(): - return _C_ops.equal(x, y) - - check_variable_and_dtype( - x, "x", ["float32", "float64", "int32", "int64"], "equal" - ) - check_variable_and_dtype( - y, "y", ["float32", "float64", "int32", "int64"], "equal" - ) - if cond is not None: - check_type(cond, "cond", Variable, "equal") - - helper = LayerHelper("equal", **locals()) - if cond is None: - cond = helper.create_variable_for_type_inference(dtype='bool') - cond.stop_gradient = True - - helper.append_op( - type='equal', inputs={'X': [x], 'Y': [y]}, outputs={'Out': [cond]} - ) - return cond - - -def not_equal(x, y, cond=None, name=None): - """ - :alias_main: paddle.not_equal - :alias: paddle.not_equal,paddle.tensor.not_equal,paddle.tensor.logic.not_equal - :old_api: paddle.fluid.layers.not_equal - - This OP returns the truth value of :math:`x != y` elementwise, which is equivalent function to the overloaded operator `!=`. - - Args: - x(Variable): First input to compare which is N-D tensor. The input data type should be float32, float64, int32, int64. - y(Variable): Second input to compare which is N-D tensor. The input data type should be float32, float64, int32, int64. - cond(Variable, optional): Optional output which can be any created Variable that meets the requirements to store the result of *not_equal*. - if cond is None, a new Varibale will be created to store the result. - name(str, optional): The default value is None. Normally there is no need for - user to set this property. For more information, please refer to :ref:`api_guide_Name`. - - Returns: - Variable, the output data type is bool: The tensor variable storing the output, the output shape is same as input :attr:`x`. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - - label = fluid.layers.data(name='label', shape=[1], dtype='int64') - limit = fluid.layers.fill_constant(shape=[1], value=1, dtype='int64') - out = fluid.layers.not_equal(x=label, y=limit) - """ - check_variable_and_dtype( - x, "x", ["float32", "float64", "int32", "int64"], "not_equal" - ) - check_variable_and_dtype( - y, "y", ["float32", "float64", "int32", "int64"], "not_equal" - ) - if cond is not None: - check_type(cond, "cond", Variable, "not_equal") - - helper = LayerHelper("not_equal", **locals()) - if cond is None: - cond = helper.create_variable_for_type_inference(dtype='bool') - cond.stop_gradient = True - - helper.append_op( - type='not_equal', inputs={'X': [x], 'Y': [y]}, outputs={'Out': [cond]} - ) - return cond - - def array_read(array, i): """ This OP is used to read data at the specified position from the input array @@ -3541,716 +3152,6 @@ def __call__(self): return rlist -class DynamicRNN: - """ - :api_attr: Static Graph - - **Note: the input of this class should be LoDTensor which holds the - information of variable-length sequences. If the input is fixed-length Tensor, - please use StaticRNN (fluid.layers.** :ref:`api_fluid_layers_StaticRNN` **) for - better performance.** - - DynamicRNN can process a minibatch of variable-length sequences. 
- The length of each sample can be different and is recorded in LoD. - In DynamicRNN, an input sequence will be unfolded into time steps and users - can define how to process each time step in :code:`block()` . - The total number of time steps is determined by the longest sequence. - DynamicRNN will not pad all sequences to the same length, instead it will - sort the sequences internally by the sequence length in descending order. - The input sequences will be shrank because only sequences of which the - length is larger than the time step will participate the remaining calculation. - - If defined :code:`drnn = DynamicRNN()`, then users can call :code:`drnn()` - to obtain the result sequences. It is a LoDTensor gained by merging all - time steps's output. When RNN's input sequence x meets :code:`x.lod_level == 1`, - the output LoDTensor will have the same LoD with x. The result of :code:`drnn()` - includes RNN's outputs of all time steps, users can call - :ref:`api_fluid_layers_sequence_last_step` to extract the data of the last time step. - - Warning: - Currently it is not supported to set :code:`is_sparse = True` of any - layers defined within DynamicRNN's :code:`block` function. - - Args: - name (str, optional): The default value is None. Normally there is no - need for user to set this property. For more information, - please refer to :ref:`api_guide_Name` . - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - - sentence = fluid.data(name='sentence', shape=[None, 32], dtype='float32', lod_level=1) - encoder_proj = fluid.data(name='encoder_proj', shape=[None, 32], dtype='float32', lod_level=1) - decoder_boot = fluid.data(name='boot', shape=[None, 10], dtype='float32') - - drnn = fluid.layers.DynamicRNN() - with drnn.block(): - # Set sentence as RNN's input, each time step processes a word from the sentence - current_word = drnn.step_input(sentence) - # Set encode_proj as RNN's static input - encoder_word = drnn.static_input(encoder_proj) - # Initialize memory with boot_memory, which need reorder according to RNN's input sequences - memory = drnn.memory(init=decoder_boot, need_reorder=True) - fc_1 = fluid.layers.fc(input=encoder_word, size=30) - fc_2 = fluid.layers.fc(input=current_word, size=30) - decoder_inputs = fc_1 + fc_2 - hidden, _, _ = fluid.layers.gru_unit(input=decoder_inputs, hidden=memory, size=30) - # Update memory with hidden - drnn.update_memory(ex_mem=memory, new_mem=hidden) - out = fluid.layers.fc(input=hidden, size=10, bias_attr=True, act='softmax') - # Set hidden and out as RNN's outputs - drnn.output(hidden, out) - - # Get RNN's result - hidden, out = drnn() - # Get RNN's result of the last time step - last = fluid.layers.sequence_last_step(out) - """ - - BEFORE_RNN = 0 - IN_RNN = 1 - AFTER_RNN = 2 - - def __init__(self, name=None): - self.helper = LayerHelper('dynamic_rnn', name=name) - self.status = DynamicRNN.BEFORE_RNN - self.lod_rank_table = None - self.max_seq_len = None - self.step_idx = None - self.zero_idx = None - self.mem_dict = dict() - self.output_array = [] - self.outputs = [] - self.cond = self.helper.create_variable_for_type_inference(dtype='bool') - self.cond.stop_gradient = False - self.while_op = While(self.cond) - self.input_array = [] - self.mem_link = [] - - def step_input(self, x, level=0): - r""" - This function is used to set sequence x as DynamicRNN's input. - The maximum sequence length in x determines the number of time steps - the RNN unit will be executed. DynamicRNN can take multiple inputs. 
- When all inputs' :code:`lod_level` are 1, all inputs should hold the - same LoD. When :code:`x.lod_level >= 2` , the input sequence will be - unfold along specified level, and the slice of each time step is a - LoDTensor whose lod_level is :code:`x.lod_level - level - 1` . - In this case, the specified LoD level of multiple inputs should be the same. - - - Case 1: - - .. code-block:: text - - # input, where Si is slice data of shape [1, N] - level = 0 - x.lod = [[2, 1, 3]] - x.shape = [6, N] - x.data = [[S0], - [S0], - [S1], - [S2], - [S2], - [S2]] - - # output - # step 0, time step data of 3 sequences - out.lod = [[]] - out.shape = [3, N] - out.data = [[S2], - [S0], - [S1]] - - # step 1, time step data of 2 sequences - out.lod = [[]] - out.shape = [2, N] - out.data = [[S2], - [S0]] - - # step 2, time step data of 1 sequences - out.lod = [[]] - out.shape = [1, N] - out.data = [[S2]] - - - Args: - x (Variable): The input LoDTensor which holds information of a - minibatch of variable-length sequences and should meet :code:`x.lod_level >= 1` . - When RNN has multiple inputs, the first dimension should match - across all inputs, but other shape components may differ. - Optional data types are: bool, float16, float32, float64, int8, int16, int32, int64, uint8. - level (int, optional): The level of lod used to split steps. - It should be in range :math:`[0, x.lod\_level)` . The default value is 0. - - Returns: - Variable: The current time step in the input sequence. If there are :code:`num_sequences` \ - sequences in x whose length is larger than :code:`step_idx` , the returned Variable \ - will only hold the :code:`step_idx` -th time step of those `num_sequences` sequences. \ - The data type is the same as input. If :code:`x.lod_level == 1` , the return value is \ - a Tensor of shape :math:`\{num\_sequences, x.shape[1], ...\}` , or it will \ - be a variable-length LoDTensor. - - Raises: - ValueError: When :code:`step_input()` is called outside :code:`block()` . - TypeError: When x is not a Variable. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - - sentence = fluid.data(name='sentence', shape=[None, 1], dtype='int64', lod_level=1) - embedding = fluid.layers.embedding(input=sentence, size=[65536, 32], is_sparse=True) - - drnn = fluid.layers.DynamicRNN() - with drnn.block(): - # Set embedding as RNN's input, each time step processes a word from the sentence - word = drnn.step_input(embedding) - # Initialize memory to a Tensor whose value is 0, shape=[batch_size, 200], - # where batch_size is the number of sequences in embedding. 
- memory = drnn.memory(shape=[200]) - hidden = fluid.layers.fc(input=[word, memory], size=200, act='relu') - # Update memory to hidden - drnn.update_memory(ex_mem=memory, new_mem=hidden) - # Set hidden as RNN's output - drnn.output(hidden) - - # Get RNN's result - rnn_output = drnn() - """ - self._assert_in_rnn_block_("step_input") - check_type(x, 'x', Variable, 'fluid.layers.DynamicRNN.step_input()') - parent_block = self._parent_block_() - if self.lod_rank_table is None: - self.lod_rank_table = parent_block.create_var( - name=unique_name.generate('lod_rank_table'), - type=core.VarDesc.VarType.LOD_RANK_TABLE, - ) - self.lod_rank_table.stop_gradient = True - parent_block.append_op( - type='lod_rank_table', - inputs={"X": x}, - outputs={"Out": self.lod_rank_table}, - attrs={"level": level}, - ) - self.max_seq_len = parent_block.create_var( - name=unique_name.generate('dynamic_rnn_max_seq_len'), - dtype='int64', - ) - self.max_seq_len.stop_gradient = False - parent_block.append_op( - type='max_sequence_len', - inputs={'RankTable': self.lod_rank_table}, - outputs={"Out": self.max_seq_len}, - ) - self.cond.stop_gradient = True - parent_block.append_op( - type='less_than', - inputs={'X': self.step_idx, 'Y': self.max_seq_len}, - outputs={'Out': self.cond}, - attrs={'force_cpu': True}, - ) - - input_array = parent_block.create_var( - name=unique_name.generate('dynamic_rnn_input_array'), - type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, - dtype=x.dtype, - ) - self.input_array.append((input_array, x.dtype)) - parent_block.append_op( - type='lod_tensor_to_array', - inputs={'X': x, 'RankTable': self.lod_rank_table}, - outputs={'Out': input_array}, - ) - return array_read(array=input_array, i=self.step_idx) - - def static_input(self, x): - r""" - This function is used to set x as DynamicRNN's static input. It is optional. - - - Case 1, set static input with LoD - - .. code-block:: text - - # RNN's input is the same as the case listed in step_input - # static input, where Si is slice data of shape [1, M] - x.lod = [[3, 1, 2]] - x.shape = [6, M] - x.data = [[S0], - [S0], - [S0], - [S1], - [S2], - [S2]] - - # step 0, batch data corresponding to the 3 input sequences - out.lod = [[2, 3, 1]] - out.shape = [6, M] - out.data = [[S2], - [S2], - [S0], - [S0], - [S0], - [S1]] - - # step 1, batch data corresponding to the 2 input sequences - out.lod = [[2, 3]] - out.shape = [5, M] - out.data = [[S2], - [S2], - [S0], - [S0], - [S0]] - - # step 2, batch data corresponding to the 1 input sequences - out.lod = [[2]] - out.shape = [2, M] - out.data = [[S2], - [S2]] - - - - Case 2, set static input without LoD - - .. code-block:: text - - # RNN's input is the same as the case listed in step_input - # static input, where Si is slice data of shape [1, M] - x.lod = [[]] - x.shape = [3, M] - x.data = [[S0], - [S1], - [S2]] - - # step 0, batch data corresponding to the 3 input sequences - out.lod = [[]] - out.shape = [3, M] - out.data = [[S2], - [S0], - [S1]] - - # step 1, batch data corresponding to the 2 input sequences - out.lod = [[]] - out.shape = [2, M] - out.data = [[S2], - [S0]] - - # step 2, batch data corresponding to the 1 input sequences - out.lod = [[]] - out.shape = [1, M] - out.data = [[S2]] - - - Args: - x (Variable): The static input LoDTensor which should hold the same number of sequences - as RNN's input (the input LoDTensor set by :code:`step_input()` ). If the LoD is None, - the input x will be treated as a minibatch with :code:`x.shape[0]` sequences of length 1. 
- Optional data types are: bool, float16, float32, float64, int8, int16, int32, int64, uint8. - - Returns: - Variable: The input LoDTensor after sorted and shrank. If there are :code:`num_sequences` \ - sequences in RNN's input LoDTensor whose length is larger than :code:`step_idx` , \ - the static input Tensor will be sorted to the same order as RNN's input and \ - will only retain data corresponding to those :code:`num_sequences` sequences. \ - The data type is the same as input. If :code:`x.lod == None` , the return value is \ - a Tensor of shape :math:`\{num\_sequences, x.shape[1], ...\}` , or it will \ - be a variable-length LoDTensor. - - Raises: - ValueError: When :code:`static_input()` is called outside :code:`block()` . - TypeError: When x is not a Variable. - RuntimeError: When :code:`static_input()` is called before :code:`step_input()` . - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - - sentence = fluid.data(name='sentence', shape=[None, 32], dtype='float32', lod_level=1) - encoder_proj = fluid.data(name='encoder_proj', shape=[None, 32], dtype='float32', lod_level=1) - decoder_boot = fluid.data(name='boot', shape=[None, 10], dtype='float32') - - drnn = fluid.layers.DynamicRNN() - with drnn.block(): - # Set sentence as RNN's input, each time step processes a word from the sentence - current_word = drnn.step_input(sentence) - # Set encode_proj as RNN's static input - encoder_word = drnn.static_input(encoder_proj) - # Initialize memory with boot_memory, which need reorder according to RNN's input sequences - memory = drnn.memory(init=decoder_boot, need_reorder=True) - fc_1 = fluid.layers.fc(input=encoder_word, size=30) - fc_2 = fluid.layers.fc(input=current_word, size=30) - decoder_inputs = fc_1 + fc_2 - hidden, _, _ = fluid.layers.gru_unit(input=decoder_inputs, hidden=memory, size=30) - # Update memory with hidden - drnn.update_memory(ex_mem=memory, new_mem=hidden) - out = fluid.layers.fc(input=hidden, size=10, bias_attr=True, act='softmax') - # Set out as RNN's output - drnn.output(out) - - # Get RNN's result - rnn_output = drnn() - """ - self._assert_in_rnn_block_("static_input") - check_type(x, 'x', Variable, 'fluid.layers.DynamicRNN.static_input()') - if self.lod_rank_table is None: - raise RuntimeError( - "static_input() must be called after step_input()." - ) - parent_block = self._parent_block_() - x_reordered = parent_block.create_var( - name=unique_name.generate("dynamic_rnn_static_input_reordered"), - type=core.VarDesc.VarType.LOD_TENSOR, - dtype=x.dtype, - ) - parent_block.append_op( - type='reorder_lod_tensor_by_rank', - inputs={'X': [x], 'RankTable': [self.lod_rank_table]}, - outputs={'Out': [x_reordered]}, - ) - return shrink_memory(x_reordered, self.step_idx, self.lod_rank_table) - - @signature_safe_contextmanager - def block(self): - """ - The function is used to list the operations executed during - each time step in RNN. The operation list will be executed :code:`max_sequence_len` - times (where :code:`max_sequence_len` is the maximum length of RNN's input sequences). - - Raises: - ValueError: When :code:`block()` is called multi-times. 
- """ - if self.status != DynamicRNN.BEFORE_RNN: - raise ValueError("rnn.block() can only be invoke once") - self.step_idx = fill_constant( - shape=[1], dtype='int64', value=0, force_cpu=True - ) - self.step_idx.stop_gradient = False - self.status = DynamicRNN.IN_RNN - with self.while_op.block(): - yield - increment(x=self.step_idx, value=1.0, in_place=True) - - for new_mem, mem_array in self.mem_link: - array_write(x=new_mem, i=self.step_idx, array=mem_array) - - less_than( - x=self.step_idx, - y=self.max_seq_len, - force_cpu=True, - cond=self.cond, - ) - - self.status = DynamicRNN.AFTER_RNN - for each_array in self.output_array: - self.outputs.append( - array_to_lod_tensor(x=each_array, table=self.lod_rank_table) - ) - - def __call__(self, *args, **kwargs): - """ - This function is used to get the output sequences of DynamicRNN. - - Args: - None - - Returns: - Variable or Variable list: RNN's output sequences. - - Raises: - ValueError: When :code:`__call__()` is called before :code:`block()` . - """ - if self.status != DynamicRNN.AFTER_RNN: - raise ValueError( - ( - "Output of the dynamic RNN can only be visited " - "outside the rnn block." - ) - ) - if len(self.outputs) == 1: - return self.outputs[0] - else: - return self.outputs - - def memory( - self, - init=None, - shape=None, - value=0.0, - need_reorder=False, - dtype='float32', - ): - r""" - Create a memory Variable for DynamicRNN to deliver data cross time steps. - It can be initialized by an existing Tensor or a constant Tensor of given - dtype and shape. - - Args: - init (Variable, optional): LoDTensor used to initialize the memory. - If init is not None, it should hold the same number of sequences - as RNN's input (the input LoDTensor set by :code:`step_input()` ) - and the memory will be initialized to it. If init's LoD is None, - it will be treated as a minibatch with :code:`init.shape[0]` sequences - of length 1. The default value is None. - shape (list|tuple, optional): When init is None, it is used to specify - the memory's shape. Note that the shape does not include the batch_size. - If setting shape to :math:`\{D_1, D_2, ...\}` , the shape of memory Tensor - will be :math:`\{batch\_size, D_1, D_2, ...\}` , where batch_size is - determined by RNN's input sequences. The default value is None. - value (float, optional): When init is None, it is used as initialized value - of memory. The default value is 0.0. - need_reorder (bool, optional): When init is not None, it determines whether - the memory needs to reorder like the RNN's input sequences. It should be - set to True when the initialized memory depends on the order of input samples. - The default value is False. - dtype (str|numpy.dtype, optional): When init is None, it is used to set the - data type of memory. The default value is "float32". Optional data types - are: "float32", "float64", "int32", "int64". - - Returns: - Variable: The memory LoDTensor after shrank. If there are :code:`num_sequences` \ - sequences in RNN's input LoDTensor whose length is larger than :code:`step_idx` , \ - the memory Tensor also need to be shrank and will only retain data \ - corresponding to those :code:`num_sequences` sequences. - - Raises: - ValueError: When :code:`memory()` is called outside :code:`block()` . - TypeError: When init is set and is not a Variable. - ValueError: When :code:`memory()` is called before :code:`step_input()` . - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - - sentence = fluid.data(name='sentence', shape=[None, 32], dtype='float32', lod_level=1) - boot_memory = fluid.data(name='boot', shape=[None, 10], dtype='float32') - - drnn = fluid.layers.DynamicRNN() - with drnn.block(): - # Set sentence as RNN's input, each time step processes a word from the sentence - word = drnn.step_input(sentence) - # Initialize memory with boot_memory, which need reorder according to RNN's input sequences - memory = drnn.memory(init=boot_memory, need_reorder=True) - hidden = fluid.layers.fc(input=[word, memory], size=10, act='tanh') - # Update memory with hidden - drnn.update_memory(ex_mem=memory, new_mem=hidden) - # Set hidden as RNN's output - drnn.output(hidden) - - # Get RNN's result - rnn_output = drnn() - - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - - sentence = fluid.data(name='sentence', shape=[None, 32], dtype='float32', lod_level=1) - - drnn = fluid.layers.DynamicRNN() - with drnn.block(): - # Set sentence as RNN's input, each time step processes a word from the sentence - word = drnn.step_input(sentence) - # Initialize memory to a Tensor whose value is 0, shape=[batch_size, 10], - # where batch_size is the number of sequences in sentence. - memory = drnn.memory(shape=[10], dtype='float32', value=0) - hidden = fluid.layers.fc(input=[word, memory], size=10, act='tanh') - # Update memory with hidden - drnn.update_memory(ex_mem=memory, new_mem=hidden) - # Set hidden as RNN's output - drnn.output(hidden) - - # Get RNN's result - rnn_output = drnn() - """ - self._assert_in_rnn_block_('memory') - self._init_zero_idx_() - if shape is not None: - check_type( - shape, - 'shape', - (list, tuple), - 'fluid.layers.DynamicRNN.memory()', - ) - if init is not None: - check_type( - init, 'init', Variable, 'fluid.layers.DynamicRNN.memory()' - ) - parent_block = self._parent_block_() - init_tensor = init - if need_reorder == True: - if self.lod_rank_table is None: - raise ValueError( - 'If set need_reorder to True, make sure step_input be ' - 'invoked before ' - 'memory(init=init, need_reordered=True, ...).' 
- ) - init_reordered = parent_block.create_var( - name=unique_name.generate('dynamic_rnn_mem_init_reordered'), - type=core.VarDesc.VarType.LOD_TENSOR, - dtype=init.dtype, - ) - parent_block.append_op( - type='reorder_lod_tensor_by_rank', - inputs={ - 'X': [init_tensor], - 'RankTable': [self.lod_rank_table], - }, - outputs={'Out': [init_reordered]}, - ) - init_tensor = init_reordered - mem_array = parent_block.create_var( - name=unique_name.generate('dynamic_rnn_mem_array'), - type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, - dtype=init.dtype, - ) - parent_block.append_op( - type='write_to_array', - inputs={'X': init_tensor, 'I': self.zero_idx}, - outputs={'Out': mem_array}, - ) - retv = array_read(array=mem_array, i=self.step_idx) - retv = shrink_memory( - x=retv, i=self.step_idx, table=self.lod_rank_table - ) - self.mem_dict[retv.name] = mem_array - return retv - else: - if len(self.input_array) == 0: - raise ValueError( - "step_input should be invoked before memory(shape=..., value=...)" - ) - parent_block = self._parent_block_() - init = parent_block.create_var( - name=unique_name.generate('mem_init'), dtype=dtype - ) - arr, dtype = self.input_array[0] - in0 = parent_block.create_var( - name=unique_name.generate('in0'), dtype=dtype - ) - parent_block.append_op( - type='read_from_array', - inputs={'X': [arr], 'I': [self.zero_idx]}, - outputs={'Out': [in0]}, - ) - parent_block.append_op( - type='fill_constant_batch_size_like', - inputs={'Input': [in0]}, - outputs={'Out': [init]}, - attrs={ - 'shape': [-1] + shape, - 'value': float(value), - 'dtype': init.dtype, - }, - ) - return self.memory(init=init) - - def update_memory(self, ex_mem, new_mem): - """ - Update the memory which need to be delivered across time steps. - - Args: - ex_mem (Variable): The memory data of previous time step. - new_mem (Variable): The new memory data produced in current time step. - The shape and data type of ex_mem and new_mem should be the same. - - Returns: - None - - Raises: - ValueError: When :code:`update_memory()` is called outside :code:`block()` . - TypeError: When :code:`ex_mem` or :code:`new_mem` is not a Variable. - ValueError: When :code:`ex_mem` is defined by :code:`memory()` . - ValueError: When :code:`update_memory()` is called before :code:`step_input()` . - """ - self._assert_in_rnn_block_('update_memory') - check_type( - ex_mem, - 'ex_mem', - Variable, - 'fluid.layers.DynamicRNN.update_memory()', - ) - check_type( - new_mem, - 'new_mem', - Variable, - 'fluid.layers.DynamicRNN.update_memory()', - ) - - mem_array = self.mem_dict.get(ex_mem.name, None) - if mem_array is None: - raise ValueError("Please invoke memory before update_memory") - if self.lod_rank_table is None: - raise ValueError("Please invoke step_input before update_memory") - - self.mem_link.append((new_mem, mem_array)) - - def output(self, *outputs): - """ - This function is used to set :code:`outputs` as RNN's output. - - Args: - *outputs (Variable ...): The output Tensor. DynamicRNN can mark multiple - Variables as its output. - - Returns: - None - - Raises: - ValueError: When :code:`output()` is called outside :code:`block()` . 
- """ - self._assert_in_rnn_block_('output') - parent_block = self._parent_block_() - for each in outputs: - check_type( - each, "outputs", Variable, "fluid.layers.DynamicRNN.output" - ) - outside_array = parent_block.create_var( - name=unique_name.generate_with_ignorable_key( - "_".join([self.helper.name, "output_array", each.name]) - ), - type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, - dtype=each.dtype, - ) - array_write(x=each, i=self.step_idx, array=outside_array) - self.output_array.append(outside_array) - - def _init_zero_idx_(self): - if self.zero_idx is None: - parent_block = self._parent_block_() - self.zero_idx = parent_block.create_var( - name=unique_name.generate('zero_idx'), dtype='int64' - ) - parent_block.append_op( - type='fill_constant', - inputs={}, - outputs={'Out': [self.zero_idx]}, - attrs={ - 'shape': [1], - 'dtype': self.zero_idx.dtype, - 'value': float(0), - 'force_cpu': True, - }, - ) - - def _parent_block_(self): - prog = self.helper.main_program - parent_idx = prog.current_block().parent_idx - assert parent_idx >= 0 - parent_block = prog.block(parent_idx) - - return parent_block - - def _assert_in_rnn_block_(self, method): - if self.status != DynamicRNN.IN_RNN: - raise ValueError( - "{0} can only be invoked inside rnn block.".format(method) - ) - - def switch_case(branch_index, branch_fns, default=None, name=None): ''' :api_attr: Static Graph @@ -4413,7 +3314,7 @@ def _check_args(branch_index, branch_fns, default): pred_fn_pairs = [] for index, fn in branch_fns: new_index = fill_constant(shape=[1], dtype="int64", value=index) - pred = equal(branch_index, new_index) + pred = paddle.equal(branch_index, new_index) pred_fn_pairs.append((pred, fn)) return pred_fn_pairs, default diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 01f22238406fa0..a3bfd80de6d88e 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -1337,7 +1337,7 @@ def _beam_search_step(self, time, logits, next_cell_states, beam_state): ) next_finished = paddle.logical_or( next_finished, - control_flow.equal(token_indices, self.end_token_tensor), + paddle.equal(token_indices, self.end_token_tensor), ) beam_search_output = self.OutputWrapper( @@ -1722,7 +1722,7 @@ def _create_array_out_of_while(dtype): if max_step_num is not None: paddle.logical_and( paddle.logical_not(nn.reduce_all(global_finished)), - control_flow.less_equal(step_idx, max_step_num), + paddle.less_equal(step_idx, max_step_num), cond, ) else: @@ -2013,7 +2013,7 @@ def initialize(self): variable[s], and the tensor's shape is `[batch_size, ...]`. \ `initial_finished` is a bool tensor with shape `[batch_size]`. """ - init_finished = control_flow.equal( + init_finished = paddle.equal( self.sequence_length, tensor.fill_constant( shape=[1], dtype=self.sequence_length.dtype, value=0 @@ -2084,7 +2084,7 @@ def next_inputs(self, time, outputs, states, sample_ids): if self.sequence_length.dtype != time.dtype: self.sequence_length = tensor.cast(self.sequence_length, time.dtype) next_time = time + 1 - finished = control_flow.less_equal(self.sequence_length, next_time) + finished = paddle.less_equal(self.sequence_length, next_time) def _slice(x): # TODO: use Variable.__getitem__ axes = [0 if self.time_major else 1] @@ -2227,7 +2227,7 @@ def next_inputs(self, time, outputs, states, sample_ids): argument `states`. `finished` is a `bool` Tensor with \ shape `[batch_size]`. 
""" - finished = control_flow.equal(sample_ids, self.end_token) + finished = paddle.equal(sample_ids, self.end_token) next_inputs = self.embedding_fn(sample_ids) return finished, next_inputs, states diff --git a/python/paddle/fluid/tests/book/CMakeLists.txt b/python/paddle/fluid/tests/book/CMakeLists.txt index 9e807a79353bb1..aa61391305005d 100644 --- a/python/paddle/fluid/tests/book/CMakeLists.txt +++ b/python/paddle/fluid/tests/book/CMakeLists.txt @@ -13,6 +13,4 @@ set_tests_properties(test_word2vec_book PROPERTIES TIMEOUT 120) set_tests_properties(test_recognize_digits PROPERTIES TIMEOUT 120) set_tests_properties(test_image_classification PROPERTIES TIMEOUT 200) set_tests_properties(test_label_semantic_roles PROPERTIES TIMEOUT 240) -set_tests_properties(test_machine_translation PROPERTIES TIMEOUT 120) -set_tests_properties(test_rnn_encoder_decoder PROPERTIES TIMEOUT 120) set_tests_properties(test_fit_a_line PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/book/notest_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py index 597b8b22cbe484..ab220911b630fe 100644 --- a/python/paddle/fluid/tests/book/notest_understand_sentiment.py +++ b/python/paddle/fluid/tests/book/notest_understand_sentiment.py @@ -53,52 +53,6 @@ def convolution_net( return avg_cost, accuracy, prediction -def dyn_rnn_lstm( - data, label, input_dim, class_dim=2, emb_dim=32, lstm_size=128 -): - emb = fluid.layers.embedding( - input=data, size=[input_dim, emb_dim], is_sparse=True - ) - sentence = fluid.layers.fc(input=emb, size=lstm_size, act='tanh') - - rnn = fluid.layers.DynamicRNN() - with rnn.block(): - word = rnn.step_input(sentence) - prev_hidden = rnn.memory(value=0.0, shape=[lstm_size]) - prev_cell = rnn.memory(value=0.0, shape=[lstm_size]) - - def gate_common(ipt, hidden, size): - gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True) - gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False) - return gate0 + gate1 - - forget_gate = paddle.nn.functional.sigmoid( - x=gate_common(word, prev_hidden, lstm_size) - ) - input_gate = paddle.nn.functional.sigmoid( - x=gate_common(word, prev_hidden, lstm_size) - ) - output_gate = paddle.nn.functional.sigmoid( - x=gate_common(word, prev_hidden, lstm_size) - ) - cell_gate = paddle.nn.functional.sigmoid( - x=gate_common(word, prev_hidden, lstm_size) - ) - - cell = forget_gate * prev_cell + input_gate * cell_gate - hidden = output_gate * paddle.tanh(x=cell) - rnn.update_memory(prev_cell, cell) - rnn.update_memory(prev_hidden, hidden) - rnn.output(hidden) - - last = fluid.layers.sequence_last_step(rnn()) - prediction = fluid.layers.fc(input=last, size=class_dim, act="softmax") - cost = fluid.layers.cross_entropy(input=prediction, label=label) - avg_cost = paddle.mean(cost) - accuracy = fluid.layers.accuracy(input=prediction, label=label) - return avg_cost, accuracy, prediction - - def stacked_lstm_net( data, label, input_dim, class_dim=2, emb_dim=128, hid_dim=512, stacked_num=3 ): @@ -376,25 +330,6 @@ def test_stacked_lstm_gpu_parallel(self): parallel=True, ) - @unittest.skip(reason='make CI faster') - def test_dynrnn_lstm_gpu(self): - with self.new_program_scope(): - main( - self.word_dict, - net_method=dyn_rnn_lstm, - use_cuda=True, - parallel=False, - ) - - def test_dynrnn_lstm_gpu_parallel(self): - with self.new_program_scope(): - main( - self.word_dict, - net_method=dyn_rnn_lstm, - use_cuda=True, - parallel=True, - ) - if __name__ == '__main__': unittest.main() diff --git 
a/python/paddle/fluid/tests/book/test_machine_translation.py b/python/paddle/fluid/tests/book/test_machine_translation.py deleted file mode 100644 index 58426433ef2f65..00000000000000 --- a/python/paddle/fluid/tests/book/test_machine_translation.py +++ /dev/null @@ -1,384 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import contextlib -import os -import unittest - -import numpy as np - -import paddle -import paddle.fluid as fluid -import paddle.fluid.framework as framework -import paddle.fluid.layers as pd -from paddle.fluid.executor import Executor - -paddle.enable_static() - -dict_size = 30000 -source_dict_dim = target_dict_dim = dict_size -hidden_dim = 32 -word_dim = 16 -batch_size = 2 -max_length = 8 -topk_size = 50 -trg_dic_size = 10000 -beam_size = 2 - -decoder_size = hidden_dim - - -def encoder(is_sparse): - # encoder - src_word_id = pd.data( - name="src_word_id", shape=[1], dtype='int64', lod_level=1 - ) - src_embedding = pd.embedding( - input=src_word_id, - size=[dict_size, word_dim], - dtype='float32', - is_sparse=is_sparse, - param_attr=fluid.ParamAttr(name='vemb'), - ) - - fc1 = pd.fc(input=src_embedding, size=hidden_dim * 4, act='tanh') - lstm_hidden0, lstm_0 = pd.dynamic_lstm(input=fc1, size=hidden_dim * 4) - encoder_out = pd.sequence_last_step(input=lstm_hidden0) - return encoder_out - - -def decoder_train(context, is_sparse): - # decoder - trg_language_word = pd.data( - name="target_language_word", shape=[1], dtype='int64', lod_level=1 - ) - trg_embedding = pd.embedding( - input=trg_language_word, - size=[dict_size, word_dim], - dtype='float32', - is_sparse=is_sparse, - param_attr=fluid.ParamAttr(name='vemb'), - ) - - rnn = pd.DynamicRNN() - with rnn.block(): - current_word = rnn.step_input(trg_embedding) - pre_state = rnn.memory(init=context) - current_state = pd.fc( - input=[current_word, pre_state], size=decoder_size, act='tanh' - ) - - current_score = pd.fc( - input=current_state, size=target_dict_dim, act='softmax' - ) - rnn.update_memory(pre_state, current_state) - rnn.output(current_score) - - return rnn() - - -def decoder_decode(context, is_sparse): - init_state = context - array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length) - counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True) - - # fill the first element with init_state - state_array = pd.create_array('float32') - pd.array_write(init_state, array=state_array, i=counter) - - # ids, scores as memory - ids_array = pd.create_array('int64') - scores_array = pd.create_array('float32') - - init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2) - init_scores = pd.data( - name="init_scores", shape=[1], dtype="float32", lod_level=2 - ) - - pd.array_write(init_ids, array=ids_array, i=counter) - pd.array_write(init_scores, array=scores_array, i=counter) - - cond = pd.less_than(x=counter, y=array_len) - - while_op = pd.While(cond=cond) - with while_op.block(): - pre_ids = pd.array_read(array=ids_array, 
i=counter) - pre_state = pd.array_read(array=state_array, i=counter) - pre_score = pd.array_read(array=scores_array, i=counter) - - # expand the recursive_sequence_lengths of pre_state to be the same with pre_score - pre_state_expanded = pd.sequence_expand(pre_state, pre_score) - - pre_ids_emb = pd.embedding( - input=pre_ids, - size=[dict_size, word_dim], - dtype='float32', - is_sparse=is_sparse, - ) - - # use rnn unit to update rnn - current_state = pd.fc( - input=[pre_state_expanded, pre_ids_emb], - size=decoder_size, - act='tanh', - ) - current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score) - # use score to do beam search - current_score = pd.fc( - input=current_state_with_lod, size=target_dict_dim, act='softmax' - ) - topk_scores, topk_indices = pd.topk(current_score, k=beam_size) - # calculate accumulated scores after topk to reduce computation cost - accu_scores = pd.elementwise_add( - x=pd.log(topk_scores), - y=paddle.reshape(pre_score, shape=[-1]), - axis=0, - ) - selected_ids, selected_scores = pd.beam_search( - pre_ids, - pre_score, - topk_indices, - accu_scores, - beam_size, - end_id=10, - level=0, - ) - - pd.increment(x=counter, value=1, in_place=True) - - # update the memories - pd.array_write(current_state, array=state_array, i=counter) - pd.array_write(selected_ids, array=ids_array, i=counter) - pd.array_write(selected_scores, array=scores_array, i=counter) - - # update the break condition: up to the max length or all candidates of - # source sentences have ended. - length_cond = pd.less_than(x=counter, y=array_len) - finish_cond = paddle.logical_not(pd.is_empty(x=selected_ids)) - paddle.logical_and(x=length_cond, y=finish_cond, out=cond) - - translation_ids, translation_scores = pd.beam_search_decode( - ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10 - ) - - # return init_ids, init_scores - - return translation_ids, translation_scores - - -def train_main(use_cuda, is_sparse, is_local=True): - if use_cuda and not fluid.core.is_compiled_with_cuda(): - return - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - - context = encoder(is_sparse) - rnn_out = decoder_train(context, is_sparse) - label = pd.data( - name="target_language_next_word", shape=[1], dtype='int64', lod_level=1 - ) - cost = pd.cross_entropy(input=rnn_out, label=label) - avg_cost = pd.mean(cost) - - optimizer = fluid.optimizer.Adagrad( - learning_rate=1e-4, - regularization=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.1 - ), - ) - optimizer.minimize(avg_cost) - - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.wmt14.train(dict_size), buf_size=1000 - ), - batch_size=batch_size, - ) - - feed_order = [ - 'src_word_id', - 'target_language_word', - 'target_language_next_word', - ] - - exe = Executor(place) - - def train_loop(main_program): - exe.run(framework.default_startup_program()) - - feed_list = [ - main_program.global_block().var(var_name) for var_name in feed_order - ] - feeder = fluid.DataFeeder(feed_list, place) - - batch_id = 0 - for pass_id in range(1): - for data in train_data(): - outs = exe.run( - main_program, feed=feeder.feed(data), fetch_list=[avg_cost] - ) - avg_cost_val = np.array(outs[0]) - print( - 'pass_id=' - + str(pass_id) - + ' batch=' - + str(batch_id) - + " avg_cost=" - + str(avg_cost_val) - ) - if batch_id > 3: - break - batch_id += 1 - - if is_local: - train_loop(framework.default_main_program()) - else: - port = os.getenv("PADDLE_PSERVER_PORT", "6174") - pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # 
ip,ip... - eplist = [] - for ip in pserver_ips.split(","): - eplist.append(':'.join([ip, port])) - pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("PADDLE_TRAINERS")) - current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) - training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") - t = fluid.DistributeTranspiler() - t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) - if training_role == "PSERVER": - pserver_prog = t.get_pserver_program(current_endpoint) - pserver_startup = t.get_startup_program( - current_endpoint, pserver_prog - ) - exe.run(pserver_startup) - exe.run(pserver_prog) - elif training_role == "TRAINER": - train_loop(t.get_trainer_program()) - - -def decode_main(use_cuda, is_sparse): - if use_cuda and not fluid.core.is_compiled_with_cuda(): - return - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - - context = encoder(is_sparse) - translation_ids, translation_scores = decoder_decode(context, is_sparse) - - exe = Executor(place) - exe.run(framework.default_startup_program()) - - init_ids_data = np.array([1 for _ in range(batch_size)], dtype='int64') - init_scores_data = np.array( - [1.0 for _ in range(batch_size)], dtype='float32' - ) - init_ids_data = init_ids_data.reshape((batch_size, 1)) - init_scores_data = init_scores_data.reshape((batch_size, 1)) - init_recursive_seq_lens = [1] * batch_size - init_recursive_seq_lens = [init_recursive_seq_lens, init_recursive_seq_lens] - - init_ids = fluid.create_lod_tensor( - init_ids_data, init_recursive_seq_lens, place - ) - init_scores = fluid.create_lod_tensor( - init_scores_data, init_recursive_seq_lens, place - ) - - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.wmt14.train(dict_size), buf_size=1000 - ), - batch_size=batch_size, - ) - - feed_order = ['src_word_id'] - feed_list = [ - framework.default_main_program().global_block().var(var_name) - for var_name in feed_order - ] - feeder = fluid.DataFeeder(feed_list, place) - - for data in train_data(): - feed_dict = feeder.feed([[x[0]] for x in data]) - feed_dict['init_ids'] = init_ids - feed_dict['init_scores'] = init_scores - - result_ids, result_scores = exe.run( - framework.default_main_program(), - feed=feed_dict, - fetch_list=[translation_ids, translation_scores], - return_numpy=False, - ) - print(result_ids.recursive_sequence_lengths()) - break - - -class TestMachineTranslation(unittest.TestCase): - pass - - -@contextlib.contextmanager -def scope_prog_guard(): - prog = fluid.Program() - startup_prog = fluid.Program() - scope = fluid.core.Scope() - with fluid.scope_guard(scope): - with fluid.program_guard(prog, startup_prog): - yield - - -def inject_test_train(use_cuda, is_sparse): - f_name = 'test_{0}_{1}_train'.format( - 'cuda' if use_cuda else 'cpu', 'sparse' if is_sparse else 'dense' - ) - - def f(*args): - with scope_prog_guard(): - train_main(use_cuda, is_sparse) - - setattr(TestMachineTranslation, f_name, f) - - -def inject_test_decode(use_cuda, is_sparse, decorator=None): - f_name = 'test_{0}_{1}_decode'.format( - 'cuda' if use_cuda else 'cpu', 'sparse' if is_sparse else 'dense' - ) - - def f(*args): - with scope_prog_guard(): - decode_main(use_cuda, is_sparse) - - if decorator is not None: - f = decorator(f) - - setattr(TestMachineTranslation, f_name, f) - - -for _use_cuda_ in (False, True): - for _is_sparse_ in (False, True): - inject_test_train(_use_cuda_, _is_sparse_) - -for _use_cuda_ in (False, True): - for _is_sparse_ 
in (False, True): - - _decorator_ = None - if _use_cuda_: - _decorator_ = unittest.skip( - reason='Beam Search does not support CUDA!' - ) - - inject_test_decode( - is_sparse=_is_sparse_, use_cuda=_use_cuda_, decorator=_decorator_ - ) - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py b/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py deleted file mode 100644 index e27839eabc3949..00000000000000 --- a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py +++ /dev/null @@ -1,319 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import contextlib -import math -import os -import sys -import tempfile -import unittest - -import numpy as np - -import paddle -import paddle.fluid as fluid -import paddle.fluid.framework as framework -from paddle.fluid.executor import Executor - -paddle.enable_static() - -dict_size = 30000 -source_dict_dim = target_dict_dim = dict_size -src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size) -hidden_dim = 32 -embedding_dim = 16 -batch_size = 10 -max_length = 50 -topk_size = 50 -encoder_size = decoder_size = hidden_dim -IS_SPARSE = True -USE_PEEPHOLES = False - - -def bi_lstm_encoder(input_seq, hidden_size): - input_forward_proj = fluid.layers.fc( - input=input_seq, size=hidden_size * 4, bias_attr=True - ) - forward, _ = fluid.layers.dynamic_lstm( - input=input_forward_proj, - size=hidden_size * 4, - use_peepholes=USE_PEEPHOLES, - ) - input_backward_proj = fluid.layers.fc( - input=input_seq, size=hidden_size * 4, bias_attr=True - ) - backward, _ = fluid.layers.dynamic_lstm( - input=input_backward_proj, - size=hidden_size * 4, - is_reverse=True, - use_peepholes=USE_PEEPHOLES, - ) - - forward_last = fluid.layers.sequence_last_step(input=forward) - backward_first = fluid.layers.sequence_first_step(input=backward) - - return forward_last, backward_first - - -# FIXME(peterzhang2029): Replace this function with the lstm_unit_op. 
-def lstm_step(x_t, hidden_t_prev, cell_t_prev, size): - def linear(inputs): - return fluid.layers.fc(input=inputs, size=size, bias_attr=True) - - forget_gate = paddle.nn.functional.sigmoid(x=linear([hidden_t_prev, x_t])) - input_gate = paddle.nn.functional.sigmoid(x=linear([hidden_t_prev, x_t])) - output_gate = paddle.nn.functional.sigmoid(x=linear([hidden_t_prev, x_t])) - cell_tilde = paddle.tanh(x=linear([hidden_t_prev, x_t])) - - cell_t = fluid.layers.sums( - input=[ - fluid.layers.elementwise_mul(x=forget_gate, y=cell_t_prev), - fluid.layers.elementwise_mul(x=input_gate, y=cell_tilde), - ] - ) - - hidden_t = fluid.layers.elementwise_mul( - x=output_gate, y=paddle.tanh(x=cell_t) - ) - - return hidden_t, cell_t - - -def lstm_decoder_without_attention( - target_embedding, decoder_boot, context, decoder_size -): - rnn = fluid.layers.DynamicRNN() - - cell_init = fluid.layers.fill_constant_batch_size_like( - input=decoder_boot, value=0.0, shape=[-1, decoder_size], dtype='float32' - ) - cell_init.stop_gradient = False - - with rnn.block(): - current_word = rnn.step_input(target_embedding) - context = rnn.static_input(context) - - hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True) - cell_mem = rnn.memory(init=cell_init) - decoder_inputs = fluid.layers.concat( - input=[context, current_word], axis=1 - ) - h, c = lstm_step(decoder_inputs, hidden_mem, cell_mem, decoder_size) - rnn.update_memory(hidden_mem, h) - rnn.update_memory(cell_mem, c) - out = fluid.layers.fc( - input=h, size=target_dict_dim, bias_attr=True, act='softmax' - ) - rnn.output(out) - return rnn() - - -def seq_to_seq_net(): - """Construct a seq2seq network.""" - - src_word_idx = fluid.layers.data( - name='source_sequence', shape=[1], dtype='int64', lod_level=1 - ) - - src_embedding = fluid.layers.embedding( - input=src_word_idx, - size=[source_dict_dim, embedding_dim], - dtype='float32', - ) - - src_forward_last, src_backward_first = bi_lstm_encoder( - input_seq=src_embedding, hidden_size=encoder_size - ) - - encoded_vector = fluid.layers.concat( - input=[src_forward_last, src_backward_first], axis=1 - ) - - decoder_boot = fluid.layers.fc( - input=src_backward_first, size=decoder_size, bias_attr=False, act='tanh' - ) - - trg_word_idx = fluid.layers.data( - name='target_sequence', shape=[1], dtype='int64', lod_level=1 - ) - - trg_embedding = fluid.layers.embedding( - input=trg_word_idx, - size=[target_dict_dim, embedding_dim], - dtype='float32', - ) - - prediction = lstm_decoder_without_attention( - trg_embedding, decoder_boot, encoded_vector, decoder_size - ) - label = fluid.layers.data( - name='label_sequence', shape=[1], dtype='int64', lod_level=1 - ) - cost = fluid.layers.cross_entropy(input=prediction, label=label) - avg_cost = paddle.mean(cost) - - return avg_cost, prediction - - -def train(use_cuda, save_dirname=None): - [avg_cost, prediction] = seq_to_seq_net() - - optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4) - optimizer.minimize(avg_cost) - - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.wmt14.train(dict_size), buf_size=1000 - ), - batch_size=batch_size, - ) - - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - exe = Executor(place) - exe.run(framework.default_startup_program()) - - feed_order = ['source_sequence', 'target_sequence', 'label_sequence'] - feed_list = [ - framework.default_main_program().global_block().var(var_name) - for var_name in feed_order - ] - feeder = fluid.DataFeeder(feed_list, place) - - batch_id = 0 - for pass_id in range(2): - for data in 
train_data(): - outs = exe.run( - framework.default_main_program(), - feed=feeder.feed(data), - fetch_list=[avg_cost], - ) - - avg_cost_val = np.array(outs[0]) - print( - 'pass_id=' - + str(pass_id) - + ' batch=' - + str(batch_id) - + " avg_cost=" - + str(avg_cost_val) - ) - if math.isnan(float(avg_cost_val[0])): - sys.exit("got NaN loss, training failed.") - if batch_id > 3: - if save_dirname is not None: - fluid.io.save_inference_model( - save_dirname, - ['source_sequence', 'target_sequence'], - [prediction], - exe, - ) - return - - batch_id += 1 - - -def infer(use_cuda, save_dirname=None): - if save_dirname is None: - return - - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - exe = fluid.Executor(place) - - inference_scope = fluid.core.Scope() - with fluid.scope_guard(inference_scope): - # Use fluid.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be fed - # data using feed operators), and the fetch_targets (variables that - # we want to obtain data from using fetch operators). - [ - inference_program, - feed_target_names, - fetch_targets, - ] = fluid.io.load_inference_model(save_dirname, exe) - - # Setup input by creating LoDTensor to represent sequence of words. - # Here each word is the basic element of the LoDTensor and the shape of - # each word (base_shape) should be [1] since it is simply an index to - # look up for the corresponding word vector. - # Suppose the recursive_sequence_lengths info is set to [[4, 6]], - # which has only one level of detail. Then the created LoDTensor will have only - # one higher level structure (sequence of words, or sentence) than the basic - # element (word). Hence the LoDTensor will hold data for two sentences of - # length 4 and 6, respectively. - # Note that recursive_sequence_lengths should be a list of lists. - recursive_seq_lens = [[4, 6]] - base_shape = [1] - # The range of random integers is [low, high] - word_data = fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=1 - ) - trg_word = fluid.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=1 - ) - - # Construct feed as a dictionary of {feed_target_name: feed_target_data} - # and results will contain a list of data corresponding to fetch_targets. 
- assert feed_target_names[0] == 'source_sequence' - assert feed_target_names[1] == 'target_sequence' - results = exe.run( - inference_program, - feed={ - feed_target_names[0]: word_data, - feed_target_names[1]: trg_word, - }, - fetch_list=fetch_targets, - return_numpy=False, - ) - print(results[0].recursive_sequence_lengths()) - np_data = np.array(results[0]) - print("Inference shape: ", np_data.shape) - print("Inference results: ", np_data) - - -def main(use_cuda): - if use_cuda and not fluid.core.is_compiled_with_cuda(): - return - - # Directory for saving the trained model - temp_dir = tempfile.TemporaryDirectory() - save_dirname = os.path.join( - temp_dir.name, "rnn_encoder_decoder.inference.model" - ) - - train(use_cuda, save_dirname) - infer(use_cuda, save_dirname) - temp_dir.cleanup() - - -class TestRnnEncoderDecoder(unittest.TestCase): - def test_cuda(self): - with self.scope_prog_guard(): - main(use_cuda=True) - - def test_cpu(self): - with self.scope_prog_guard(): - main(use_cuda=False) - - @contextlib.contextmanager - def scope_prog_guard(self): - prog = fluid.Program() - startup_prog = fluid.Program() - scope = fluid.core.Scope() - with fluid.scope_guard(scope): - with fluid.program_guard(prog, startup_prog): - yield - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 2b9b5e7c3d23aa..61e9917359b0df 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -1108,7 +1108,6 @@ set_tests_properties(test_imperative_optimizer_v2 PROPERTIES TIMEOUT 150) set_tests_properties(test_partial_sum_op PROPERTIES TIMEOUT 120) set_tests_properties(test_cond PROPERTIES TIMEOUT 120) set_tests_properties(test_space_to_depth_op PROPERTIES TIMEOUT 200) -set_tests_properties(test_dyn_rnn PROPERTIES TIMEOUT 120) set_tests_properties(test_sgd_op PROPERTIES TIMEOUT 250) set_tests_properties(test_parallel_executor_seresnext_base_gpu PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index aa0219880a073c..c34c780c549ffa 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -490,7 +490,7 @@ def beam_search(self, inputs): next_finished = fluid.layers.cast(next_finished, "bool") next_finished = paddle.logical_or( next_finished, - fluid.layers.equal(token_indices, end_token_tensor), + paddle.equal(token_indices, end_token_tensor), ) next_finished = fluid.layers.cast(next_finished, "float32") diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index bcd881c7996024..ea9394001a7d51 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -871,7 +871,7 @@ def gather(input, indices, batch_pos): log_probs = gather(log_probs, topk_indices, batch_pos) finished = gather(finished, beam_indices, batch_pos) finished = paddle.logical_or( - finished, layers.equal(token_indices, end_token_tensor) + finished, paddle.equal(token_indices, end_token_tensor) ) trg_word = paddle.reshape(token_indices, [-1, 1]) diff --git 
a/python/paddle/fluid/tests/unittests/ipu/test_equal_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_equal_op_ipu.py index 41aa141c13b697..f45ff7aa5912c3 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_equal_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_equal_op_ipu.py @@ -56,7 +56,7 @@ def build_model(self): y = paddle.static.data( name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32' ) - out = paddle.fluid.layers.equal(x, y, **self.attrs) + out = paddle.equal(x, y) self.fetch_list = [out.name] def run_model(self, exec_mode): diff --git a/python/paddle/fluid/tests/unittests/ipu/test_not_equal_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_not_equal_op_ipu.py index 507cf0b073a368..55d380ce095c99 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_not_equal_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_not_equal_op_ipu.py @@ -56,7 +56,7 @@ def build_model(self): y = paddle.static.data( name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32' ) - out = paddle.fluid.layers.not_equal(x, y, **self.attrs) + out = paddle.not_equal(x, y) self.fetch_list = [out.name] def run_model(self, exec_mode): diff --git a/python/paddle/fluid/tests/unittests/test_case.py b/python/paddle/fluid/tests/unittests/test_case.py index 62a3898fb97893..c0ef9f811efc1d 100644 --- a/python/paddle/fluid/tests/unittests/test_case.py +++ b/python/paddle/fluid/tests/unittests/test_case.py @@ -105,8 +105,8 @@ def fn_3(): y = layers.fill_constant(shape=[1], dtype='float32', value=1) z = layers.fill_constant(shape=[1], dtype='float32', value=3) - pred_1 = layers.equal(x, y) # true - pred_2 = layers.equal(x, z) # false + pred_1 = paddle.equal(x, y) # true + pred_2 = paddle.equal(x, z) # false out = layers.case(((pred_1, fn_1), (pred_2, fn_2)), fn_3) diff --git a/python/paddle/fluid/tests/unittests/test_compare_op.py b/python/paddle/fluid/tests/unittests/test_compare_op.py index 2a598cae044169..191f250c4530d3 100755 --- a/python/paddle/fluid/tests/unittests/test_compare_op.py +++ b/python/paddle/fluid/tests/unittests/test_compare_op.py @@ -46,7 +46,7 @@ def test_errors(self): self.assertRaises( TypeError, fluid.layers.less_than, x=x, y=y, force_cpu=1 ) - op = eval("fluid.layers.%s" % self.op_type) + op = eval("paddle.%s" % self.op_type) self.assertRaises(TypeError, op, x=x, y=y, cond=1) self.assertRaises(TypeError, op, x=x, y=a) self.assertRaises(TypeError, op, x=a, y=y) @@ -446,7 +446,7 @@ def test_errors(self): y = fluid.create_lod_tensor( numpy.array([[-1]]), [[1]], fluid.CPUPlace() ) - self.assertRaises(TypeError, fluid.layers.greater_equal, x, y) + self.assertRaises(TypeError, paddle.greater_equal, x, y) class API_TestElementwise_Equal(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_desc_clone.py b/python/paddle/fluid/tests/unittests/test_desc_clone.py index ad0e95d4bbcde2..6a2e831df94108 100644 --- a/python/paddle/fluid/tests/unittests/test_desc_clone.py +++ b/python/paddle/fluid/tests/unittests/test_desc_clone.py @@ -212,7 +212,7 @@ def test_clone_with_stop_gradient(self): hidden1 = fluid.layers.fc(input=img, size=200, act='relu') hidden1.stop_gradient = True - cond = fluid.layers.equal(true, true) + cond = paddle.equal(true, true) def true_fn(): hidden2 = fluid.layers.dropout(hidden1, dropout_prob=0.5) @@ -253,7 +253,7 @@ def test_clone_with_stop_gradient(self): hidden1 = fluid.layers.fc(input=img, size=200, act='relu') hidden1.stop_gradient = True - cond = fluid.layers.equal(true, true) + cond = 
paddle.equal(true, true) def true_fn(): hidden2 = fluid.layers.dropout(hidden1, dropout_prob=0.5) diff --git a/python/paddle/fluid/tests/unittests/test_dyn_rnn.py b/python/paddle/fluid/tests/unittests/test_dyn_rnn.py deleted file mode 100644 index e4f0a3a173a3f3..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_dyn_rnn.py +++ /dev/null @@ -1,370 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.fluid as fluid -import paddle -import unittest -import numpy - -from paddle.fluid.framework import Program, program_guard -from paddle.fluid.layers.control_flow import lod_rank_table -from paddle.fluid.layers.control_flow import max_sequence_len -from paddle.fluid.layers.control_flow import lod_tensor_to_array -from paddle.fluid.layers.control_flow import array_to_lod_tensor -from paddle.fluid.layers.control_flow import shrink_memory -from fake_reader import fake_imdb_reader - -numpy.random.seed(2020) - - -class TestDynamicRNN(unittest.TestCase): - def setUp(self): - self.word_dict_len = 5147 - self.BATCH_SIZE = 2 - reader = fake_imdb_reader(self.word_dict_len, self.BATCH_SIZE * 100) - self.train_data = paddle.batch(reader, batch_size=self.BATCH_SIZE) - - def _train( - self, - main_program, - startup_program, - feed_list, - fetch_list, - is_nested=False, - max_iters=1, - ): - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_program) - feeder = fluid.DataFeeder(feed_list=feed_list, place=place) - data = next(self.train_data()) - - for iter_id in range(max_iters): - fetch_outs = exe.run( - main_program, - feed=feeder.feed(data), - fetch_list=fetch_list, - return_numpy=False, - ) - if len(fetch_list) == 3: - rnn_in_seq = fetch_outs[0] - rnn_out_seq = fetch_outs[1] - if not is_nested: - # Check for lod set in runtime. When lod_level is 1, - # the lod of DynamicRNN's output should be the same as input. 
- self.assertEqual(rnn_in_seq.lod(), rnn_out_seq.lod()) - - loss_i = numpy.array(fetch_outs[2]) - elif len(fetch_list) == 1: - loss_i = numpy.array(fetch_outs[0]) - # print(loss_i) - - self.assertEqual((1,), loss_i.shape) - self.assertFalse(numpy.isnan(loss_i)) - if iter_id == 0: - loss_0 = loss_i - - if max_iters > 10: - # loss should be small after 10 mini-batch - self.assertLess(loss_i[0], loss_0[0]) - - def test_plain_while_op(self): - main_program = fluid.Program() - startup_program = fluid.Program() - - with fluid.program_guard(main_program, startup_program): - sentence = fluid.layers.data( - name='word', shape=[1], dtype='int64', lod_level=1 - ) - sent_emb = fluid.layers.embedding( - input=sentence, size=[self.word_dict_len, 32], dtype='float32' - ) - - rank_table = lod_rank_table(x=sent_emb) - sent_emb_array = lod_tensor_to_array(x=sent_emb, table=rank_table) - - seq_len = max_sequence_len(rank_table=rank_table) - i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0) - i.stop_gradient = False - - boot_mem = fluid.layers.fill_constant_batch_size_like( - input=fluid.layers.array_read(array=sent_emb_array, i=i), - value=0, - shape=[-1, 100], - dtype='float32', - ) - boot_mem.stop_gradient = False - mem_array = fluid.layers.array_write(x=boot_mem, i=i) - - cond = fluid.layers.less_than(x=i, y=seq_len) - cond.stop_gradient = False - while_op = fluid.layers.While(cond=cond) - out = fluid.layers.create_array(dtype='float32') - - with while_op.block(): - mem = fluid.layers.array_read(array=mem_array, i=i) - ipt = fluid.layers.array_read(array=sent_emb_array, i=i) - - mem = shrink_memory(x=mem, i=i, table=rank_table) - - hidden = fluid.layers.fc(input=[mem, ipt], size=100, act='tanh') - - fluid.layers.array_write(x=hidden, i=i, array=out) - fluid.layers.increment(x=i, in_place=True) - fluid.layers.array_write(x=hidden, i=i, array=mem_array) - fluid.layers.less_than(x=i, y=seq_len, cond=cond) - - result_all_timesteps = array_to_lod_tensor(x=out, table=rank_table) - last = fluid.layers.sequence_last_step(input=result_all_timesteps) - - logits = fluid.layers.fc(input=last, size=1, act=None) - label = fluid.layers.data(name='label', shape=[1], dtype='float32') - loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=logits, label=label - ) - loss = paddle.mean(loss) - sgd = fluid.optimizer.SGD(1e-4) - sgd.minimize(loss=loss) - - # Check for lod_level set in compile-time. 
- self.assertEqual(sent_emb.lod_level, result_all_timesteps.lod_level) - - self._train( - main_program=main_program, - startup_program=startup_program, - feed_list=[sentence, label], - fetch_list=[sent_emb, result_all_timesteps, loss], - is_nested=False, - max_iters=1, - ) - - def test_train_dynamic_rnn(self): - main_program = fluid.Program() - startup_program = fluid.Program() - main_program.random_seed = 10 - startup_program.random_seed = 10 - with fluid.program_guard(main_program, startup_program): - sentence = fluid.layers.data( - name='word', shape=[1], dtype='int64', lod_level=1 - ) - sent_emb = fluid.layers.embedding( - input=sentence, size=[self.word_dict_len, 32], dtype='float32' - ) - - drnn = fluid.layers.DynamicRNN() - with drnn.block(): - in_ = drnn.step_input(sent_emb) - mem = drnn.memory(shape=[100], dtype='float32') - out_ = fluid.layers.fc(input=[in_, mem], size=100, act='tanh') - drnn.update_memory(mem, out_) - drnn.output(out_) - - drnn_result = drnn() - last = fluid.layers.sequence_last_step(input=drnn_result) - logits = fluid.layers.fc(input=last, size=1, act=None) - - label = fluid.layers.data(name='label', shape=[1], dtype='float32') - loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=logits, label=label - ) - loss = paddle.mean(loss) - sgd = fluid.optimizer.Adam(1e-3) - sgd.minimize(loss=loss) - - # Check for lod_level set in compile-time. - self.assertEqual(sent_emb.lod_level, drnn_result.lod_level) - - self._train( - main_program=main_program, - startup_program=startup_program, - feed_list=[sentence, label], - fetch_list=[sent_emb, drnn_result, loss], - is_nested=False, - max_iters=100, - ) - - def _fake_reader(self): - seq_len, label = [[2, 2]], [0, 1] - data = [] - for ele in seq_len: - for j in ele: - data.append([numpy.random.randint(30) for _ in range(j)]) - - while True: - yield data, label - - # this unit test is just used to the two layer nested dyn_rnn. 
- def test_train_nested_dynamic_rnn(self): - word_dict = [i for i in range(30)] - - main_program = fluid.Program() - startup_program = fluid.Program() - main_program.random_seed = 10 - startup_program.random_seed = 10 - with fluid.program_guard(main_program, startup_program): - sentence = fluid.layers.data( - name='word', shape=[1], dtype='int64', lod_level=2 - ) - label = fluid.layers.data( - name='label', shape=[1], dtype='float32', lod_level=1 - ) - - drnn0 = fluid.layers.DynamicRNN() - with drnn0.block(): - in_0 = drnn0.step_input(sentence) - assert in_0.lod_level == 1, "the lod level of in_ should be 1" - sentence_emb = fluid.layers.embedding( - input=in_0, size=[len(word_dict), 32], dtype='float32' - ) - out_0 = fluid.layers.fc( - input=sentence_emb, size=100, act='tanh' - ) - - drnn1 = fluid.layers.DynamicRNN() - with drnn1.block(): - in_1 = drnn1.step_input(out_0) - assert ( - in_1.lod_level == 0 - ), "the lod level of in_1 should be 0" - out_1 = fluid.layers.fc(input=[in_1], size=100, act='tanh') - drnn1.output(out_1) - - drnn1_result = drnn1() - last_1 = fluid.layers.sequence_last_step(input=drnn1_result) - drnn0.output(last_1) - - last = drnn0() - logits = fluid.layers.fc(input=last, size=1, act=None) - loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=logits, label=label - ) - loss = paddle.mean(loss) - sgd = fluid.optimizer.SGD(1e-3) - sgd.minimize(loss=loss) - - train_data_orig = self.train_data - self.train_data = paddle.batch(self._fake_reader, batch_size=2) - self._train( - main_program=main_program, - startup_program=startup_program, - feed_list=[sentence, label], - fetch_list=[loss], - is_nested=True, - max_iters=100, - ) - self.train_data = train_data_orig - - # this unit test is just used to the two layer nested dyn_rnn. 
- def test_train_nested_dynamic_rnn2(self): - word_dict = [i for i in range(30)] - - hidden_size = 32 - main_program = fluid.Program() - startup_program = fluid.Program() - main_program.random_seed = 10 - startup_program.random_seed = 10 - with fluid.program_guard(main_program, startup_program): - sentence = fluid.layers.data( - name='word', shape=[1], dtype='int64', lod_level=2 - ) - label = fluid.layers.data( - name='label', shape=[1], dtype='float32', lod_level=1 - ) - - drnn0 = fluid.layers.DynamicRNN() - with drnn0.block(): - in_0 = drnn0.step_input(sentence) - sentence_emb = fluid.layers.embedding( - input=in_0, - size=[len(word_dict), hidden_size], - dtype='float32', - ) - input_forward_proj = fluid.layers.fc( - input=sentence_emb, - size=hidden_size * 4, - act=None, - bias_attr=False, - ) - forward, _ = fluid.layers.dynamic_lstm( - input=input_forward_proj, - size=hidden_size * 4, - use_peepholes=False, - ) - - drnn1 = fluid.layers.DynamicRNN() - with drnn1.block(): - in_1 = drnn1.step_input(forward) - out_1 = fluid.layers.fc(input=[in_1], size=100, act='tanh') - drnn1.output(out_1) - - last = fluid.layers.sequence_last_step(input=drnn1()) - drnn0.output(last) - - last = drnn0() - logits = fluid.layers.fc(input=last, size=1, act=None) - loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=logits, label=label - ) - loss = paddle.mean(loss) - sgd = fluid.optimizer.SGD(1e-3) - sgd.minimize(loss=loss) - - train_data_orig = self.train_data - self.train_data = paddle.batch(self._fake_reader, batch_size=2) - self._train( - main_program=main_program, - startup_program=startup_program, - feed_list=[sentence, label], - fetch_list=[loss], - is_nested=True, - max_iters=100, - ) - self.train_data = train_data_orig - - -class TestDynamicRNNErrors(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - init = fluid.layers.zeros(shape=[1], dtype='float32') - shape = 'shape' - sentence = fluid.data( - name='sentence', shape=[None, 32], dtype='float32', lod_level=1 - ) - - # The type of Input(shape) in API(memory) must be list or tuple - def input_shape_type_of_memory(): - drnn = fluid.layers.DynamicRNN() - with drnn.block(): - res = drnn.memory(init, shape) - - self.assertRaises(TypeError, input_shape_type_of_memory) - - # The type of element of Input(*outputs) in API(output) must be Variable. - def outputs_type_of_output(): - drnn = fluid.layers.DynamicRNN() - with drnn.block(): - word = drnn.step_input(sentence) - memory = drnn.memory(shape=[10], dtype='float32', value=0) - hidden = fluid.layers.fc( - input=[word, memory], size=10, act='tanh' - ) - out = numpy.ones(1).astype('float32') - drnn.update_memory(ex_mem=memory, new_mem=hidden) - drnn.output(hidden, out) - - self.assertRaises(TypeError, outputs_type_of_output) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py b/python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py deleted file mode 100644 index b6f85c9876a385..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py +++ /dev/null @@ -1,402 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import random -import collections -import paddle -import paddle.fluid as fluid -import unittest -from decorator_helper import prog_scope - - -class Memory: - def __init__(self, shape, dtype='float32'): - self.ex = np.zeros(shape=shape, dtype=dtype) - self.cur = None - - def update(self, val): - assert val.shape == self.ex.shape - assert val.dtype == self.ex.dtype - self.cur = val - - def next(self): - self.ex = self.cur - self.cur = None - - def __next__(self): - self.next() - - def reset(self): - self.ex = np.zeros(shape=self.ex.shape, dtype=self.ex.dtype) - self.cur = None - - -class Output: - def __init__(self): - self.outs = [] - - def next_sequence(self): - self.outs.append([]) - - def out(self, val): - self.outs[-1].append(val) - - def last(self): - return self.outs[-1][-1] - - -class BaseRNN: - def __init__(self, ins, mems, params, outs, num_seq=5, max_seq_len=15): - self.num_seq = num_seq - self.inputs = collections.defaultdict(list) - - for _ in range(num_seq): - seq_len = random.randint(1, max_seq_len - 1) - for iname in ins: - ishape = ins[iname].get('shape', None) - idtype = ins[iname].get('dtype', 'float32') - lst = [] - for _ in range(seq_len): - lst.append(np.random.random(size=ishape).astype(idtype)) - self.inputs[iname].append(lst) - - self.mems = dict() - for mname in mems: - mshape = mems[mname].get('shape', None) - mdtype = mems[mname].get('dtype', 'float32') - self.mems[mname] = Memory(shape=mshape, dtype=mdtype) - - self.params = dict() - for pname in params: - pshape = params[pname].get('shape', None) - pdtype = params[pname].get('dtype', 'float32') - self.params[pname] = np.random.random(size=pshape).astype(pdtype) - - self.outputs = dict() - - for oname in outs: - self.outputs[oname] = Output() - - def step(self, **kwargs): - raise NotImplementedError() - - def exe(self): - retv = dict() - for out in self.outputs: - retv[out] = [] - - for seq_id in range(self.num_seq): - for mname in self.mems: - self.mems[mname].reset() - for out in self.outputs: - self.outputs[out].next_sequence() - - iname0 = list(self.inputs.keys())[0] - seq_len = len(self.inputs[iname0][seq_id]) - - for step_id in range(seq_len): - xargs = dict() - - for iname in self.inputs: - xargs[iname] = self.inputs[iname][seq_id][step_id] - - for mname in self.mems: - xargs[mname] = self.mems[mname] - - for pname in self.params: - xargs[pname] = self.params[pname] - - for out in self.outputs: - xargs[out] = self.outputs[out] - - self.step(**xargs) - - for mname in self.mems: - next(self.mems[mname]) - - for out in self.outputs: - retv[out].append(self.outputs[out].last()) - - for out in retv: - retv[out] = np.array(retv[out]) - return retv - - def to_feed(self, place): - feed_dict = dict() - - for iname in self.inputs: - lod = [] - np_flatten = [] - for seq_id in range(len(self.inputs[iname])): - seq_len = len(self.inputs[iname][seq_id]) - lod.append(seq_len) - np_flatten.extend(self.inputs[iname][seq_id]) - - t = fluid.Tensor() - t.set(np.array(np_flatten), place) - t.set_recursive_sequence_lengths([lod]) - feed_dict[iname] = t - - for pname in self.params: - 
feed_dict[pname] = self.params[pname] - return feed_dict - - def get_numeric_gradient_of_param(self, param_name, delta=0.001): - p = self.params[param_name] - if len(p.shape) != 2: - raise ValueError( - "Not support get numeric gradient of an parameter," - " which is not matrix" - ) - g = np.zeros(shape=p.shape, dtype=p.dtype) - - for i in range(p.shape[0]): - for j in range(p.shape[1]): - o = p[i][j] - p[i][j] += delta - pos = self._exe_mean_out_() - p[i][j] -= 2 * delta - neg = self._exe_mean_out_() - p[i][j] = o - g[i][j] = (pos - neg) / (delta * 2) - return g - - def get_numeric_gradient_of_input( - self, input_name, delta=0.001, return_one_tensor=True - ): - ipt = self.inputs[input_name] - grad = [] - - for seq in ipt: - seq_grad = [] - for item in seq: - item_grad = np.zeros(shape=item.shape, dtype=item.dtype) - if len(item.shape) != 1: - raise ValueError("Not support") - - for i in range(len(item)): - o = item[i] - item[i] += delta - pos = self._exe_mean_out_() - item[i] -= 2 * delta - neg = self._exe_mean_out_() - item[i] = o - item_grad[i] = (pos - neg) / (delta * 2) - seq_grad.append(item_grad) - grad.append(seq_grad) - - if not return_one_tensor: - return grad - - for i in range(len(grad)): - grad[i] = np.concatenate(grad[i]) - grad = np.concatenate(grad) - return grad - - def _exe_mean_out_(self): - outs = self.exe() - return np.array([o.mean() for o in outs.values()]).mean() - - -class SeedFixedTestCase(unittest.TestCase): - @classmethod - def setUpClass(cls): - """Fix random seeds to remove randomness from tests""" - cls._np_rand_state = np.random.get_state() - cls._py_rand_state = random.getstate() - - np.random.seed(123) - random.seed(124) - - @classmethod - def tearDownClass(cls): - """Restore random seeds""" - np.random.set_state(cls._np_rand_state) - random.setstate(cls._py_rand_state) - - -class TestSimpleMul(SeedFixedTestCase): - DATA_NAME = 'X' - DATA_WIDTH = 32 - PARAM_NAME = 'W' - HIDDEN_WIDTH = 10 - OUT_NAME = 'Out' - - class SimpleMul(BaseRNN): - def __init__(self): - base = TestSimpleMul - super().__init__( - {base.DATA_NAME: {'shape': [base.DATA_WIDTH]}}, - {}, - { - base.PARAM_NAME: { - 'shape': [base.DATA_WIDTH, base.HIDDEN_WIDTH] - } - }, - [base.OUT_NAME], - ) - - def step(self, X, W, Out): - Out.out(np.matmul(X, W)) - - # Test many times in local to ensure the random seed cannot breaks CI - # @many_times(10) - @prog_scope() - def test_forward_backward(self): - py_rnn = TestSimpleMul.SimpleMul() - dat = fluid.layers.data( - name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1 - ) - dat.stop_gradient = False - - rnn = fluid.layers.DynamicRNN() - with rnn.block(): - d = rnn.step_input(dat) - o = fluid.layers.fc( - input=d, - param_attr=self.PARAM_NAME, - bias_attr=False, - size=self.HIDDEN_WIDTH, - act=None, - ) - rnn.output(o) - - out = rnn() - out = fluid.layers.sequence_pool(out, pool_type='last') - loss = paddle.mean(out) - fluid.backward.append_backward(loss) - - cpu = fluid.CPUPlace() - exe = fluid.Executor(cpu) - out, w_g, i_g = list( - map( - np.array, - exe.run( - feed=py_rnn.to_feed(cpu), - fetch_list=[ - out, - self.PARAM_NAME + "@GRAD", - self.DATA_NAME + "@GRAD", - ], - return_numpy=False, - ), - ) - ) - out_by_python = py_rnn.exe()[self.OUT_NAME] - np.testing.assert_allclose(out, out_by_python, rtol=1e-05) - w_g_num = py_rnn.get_numeric_gradient_of_param(self.PARAM_NAME) - np.testing.assert_allclose(w_g_num, w_g, rtol=0.05) - i_g_num = py_rnn.get_numeric_gradient_of_input( - input_name=self.DATA_NAME - ) - i_g_num = 
i_g_num.reshape(i_g.shape) - np.testing.assert_allclose(i_g_num, i_g, rtol=0.05) - - -class TestSimpleMulWithMemory(SeedFixedTestCase): - DATA_WIDTH = 32 - HIDDEN_WIDTH = 20 - DATA_NAME = 'X' - PARAM_NAME = 'W' - - class SimpleMulWithMemory(BaseRNN): - def __init__(self): - super().__init__( - { - TestSimpleMulWithMemory.DATA_NAME: { - 'shape': [TestSimpleMulWithMemory.DATA_WIDTH] - } - }, - {'Mem': {'shape': [TestSimpleMulWithMemory.HIDDEN_WIDTH]}}, - { - TestSimpleMulWithMemory.PARAM_NAME: { - 'shape': [ - TestSimpleMulWithMemory.DATA_WIDTH, - TestSimpleMulWithMemory.HIDDEN_WIDTH, - ] - } - }, - ['Out'], - ) - - def step(self, X, Mem, W, Out): - o = np.matmul(X, W) - assert isinstance(Mem, Memory) - o += Mem.ex - Mem.update(o) - assert isinstance(Out, Output) - Out.out(o) - - # many_times used locally for debug. Make sure the calculation is stable. - # @many_times(10) - @prog_scope() - def test_forward_backward(self): - py_rnn = TestSimpleMulWithMemory.SimpleMulWithMemory() - data = fluid.layers.data( - name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1 - ) - data.stop_gradient = False - rnn = fluid.layers.DynamicRNN() - with rnn.block(): - d = rnn.step_input(data) - mem = rnn.memory(value=0.0, shape=[self.HIDDEN_WIDTH]) - hidden = fluid.layers.fc( - input=d, - size=self.HIDDEN_WIDTH, - param_attr=self.PARAM_NAME, - bias_attr=False, - act=None, - ) - o = fluid.layers.elementwise_add(x=hidden, y=mem) - rnn.update_memory(mem, o) - rnn.output(o) - - out = rnn() - last = fluid.layers.sequence_pool(input=out, pool_type='last') - loss = paddle.mean(last) - fluid.backward.append_backward(loss) - - cpu = fluid.CPUPlace() - exe = fluid.Executor(cpu) - feed = py_rnn.to_feed(cpu) - last_np, w_g, i_g = list( - map( - np.array, - exe.run( - feed=feed, - fetch_list=[ - last, - self.PARAM_NAME + "@GRAD", - self.DATA_NAME + "@GRAD", - ], - return_numpy=False, - ), - ) - ) - (last_by_py,) = list(py_rnn.exe().values()) - w_g_num = py_rnn.get_numeric_gradient_of_param(self.PARAM_NAME) - np.testing.assert_allclose(last_np, last_by_py, rtol=1e-05) - - np.testing.assert_allclose(w_g_num, w_g, rtol=0.1) - i_g_num = py_rnn.get_numeric_gradient_of_input(self.DATA_NAME) - i_g_num = i_g_num.reshape(i_g.shape) - - # Since this RNN has many float add. The number could be not stable. - # rtol = 0.1 - np.testing.assert_allclose(i_g_num, i_g, rtol=0.1) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py b/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py deleted file mode 100644 index 2f0a99247564c4..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py +++ /dev/null @@ -1,229 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest -import paddle -import paddle.fluid.core as core -import paddle.fluid as fluid -from paddle.fluid.backward import append_backward -import paddle.fluid.framework as framework -from paddle.fluid.framework import Program, switch_main_program -import bisect -import numpy as np - -fluid.default_startup_program().random_seed = 1 -np.random.seed(1) - - -class TestDyRnnStaticInput(unittest.TestCase): - def setUp(self): - self._delta = 0.005 - self._max_sequence_len = 3 - self._program = Program() - switch_main_program(self._program) - self.output_dim = 10 - self.place = core.CPUPlace() - self.prepare_x_tensor() - self.prepare_static_input_tensor() - self.exe = fluid.Executor(self.place) - - def prepare_x_tensor(self): - self.x_tensor_dim = 10 - lod = [[2, 1, 3]] - shape = [sum(lod[0]), self.x_tensor_dim] - self.x_tensor_data = np.random.random(shape).astype('float32') - self.x_tensor = core.LoDTensor() - self.x_tensor.set_recursive_sequence_lengths(lod) - self.x_tensor.set(self.x_tensor_data, self.place) - - def prepare_static_input_tensor(self): - self.static_input_tensor_dim = 4 - lod = [[1, 2, 3]] - shape = [sum(lod[0]), self.static_input_tensor_dim] - self.static_input_data = np.random.random(shape).astype('float32') - self.static_input_tensor = core.LoDTensor() - self.static_input_tensor.set_recursive_sequence_lengths(lod) - self.static_input_tensor.set(self.static_input_data, self.place) - - def fetch_value(self, var): - fetch_outs = self.exe.run( - feed={ - 'x_tensor': self.x_tensor, - 'static_input_tensor': self.static_input_tensor, - }, - fetch_list=[var], - return_numpy=False, - ) - return self._lodtensor_to_ndarray(fetch_outs[0]) - - def _lodtensor_to_ndarray(self, lod_tensor): - dims = lod_tensor.shape() - ndarray = np.zeros(shape=dims).astype('float32') - for i in range(np.product(dims)): - ndarray.ravel()[i] = lod_tensor._get_float_element(i) - return ndarray, lod_tensor.recursive_sequence_lengths() - - def build_graph(self, only_forward=False): - x_tensor = fluid.layers.data( - name='x_tensor', - shape=[self.x_tensor_dim], - dtype='float32', - lod_level=1, - ) - x_tensor.stop_gradient = False - - static_input_tensor = fluid.layers.data( - name='static_input_tensor', - shape=[self.static_input_tensor_dim], - dtype='float32', - lod_level=1, - ) - static_input_tensor.stop_gradient = False - - if only_forward: - static_input_out_array = self._program.global_block().create_var( - name='static_input_out_array', - type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, - dtype='float32', - ) - static_input_out_array.stop_gradient = True - - rnn = fluid.layers.DynamicRNN() - with rnn.block(): - step_x = rnn.step_input(x_tensor) - step_static_input = rnn.static_input(static_input_tensor) - if only_forward: - fluid.layers.array_write( - x=step_static_input, - i=rnn.step_idx, - array=static_input_out_array, - ) - last = fluid.layers.sequence_pool( - input=step_static_input, pool_type='last' - ) - projected = fluid.layers.fc( - input=[step_x, last], size=self.output_dim - ) - rnn.output(projected) - - if only_forward: - static_input_step_outs = [] - step_idx = fluid.layers.fill_constant( - shape=[1], dtype='int64', value=0 - ) - step_idx.stop_gradient = True - - for i in range(self._max_sequence_len): - step_out = fluid.layers.array_read( - static_input_out_array, step_idx - ) - step_out.stop_gradient = True - static_input_step_outs.append(step_out) - fluid.layers.increment(x=step_idx, value=1.0, in_place=True) - - if only_forward: - return static_input_step_outs - - last = 
fluid.layers.sequence_pool(input=rnn(), pool_type='last') - loss = paddle.mean(last) - append_backward(loss) - static_input_grad = self._program.global_block().var( - framework.grad_var_name('static_input_tensor') - ) - return static_input_grad, loss - - def get_expected_static_step_outs(self): - x_lod = self.x_tensor.recursive_sequence_lengths() - x_seq_len = x_lod[0] - x_seq_len_sorted = sorted(x_seq_len) - x_sorted_indices = np.argsort(x_seq_len)[::-1] - - static_lod = self.static_input_tensor.recursive_sequence_lengths() - static_sliced = [] - cur_offset = 0 - for i in range(len(static_lod[0])): - static_sliced.append( - self.static_input_data[ - cur_offset : (cur_offset + static_lod[0][i]) - ] - ) - cur_offset += static_lod[0][i] - static_seq_len = static_lod[0] - static_reordered = [] - for i in range(len(x_sorted_indices)): - static_reordered.extend(static_sliced[x_sorted_indices[i]].tolist()) - static_seq_len_reordered = [ - static_seq_len[x_sorted_indices[i]] - for i in range(len(x_sorted_indices)) - ] - - static_step_outs = [] - static_step_lods = [] - - for i in range(self._max_sequence_len): - end = len(x_seq_len) - bisect.bisect_left(x_seq_len_sorted, i + 1) - lod = [] - total_len = 0 - for i in range(end): - lod.append(static_seq_len_reordered[i]) - total_len += lod[-1] - static_step_lods.append([lod]) - end = total_len - static_step_outs.append( - np.array(static_reordered[:end]).astype('float32') - ) - - return static_step_outs, static_step_lods - - def test_step_out(self): - static_step_outs = self.build_graph(only_forward=True) - self.exe.run(framework.default_startup_program()) - expected_outs, expected_lods = self.get_expected_static_step_outs() - for i in range(self._max_sequence_len): - step_out, lod = self.fetch_value(static_step_outs[i]) - np.testing.assert_allclose(step_out, expected_outs[i], rtol=1e-05) - np.testing.assert_allclose(lod, expected_lods[i], rtol=1e-05) - - def test_network_gradient(self): - static_input_grad, loss = self.build_graph() - self.exe.run(framework.default_startup_program()) - - actual_gradients, actual_lod = self.fetch_value(static_input_grad) - - static_input_shape = self.static_input_tensor.shape() - numeric_gradients = np.zeros(shape=static_input_shape).astype('float32') - # calculate numeric gradients - tensor_size = np.product(static_input_shape) - for i in range(tensor_size): - origin = self.static_input_tensor._get_float_element(i) - x_pos = origin + self._delta - self.static_input_tensor._set_float_element(i, x_pos) - y_pos = self.fetch_value(loss)[0][0] - x_neg = origin - self._delta - self.static_input_tensor._set_float_element(i, x_neg) - y_neg = self.fetch_value(loss)[0][0] - self.static_input_tensor._set_float_element(i, origin) - numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2 - np.testing.assert_allclose( - actual_gradients, numeric_gradients, rtol=0.001 - ) - np.testing.assert_allclose( - actual_lod, - self.static_input_tensor.recursive_sequence_lengths(), - rtol=1e-05, - ) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index bac33f3e65f398..37f83e47e791d7 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -2482,7 +2482,7 @@ def test_compare(self): with self.static_graph(): a1 = layers.data(name='a1', shape=[1], dtype='int64') b1 = layers.data(name='b1', shape=[1], dtype='int64') - cond1 = layers.less_equal(x=a1, 
y=b1) + cond1 = paddle.less_equal(x=a1, y=b1) static_ret1 = self.get_static_graph_result( feed={"a1": value_a, "b1": value_b}, fetch_list=[cond1] )[0] @@ -2490,14 +2490,14 @@ def test_compare(self): with _test_eager_guard(): da1 = base.to_variable(value_a) db1 = base.to_variable(value_b) - dcond1 = layers.less_equal(x=da1, y=db1) + dcond1 = paddle.less_equal(x=da1, y=db1) for i in range(len(static_ret1)): self.assertTrue(dcond1.numpy()[i] == static_ret1[i]) da1 = base.to_variable(value_a) db1 = base.to_variable(value_b) - dcond1 = layers.less_equal(x=da1, y=db1) + dcond1 = paddle.less_equal(x=da1, y=db1) for i in range(len(static_ret1)): self.assertTrue(dcond1.numpy()[i] == static_ret1[i]) @@ -2506,7 +2506,7 @@ def test_compare(self): with self.static_graph(): a2 = layers.data(name='a2', shape=[1], dtype='int64') b2 = layers.data(name='b2', shape=[1], dtype='int64') - cond2 = layers.greater_than(x=a2, y=b2) + cond2 = paddle.greater_than(x=a2, y=b2) static_ret2 = self.get_static_graph_result( feed={"a2": value_a, "b2": value_b}, fetch_list=[cond2] )[0] @@ -2514,14 +2514,14 @@ def test_compare(self): with _test_eager_guard(): da2 = base.to_variable(value_a) db2 = base.to_variable(value_b) - dcond2 = layers.greater_than(x=da2, y=db2) + dcond2 = paddle.greater_than(x=da2, y=db2) for i in range(len(static_ret2)): self.assertTrue(dcond2.numpy()[i] == static_ret2[i]) da2 = base.to_variable(value_a) db2 = base.to_variable(value_b) - dcond2 = layers.greater_than(x=da2, y=db2) + dcond2 = paddle.greater_than(x=da2, y=db2) for i in range(len(static_ret2)): self.assertTrue(dcond2.numpy()[i] == static_ret2[i]) @@ -2530,7 +2530,7 @@ def test_compare(self): with self.static_graph(): a3 = layers.data(name='a3', shape=[1], dtype='int64') b3 = layers.data(name='b3', shape=[1], dtype='int64') - cond3 = layers.greater_equal(x=a3, y=b3) + cond3 = paddle.greater_equal(x=a3, y=b3) static_ret3 = self.get_static_graph_result( feed={"a3": value_a, "b3": value_b}, fetch_list=[cond3] )[0] @@ -2538,14 +2538,14 @@ def test_compare(self): with _test_eager_guard(): da3 = base.to_variable(value_a) db3 = base.to_variable(value_b) - dcond3 = layers.greater_equal(x=da3, y=db3) + dcond3 = paddle.greater_equal(x=da3, y=db3) for i in range(len(static_ret3)): self.assertTrue(dcond3.numpy()[i] == static_ret3[i]) da3 = base.to_variable(value_a) db3 = base.to_variable(value_b) - dcond3 = layers.greater_equal(x=da3, y=db3) + dcond3 = paddle.greater_equal(x=da3, y=db3) for i in range(len(static_ret3)): self.assertTrue(dcond3.numpy()[i] == static_ret3[i]) @@ -2554,7 +2554,7 @@ def test_compare(self): with self.static_graph(): a4 = layers.data(name='a4', shape=[1], dtype='int64') b4 = layers.data(name='b4', shape=[1], dtype='int64') - cond4 = layers.equal(x=a4, y=b4) + cond4 = paddle.equal(x=a4, y=b4) static_ret4 = self.get_static_graph_result( feed={"a4": value_a, "b4": value_b}, fetch_list=[cond4] )[0] @@ -2562,14 +2562,14 @@ def test_compare(self): with _test_eager_guard(): da4 = base.to_variable(value_a) db4 = base.to_variable(value_b) - dcond4 = layers.equal(x=da4, y=db4) + dcond4 = paddle.equal(x=da4, y=db4) for i in range(len(static_ret4)): self.assertTrue(dcond4.numpy()[i] == static_ret4[i]) da4 = base.to_variable(value_a) db4 = base.to_variable(value_b) - dcond4 = layers.equal(x=da4, y=db4) + dcond4 = paddle.equal(x=da4, y=db4) for i in range(len(static_ret4)): self.assertTrue(dcond4.numpy()[i] == static_ret4[i]) @@ -2578,7 +2578,7 @@ def test_compare(self): with self.static_graph(): a5 = layers.data(name='a5', shape=[1], 
dtype='int64') b5 = layers.data(name='b5', shape=[1], dtype='int64') - cond5 = layers.equal(x=a5, y=b5) + cond5 = paddle.equal(x=a5, y=b5) static_ret5 = self.get_static_graph_result( feed={"a5": value_a, "b5": value_b}, fetch_list=[cond5] )[0] @@ -2586,14 +2586,14 @@ def test_compare(self): with _test_eager_guard(): da5 = base.to_variable(value_a) db5 = base.to_variable(value_b) - dcond5 = layers.equal(x=da5, y=db5) + dcond5 = paddle.equal(x=da5, y=db5) for i in range(len(static_ret5)): self.assertTrue(dcond5.numpy()[i] == static_ret5[i]) da5 = base.to_variable(value_a) db5 = base.to_variable(value_b) - dcond5 = layers.equal(x=da5, y=db5) + dcond5 = paddle.equal(x=da5, y=db5) for i in range(len(static_ret5)): self.assertTrue(dcond5.numpy()[i] == static_ret5[i]) @@ -2692,7 +2692,7 @@ def fn_3(): pred_1 = layers.less_than(z, x) # true: 0.2 < 0.3 pred_2 = layers.less_than(x, y) # false: 0.3 < 0.1 - pred_3 = layers.equal(x, y) # false: 0.3 == 0.1 + pred_3 = paddle.equal(x, y) # false: 0.3 == 0.1 out_1 = layers.case( pred_fn_pairs=[(pred_1, fn_1), (pred_2, fn_2)], default=fn_3 @@ -2715,7 +2715,7 @@ def fn_3(): pred_1 = layers.less_than(z, x) # true: 0.2 < 0.3 pred_2 = layers.less_than(x, y) # false: 0.3 < 0.1 - pred_3 = layers.equal(x, y) # false: 0.3 == 0.1 + pred_3 = paddle.equal(x, y) # false: 0.3 == 0.1 out_1 = layers.case( pred_fn_pairs=[(pred_1, fn_1), (pred_2, fn_2)], default=fn_3 @@ -2732,7 +2732,7 @@ def fn_3(): pred_1 = layers.less_than(z, x) # true: 0.2 < 0.3 pred_2 = layers.less_than(x, y) # false: 0.3 < 0.1 - pred_3 = layers.equal(x, y) # false: 0.3 == 0.1 + pred_3 = paddle.equal(x, y) # false: 0.3 == 0.1 out_1 = layers.case( pred_fn_pairs=[(pred_1, fn_1), (pred_2, fn_2)], default=fn_3 diff --git a/python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py b/python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py deleted file mode 100644 index 418c95901edd0d..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py +++ /dev/null @@ -1,301 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest -import paddle -import paddle.fluid.core as core -import numpy as np -import paddle.fluid.layers as layers -from paddle.fluid.framework import Program, program_guard -from paddle.fluid.executor import Executor -from paddle.fluid.backward import append_backward - -from paddle.fluid.layers.control_flow import lod_rank_table -from paddle.fluid.layers.control_flow import max_sequence_len -from paddle.fluid.layers.control_flow import lod_tensor_to_array -from paddle.fluid.layers.control_flow import array_to_lod_tensor - - -class TestCPULoDTensorArrayOps(unittest.TestCase): - def place(self): - return core.CPUPlace() - - def test_lod_tensor_to_array_level_0(self): - tensor = core.LoDTensor() - tensor.set(np.arange(10).reshape(10, 1).astype('int32'), self.place()) - tensor.set_recursive_sequence_lengths([[3, 6, 1]]) - expect = [ - np.array(x).astype('int32') - for x in [[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]] - ] - self.main( - tensor=tensor, - expect_array=expect, - expect_lod=[] * 6, - expect_max_len=6, - ) - - def test_lod_tensor_to_array_level_0_empty_seq(self): - tensor = core.LoDTensor() - tensor.set(np.arange(10).reshape(10, 1).astype('int32'), self.place()) - tensor.set_recursive_sequence_lengths([[3, 6, 0, 1]]) - expect = [ - np.array(x).astype('int32') - for x in [[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]] - ] - self.main( - tensor=tensor, - expect_array=expect, - expect_lod=[] * 6, - expect_max_len=6, - ) - - def test_lod_tensor_to_array_level_1(self): - tensor = core.LoDTensor() - tensor.set(np.arange(20).reshape(20, 1).astype('int32'), self.place()) - tensor.set_recursive_sequence_lengths([[2, 3], [3, 6, 2, 6, 3]]) - - expect = [ - np.array([9, 10, 0, 1, 2], dtype='int32'), - np.array([11, 12, 13, 14, 15, 16, 3, 4, 5, 6, 7, 8], dtype='int32'), - np.array([17, 18, 19], dtype='int32'), - ] - - lod = [[[2, 3]], [[6, 6]], [[3]]] - self.main( - tensor=tensor, expect_array=expect, expect_lod=lod, expect_max_len=3 - ) - - def test_lod_tensor_to_array_level_1_empty_seq(self): - tensor = core.LoDTensor() - tensor.set(np.arange(31).reshape(31, 1).astype('int32'), self.place()) - - tensor.set_recursive_sequence_lengths( - [[3, 2, 4, 2], [3, 4, 4, 0, 1, 5, 2, 2, 2, 7, 1]] - ) - - expect = [ - np.array(item, dtype='int32') - for item in [ - [12, 13, 14, 15, 16, 0, 1, 2, 23, 24, 25, 26, 27, 28, 29], - [17, 18, 3, 4, 5, 6, 11, 30], - [19, 20, 7, 8, 9, 10], - [21, 22], - ] - ] - - lod = [[[5, 3, 0, 7]], [[2, 4, 1, 1]], [[2, 4]], [[2]]] - self.main( - tensor=tensor, expect_array=expect, expect_lod=lod, expect_max_len=4 - ) - - def test_lod_tensor_to_array_level_2(self): - tensor = core.LoDTensor() - tensor.set(np.arange(50).reshape(50, 1).astype('int32'), self.place()) - tensor.set_recursive_sequence_lengths( - [ - [2, 3, 1], - [2, 3, 1, 4, 2, 1], - [3, 4, 4, 6, 4, 1, 1, 4, 4, 8, 6, 1, 4], - ] - ) - - expect = [ - np.array(item, dtype='int32') - for item in [ - [21, 0, 1, 2, 3, 4, 5, 6, 46, 47, 48, 49], - list(range(22, 39)) + list(range(7, 21)), - list(range(39, 46)), - ] - ] - lod = [ - [[1, 2, 1], [1, 3, 4, 4]], - [[4, 3], [1, 4, 4, 8, 4, 6, 4]], - [[2], [6, 1]], - ] - self.main( - tensor=tensor, expect_array=expect, expect_lod=lod, expect_max_len=3 - ) - - def test_lod_tensor_to_array_level_2_skip_level(self): - tensor = core.LoDTensor() - tensor.set(np.arange(50).reshape(50, 1).astype('int32'), self.place()) - tensor.set_recursive_sequence_lengths( - [ - [2, 3, 1], - [2, 3, 1, 4, 2, 1], - [3, 4, 4, 6, 4, 1, 1, 4, 4, 8, 6, 1, 4], - ] - ) - self.main( - tensor=tensor, - 
expect_array=None, - expect_lod=None, - expect_max_len=4, - level=1, - ) - - def main(self, tensor, expect_array, expect_lod, expect_max_len, level=0): - place = self.place() - program = Program() - with program_guard(program): - x = layers.data(name='x', shape=[10]) - x.persistable = True - table = lod_rank_table(x, level=level) - max_len = max_sequence_len(table) - max_len.persistable = True - array = lod_tensor_to_array(x, table) - array.persistable = True - - result = array_to_lod_tensor(array, table) - result.persistable = True - exe = Executor(place) - scope = core.Scope() - exe.run(program, feed={'x': tensor}, scope=scope) - var = scope.find_var(array.name) - array = var.get_lod_tensor_array() - if expect_array is not None and expect_lod is not None: - self.check_array_same(array, expect_array, expect_lod) - self.check_tensor_same(scope.find_var(result.name).get_tensor(), tensor) - - self.assertEqual( - np.array(scope.find_var(max_len.name).get_tensor())[0], - expect_max_len, - ) - - def check_array_same(self, array, expect_tensor, expect_lod): - self.assertEqual(len(expect_tensor), len(array)) - for i, exp in enumerate(zip(expect_tensor, expect_lod)): - exp_tensor, exp_lod = exp - exp_tensor = np.expand_dims(exp_tensor, axis=1) - np.testing.assert_allclose( - exp_tensor, np.array(array[i]), rtol=1e-05 - ) - self.assertEqual(exp_lod, array[i].recursive_sequence_lengths()) - - def check_tensor_same(self, actual, expect): - np.testing.assert_allclose( - np.array(actual), np.array(expect), rtol=1e-05 - ) - self.assertEqual( - actual.recursive_sequence_lengths(), - expect.recursive_sequence_lengths(), - ) - - -class TestCPULoDTensorArrayOpGrad(unittest.TestCase): - def test_grad(self): - place = core.CPUPlace() - program = Program() - - with program_guard(program): - x = layers.data( - name='x', shape=[1], dtype='float32', stop_gradient=False - ) - table = lod_rank_table(x, level=0) - array = lod_tensor_to_array(x, table) - result = array_to_lod_tensor(array, table) - - mean = paddle.mean(result) - - append_backward(mean) - - tensor = core.LoDTensor() - tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place) - tensor.set_recursive_sequence_lengths([[3, 6, 1]]) - - g_vars = program.global_block().var(x.name + "@GRAD") - - exe = Executor(place) - g_out = [ - np.array(item).sum() - for item in exe.run( - program, - feed={'x': tensor}, - fetch_list=[g_vars], - return_numpy=False, - ) - ] - g_out_sum = np.array(g_out).sum() - - self.assertAlmostEqual(1.0, g_out_sum, delta=0.1) - - -class TestLoDTensorArrayError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - x = np.random.random((10)).astype("float32") - x2 = layers.data(name='x', shape=[10]) - table = lod_rank_table(x2, level=0) - - def test_x_Variable(): - rank_table = lod_tensor_to_array(x=x, table=table) - - self.assertRaises(TypeError, test_x_Variable) - - table2 = np.random.random((2)).astype("int64") - - def test_table_Variable(): - rank_table = lod_tensor_to_array(x=x2, table=table2) - - self.assertRaises(TypeError, test_table_Variable) - - def test_x_list_Variable(): - rank_table = lod_tensor_to_array(x=[x], table=table) - - self.assertRaises(TypeError, test_x_list_Variable) - - def test_table_list_Variable(): - rank_table = lod_tensor_to_array(x=x2, table=[table2]) - - self.assertRaises(TypeError, test_table_list_Variable) - - array = lod_tensor_to_array(x2, table) - - -class TestArrayLoDTensorError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), 
Program()): - x = np.random.random((10)).astype("float32") - x2 = layers.data(name='x', shape=[10]) - table = lod_rank_table(x2, level=0) - array = lod_tensor_to_array(x2, table) - - def test_x_Variable(): - rank_table = array_to_lod_tensor(x=x, table=table) - - self.assertRaises(TypeError, test_x_Variable) - - table2 = np.random.random((2)).astype("int64") - - def test_table_Variable(): - rank_table = array_to_lod_tensor(x=array, table=table2) - - self.assertRaises(TypeError, test_table_Variable) - - def test_x_list_Variable(): - rank_table = array_to_lod_tensor(x=[x], table=table) - - self.assertRaises(TypeError, test_x_list_Variable) - - def test_table_list_Variable(): - rank_table = array_to_lod_tensor(x=x2, table=[table2]) - - self.assertRaises(TypeError, test_table_list_Variable) - - array = array_to_lod_tensor(x2, table) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py index b417789ec01c4c..6ebaf0d64e6735 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py @@ -193,7 +193,7 @@ def test_attr_tensor_API(self): ret_2 = fluid.layers.nn.uniform_random( [2, 3, 2], min=_min, max=_max, seed=_seed ) - res = fluid.layers.equal(ret, ret_2) + res = paddle.equal(ret, ret_2) place = fluid.CPUPlace() exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py index dbc036cb7e47fa..ddad1f60c5128f 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py @@ -383,7 +383,7 @@ def test_attr_tensor_API(self): ret_2 = fluid.layers.nn.uniform_random( [2, 3, 2], min=_min, max=_max, seed=_seed ) - res = fluid.layers.equal(ret, ret_2) + res = paddle.equal(ret, ret_2) place = fluid.CPUPlace() if fluid.core.is_compiled_with_cuda(): place = fluid.CUDAPlace(0) From fc882c7bcf557cd3def60a1a956e5b7a7eede3b4 Mon Sep 17 00:00:00 2001 From: gem5 <117625383+linsheng011@users.noreply.github.com> Date: Tue, 29 Nov 2022 15:14:17 +0800 Subject: [PATCH 022/154] Support rsqrt op. 
(#48223) --- .../fluid/inference/api/analysis_predictor.cc | 1 + .../inference/tensorrt/convert/CMakeLists.txt | 0 .../inference/tensorrt/convert/unary_op.cc | 59 ++++++++++++------- paddle/fluid/inference/tensorrt/op_teller.cc | 2 + .../ir/inference/test_trt_convert_unary.py | 11 ++-- 5 files changed, 45 insertions(+), 28 deletions(-) mode change 100644 => 100755 paddle/fluid/inference/api/analysis_predictor.cc mode change 100644 => 100755 paddle/fluid/inference/tensorrt/convert/CMakeLists.txt mode change 100644 => 100755 paddle/fluid/inference/tensorrt/op_teller.cc diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc old mode 100644 new mode 100755 index 618de300fcb022..0a8b3d5eb29ed3 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2246,6 +2246,7 @@ USE_TRT_CONVERTER(flatten_contiguous_range); USE_TRT_CONVERTER(matmul); USE_TRT_CONVERTER(matmul_v2); USE_TRT_CONVERTER(bmm); +USE_TRT_CONVERTER(rsqrt); USE_TRT_CONVERTER(conv2d); USE_TRT_CONVERTER(relu); USE_TRT_CONVERTER(exp); diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/paddle/fluid/inference/tensorrt/convert/unary_op.cc b/paddle/fluid/inference/tensorrt/convert/unary_op.cc index 565afd214aeea6..cbf2139a3c4836 100644 --- a/paddle/fluid/inference/tensorrt/convert/unary_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/unary_op.cc @@ -52,38 +52,47 @@ class UnaryOpConverter : public OpConverter { nvinfer1::ITensor* input_tensor = engine_->GetITensor(op_desc.Input("X")[0]); auto op_pair = ops.find(op_type_); - nvinfer1::IUnaryLayer* layer = - TRT_ENGINE_ADD_LAYER(engine_, Unary, *input_tensor, op_pair->second); + + nvinfer1::IUnaryLayer* layer = nullptr; + for (auto trt_op : op_pair->second) { + layer = TRT_ENGINE_ADD_LAYER(engine_, Unary, *input_tensor, trt_op); + input_tensor = layer->getOutput(0); + } + auto output_name = op_desc.Output("Out")[0]; RreplenishLayerAndOutput(layer, op_type_, {output_name}, test_mode); } protected: std::string op_type_; - static const std::unordered_map ops; + static const std::unordered_map> + ops; }; -const std::unordered_map +const std::unordered_map> UnaryOpConverter::ops = { - {"exp", nvinfer1::UnaryOperation::kEXP}, - {"log", nvinfer1::UnaryOperation::kLOG}, - {"sqrt", nvinfer1::UnaryOperation::kSQRT}, - {"abs", nvinfer1::UnaryOperation::kABS}, - {"sin", nvinfer1::UnaryOperation::kSIN}, - {"cos", nvinfer1::UnaryOperation::kCOS}, - {"tan", nvinfer1::UnaryOperation::kTAN}, - {"sinh", nvinfer1::UnaryOperation::kSINH}, - {"cosh", nvinfer1::UnaryOperation::kCOSH}, - {"asin", nvinfer1::UnaryOperation::kASIN}, - {"acos", nvinfer1::UnaryOperation::kACOS}, - {"atan", nvinfer1::UnaryOperation::kATAN}, - {"asinh", nvinfer1::UnaryOperation::kASINH}, - {"atanh", nvinfer1::UnaryOperation::kATANH}, - {"ceil", nvinfer1::UnaryOperation::kCEIL}, - {"floor", nvinfer1::UnaryOperation::kFLOOR}, - {"reciprocal", nvinfer1::UnaryOperation::kRECIP}, + {"exp", {nvinfer1::UnaryOperation::kEXP}}, + {"log", {nvinfer1::UnaryOperation::kLOG}}, + {"sqrt", {nvinfer1::UnaryOperation::kSQRT}}, + {"abs", {nvinfer1::UnaryOperation::kABS}}, + {"sin", {nvinfer1::UnaryOperation::kSIN}}, + {"cos", {nvinfer1::UnaryOperation::kCOS}}, + {"tan", {nvinfer1::UnaryOperation::kTAN}}, + {"sinh", {nvinfer1::UnaryOperation::kSINH}}, + {"cosh", {nvinfer1::UnaryOperation::kCOSH}}, + {"asin", 
{nvinfer1::UnaryOperation::kASIN}}, + {"acos", {nvinfer1::UnaryOperation::kACOS}}, + {"atan", {nvinfer1::UnaryOperation::kATAN}}, + {"asinh", {nvinfer1::UnaryOperation::kASINH}}, + {"atanh", {nvinfer1::UnaryOperation::kATANH}}, + {"ceil", {nvinfer1::UnaryOperation::kCEIL}}, + {"floor", {nvinfer1::UnaryOperation::kFLOOR}}, + {"rsqrt", + {nvinfer1::UnaryOperation::kSQRT, nvinfer1::UnaryOperation::kRECIP}}, + {"reciprocal", {nvinfer1::UnaryOperation::kRECIP}}, #if IS_TRT_VERSION_GE(7000) - {"erf", nvinfer1::UnaryOperation::kERF}, + {"erf", {nvinfer1::UnaryOperation::kERF}}, #endif }; @@ -153,6 +162,11 @@ class FloorOpConverter : public UnaryOpConverter { public: FloorOpConverter() { op_type_ = "floor"; } }; + +class RsqrtOpConverter : public UnaryOpConverter { + public: + RsqrtOpConverter() { op_type_ = "rsqrt"; } +}; class ReciprocalOpConverter : public UnaryOpConverter { public: ReciprocalOpConverter() { op_type_ = "reciprocal"; } @@ -184,6 +198,7 @@ REGISTER_TRT_OP_CONVERTER(asinh, AsinhOpConverter); REGISTER_TRT_OP_CONVERTER(atanh, AtanhOpConverter); REGISTER_TRT_OP_CONVERTER(ceil, CeilOpConverter); REGISTER_TRT_OP_CONVERTER(floor, FloorOpConverter); +REGISTER_TRT_OP_CONVERTER(rsqrt, RsqrtOpConverter); REGISTER_TRT_OP_CONVERTER(reciprocal, ReciprocalOpConverter); #if IS_TRT_VERSION_GE(7000) REGISTER_TRT_OP_CONVERTER(erf, ErfOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc old mode 100644 new mode 100755 index 22bd172e93b40f..0e180cd7d65269 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -2310,6 +2310,7 @@ struct SimpleOpTypeSetTeller : public Teller { "atanh", "ceil", "floor", + "rsqrt", "reciprocal", "erf", "softmax", @@ -2438,6 +2439,7 @@ struct SimpleOpTypeSetTeller : public Teller { "atanh", "ceil", "floor", + "rsqrt", "reciprocal", "erf", "softmax", diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py index 0928453b0ac341..e5c763b822b556 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py @@ -31,16 +31,14 @@ def sample_program_configs(self): self.trt_param.workspace_size = 1073741824 def generate_input1(dims, batch, attrs: List[Dict[str, Any]]): - if dims == 1: - return np.random.random([32]).astype(np.float32) - elif dims == 2: + if dims == 2: return np.random.random([3, 32]).astype(np.float32) elif dims == 3: return np.random.random([3, 32, 32]).astype(np.float32) else: return np.random.random([batch, 3, 32, 32]).astype(np.float32) - for dims in [1, 2, 3, 4]: + for dims in [2, 3, 4]: for batch in [1, 4]: for op_type in [ "exp", @@ -59,6 +57,7 @@ def generate_input1(dims, batch, attrs: List[Dict[str, Any]]): "atanh", "ceil", "floor", + "rsqrt", "reciprocal", ]: self.dims = dims @@ -135,7 +134,7 @@ def generate_trt_nodes_num(attrs, dynamic_shape): self.trt_param.precision = paddle_infer.PrecisionType.Float32 yield self.create_inference_config(), generate_trt_nodes_num( attrs, False - ), 1e-5 + ), 1e-4 self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, False @@ -146,7 +145,7 @@ def generate_trt_nodes_num(attrs, dynamic_shape): self.trt_param.precision = paddle_infer.PrecisionType.Float32 yield self.create_inference_config(), generate_trt_nodes_num( attrs, True 
- ), 1e-5 + ), 1e-4 self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, True From a66bb67afbb292889e688056f2752fec9cf2011c Mon Sep 17 00:00:00 2001 From: JZ-LIANG Date: Tue, 29 Nov 2022 15:16:39 +0800 Subject: [PATCH 023/154] Bugfix for Collective default calc stream (#48308) * get default calc stream from execution ctx instead of global dev ctx pool. --- paddle/fluid/operators/collective/alltoall_op.cu.cc | 4 ++-- paddle/fluid/operators/collective/barrier_op.cu.cc | 4 ++-- paddle/fluid/operators/collective/c_allgather_op.cu.cc | 4 ++-- paddle/fluid/operators/collective/c_allreduce_op.h | 6 ++++-- paddle/fluid/operators/collective/c_broadcast_op.cu.cc | 4 ++-- paddle/fluid/operators/collective/c_concat_op.cu.cc | 4 ++-- paddle/fluid/operators/collective/c_reduce_op.h | 4 ++-- paddle/fluid/operators/collective/c_reducescatter_op.cu.cc | 4 ++-- paddle/fluid/operators/collective/c_scatter_op.cu.cc | 4 ++-- paddle/fluid/operators/collective/global_gather_op.cu.cc | 4 ++-- paddle/fluid/operators/collective/global_scatter_op.cu.cc | 4 ++-- .../fluid/operators/collective/partial_allgather_op.cu.cc | 4 ++-- paddle/fluid/operators/collective/partial_recv_op.cu.cc | 4 ++-- paddle/fluid/operators/collective/partial_send_op.cu.cc | 4 ++-- paddle/fluid/operators/collective/recv_v2_op.cu.cc | 4 ++-- paddle/fluid/operators/collective/send_v2_op.cu.cc | 4 ++-- 16 files changed, 34 insertions(+), 32 deletions(-) diff --git a/paddle/fluid/operators/collective/alltoall_op.cu.cc b/paddle/fluid/operators/collective/alltoall_op.cu.cc index e50d14e5ef6ae1..fd67342b3affa3 100644 --- a/paddle/fluid/operators/collective/alltoall_op.cu.cc +++ b/paddle/fluid/operators/collective/alltoall_op.cu.cc @@ -46,8 +46,8 @@ class AllToAllOpCUDAKernel : public framework::OpKernel { gpuStream_t stream = nullptr; if (ctx.Attr("use_calc_stream")) { - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - stream = static_cast(dev_ctx)->stream(); + // should ExecutionContext for calc stream. + stream = ctx.cuda_device_context().stream(); } else { stream = comm->stream(); } diff --git a/paddle/fluid/operators/collective/barrier_op.cu.cc b/paddle/fluid/operators/collective/barrier_op.cu.cc index 622b25f2a49bb3..648b8fdc83b878 100644 --- a/paddle/fluid/operators/collective/barrier_op.cu.cc +++ b/paddle/fluid/operators/collective/barrier_op.cu.cc @@ -39,8 +39,8 @@ class BarrierOpCUDAKernel : public framework::OpKernel { int rid = ctx.Attr("ring_id"); auto comm = platform::NCCLCommContext::Instance().Get(rid, place); - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - auto stream = static_cast(dev_ctx)->stream(); + // should ExecutionContext for calc stream. 
+ auto stream = ctx.cuda_device_context().stream(); ncclRedOp_t nccl_red_type = ncclSum; PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( sendbuff, recvbuff, numel, dtype, nccl_red_type, comm->comm(), stream)); diff --git a/paddle/fluid/operators/collective/c_allgather_op.cu.cc b/paddle/fluid/operators/collective/c_allgather_op.cu.cc index ddef85d73e0841..947475ece482ab 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op.cu.cc @@ -67,8 +67,8 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel { gpuStream_t stream = nullptr; if (ctx.Attr("use_calc_stream")) { - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - stream = static_cast(dev_ctx)->stream(); + // should ExecutionContext for calc stream. + stream = ctx.cuda_device_context().stream(); } else { stream = comm->stream(); } diff --git a/paddle/fluid/operators/collective/c_allreduce_op.h b/paddle/fluid/operators/collective/c_allreduce_op.h index 4d90442afbc5ab..8d3af26f0c2542 100644 --- a/paddle/fluid/operators/collective/c_allreduce_op.h +++ b/paddle/fluid/operators/collective/c_allreduce_op.h @@ -482,8 +482,10 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { gpuStream_t stream = nullptr; if (ctx.Attr("use_calc_stream")) { - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - stream = static_cast(dev_ctx)->stream(); + // should not use global ctx for calc stream. + // auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); + // stream = static_cast(dev_ctx)->stream(); + stream = ctx.cuda_device_context().stream(); } else { stream = comm->stream(); } diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc index 78fb50ce31c62d..47e5bfd825d650 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc @@ -53,8 +53,8 @@ class CBroadcastOpCUDAKernel : public framework::OpKernel { auto comm = platform::NCCLCommContext::Instance().Get(rid, place); gpuStream_t stream = nullptr; if (ctx.Attr("use_calc_stream")) { - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - stream = static_cast(dev_ctx)->stream(); + // should ExecutionContext for calc stream. + stream = ctx.cuda_device_context().stream(); } else { stream = comm->stream(); } diff --git a/paddle/fluid/operators/collective/c_concat_op.cu.cc b/paddle/fluid/operators/collective/c_concat_op.cu.cc index e2ee9cefdbfb28..2d7eaf26ea420a 100644 --- a/paddle/fluid/operators/collective/c_concat_op.cu.cc +++ b/paddle/fluid/operators/collective/c_concat_op.cu.cc @@ -89,8 +89,8 @@ class CConcatOpCUDAKernel : public framework::OpKernel { const T* send_buff = x->data(); T* recv_buff = temp_out.data(); gpuStream_t stream = nullptr; - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - stream = static_cast(dev_ctx)->stream(); + // should ExecutionContext for calc stream. 
+ stream = ctx.cuda_device_context().stream(); PADDLE_ENFORCE_GPU_SUCCESS( platform::dynload::ncclAllGather(send_buff, diff --git a/paddle/fluid/operators/collective/c_reduce_op.h b/paddle/fluid/operators/collective/c_reduce_op.h index f9288dea063f05..3e752011f152e2 100644 --- a/paddle/fluid/operators/collective/c_reduce_op.h +++ b/paddle/fluid/operators/collective/c_reduce_op.h @@ -311,8 +311,8 @@ class CReduceOpCUDAKernel : public framework::OpKernel { gpuStream_t stream = nullptr; if (ctx.Attr("use_calc_stream")) { - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - stream = static_cast(dev_ctx)->stream(); + // should ExecutionContext for calc stream. + stream = ctx.cuda_device_context().stream(); } else { stream = comm->stream(); } diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc index b4eba9d124243c..e0b0800f77769d 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc @@ -54,8 +54,8 @@ class CReduceScatterOpCUDAKernel : public framework::OpKernel { gpuStream_t stream = nullptr; if (ctx.Attr("use_calc_stream")) { - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - stream = static_cast(dev_ctx)->stream(); + // should ExecutionContext for calc stream. + stream = ctx.cuda_device_context().stream(); } else { stream = comm->stream(); } diff --git a/paddle/fluid/operators/collective/c_scatter_op.cu.cc b/paddle/fluid/operators/collective/c_scatter_op.cu.cc index 903d3d568861a8..72493e51505cd0 100644 --- a/paddle/fluid/operators/collective/c_scatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_scatter_op.cu.cc @@ -60,8 +60,8 @@ class CScatterOpCUDAKernel : public framework::OpKernel { gpuStream_t stream = nullptr; if (ctx.Attr("use_calc_stream")) { - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - stream = static_cast(dev_ctx)->stream(); + // should ExecutionContext for calc stream. + stream = ctx.cuda_device_context().stream(); } else { stream = comm->stream(); } diff --git a/paddle/fluid/operators/collective/global_gather_op.cu.cc b/paddle/fluid/operators/collective/global_gather_op.cu.cc index 439630a7f1dd7c..83e1a4d4ca778c 100644 --- a/paddle/fluid/operators/collective/global_gather_op.cu.cc +++ b/paddle/fluid/operators/collective/global_gather_op.cu.cc @@ -82,8 +82,8 @@ struct GlobalGatherFunctor { auto comm = platform::NCCLCommContext::Instance().Get(ring_id, place); gpuStream_t stream = nullptr; if (ctx.Attr("use_calc_stream")) { - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - stream = static_cast(dev_ctx)->stream(); + // should ExecutionContext for calc stream. + stream = ctx.cuda_device_context().stream(); } else { stream = comm->stream(); } diff --git a/paddle/fluid/operators/collective/global_scatter_op.cu.cc b/paddle/fluid/operators/collective/global_scatter_op.cu.cc index 4ccf9dee2631f2..017398413b372b 100644 --- a/paddle/fluid/operators/collective/global_scatter_op.cu.cc +++ b/paddle/fluid/operators/collective/global_scatter_op.cu.cc @@ -81,8 +81,8 @@ struct GlobalScatterFunctor { auto comm = platform::NCCLCommContext::Instance().Get(ring_id, place); gpuStream_t stream = nullptr; if (ctx.Attr("use_calc_stream")) { - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - stream = static_cast(dev_ctx)->stream(); + // should ExecutionContext for calc stream. 
+ stream = ctx.cuda_device_context().stream(); } else { stream = comm->stream(); } diff --git a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc index cd1e12d7e1bab2..c4565a94500639 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc @@ -75,8 +75,8 @@ class PartialAllGatherOpCUDAKernel : public framework::OpKernel { gpuStream_t stream = nullptr; if (ctx.Attr("use_calc_stream")) { - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - stream = static_cast(dev_ctx)->stream(); + // should ExecutionContext for calc stream. + stream = ctx.cuda_device_context().stream(); } else { stream = comm->stream(); } diff --git a/paddle/fluid/operators/collective/partial_recv_op.cu.cc b/paddle/fluid/operators/collective/partial_recv_op.cu.cc index c8a49f51d5c468..c95d1fe4bc6195 100644 --- a/paddle/fluid/operators/collective/partial_recv_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_recv_op.cu.cc @@ -81,8 +81,8 @@ class PartialRecvOpCUDAKernel : public framework::OpKernel { gpuStream_t stream = nullptr; auto comm = platform::NCCLCommContext::Instance().Get(rid, place); if (ctx.Attr("use_calc_stream")) { - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - stream = static_cast(dev_ctx)->stream(); + // should ExecutionContext for calc stream. + stream = ctx.cuda_device_context().stream(); } else { stream = comm->stream(); } diff --git a/paddle/fluid/operators/collective/partial_send_op.cu.cc b/paddle/fluid/operators/collective/partial_send_op.cu.cc index 7d4125be8d32e7..7b9c154bd44997 100644 --- a/paddle/fluid/operators/collective/partial_send_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_send_op.cu.cc @@ -77,8 +77,8 @@ class PartialSendCUDAKernel : public framework::OpKernel { auto place = ctx.GetPlace(); auto comm = platform::NCCLCommContext::Instance().Get(rid, place); if (ctx.Attr("use_calc_stream")) { - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - stream = static_cast(dev_ctx)->stream(); + // should ExecutionContext for calc stream. + stream = ctx.cuda_device_context().stream(); } else { stream = comm->stream(); } diff --git a/paddle/fluid/operators/collective/recv_v2_op.cu.cc b/paddle/fluid/operators/collective/recv_v2_op.cu.cc index 06e06a79c6b623..a32376f3e842da 100644 --- a/paddle/fluid/operators/collective/recv_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/recv_v2_op.cu.cc @@ -157,8 +157,8 @@ class RecvOpV2CUDAKernel : public framework::OpKernel { } auto comm = platform::NCCLCommContext::Instance().Get(rid, place); if (ctx.Attr("use_calc_stream")) { - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - stream = static_cast(dev_ctx)->stream(); + // should ExecutionContext for calc stream. 
+ stream = ctx.cuda_device_context().stream(); } else { stream = comm->stream(); } diff --git a/paddle/fluid/operators/collective/send_v2_op.cu.cc b/paddle/fluid/operators/collective/send_v2_op.cu.cc index c7ab3c749b9b73..631595ccd08695 100644 --- a/paddle/fluid/operators/collective/send_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/send_v2_op.cu.cc @@ -151,8 +151,8 @@ class SendOpV2CUDAKernel : public framework::OpKernel { auto place = ctx.GetPlace(); auto comm = platform::NCCLCommContext::Instance().Get(rid, place); if (ctx.Attr("use_calc_stream")) { - auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); - stream = static_cast(dev_ctx)->stream(); + // should ExecutionContext for calc stream. + stream = ctx.cuda_device_context().stream(); } else { stream = comm->stream(); } From 9d4b4be36c5588bfc35462803201fa6d26f225c7 Mon Sep 17 00:00:00 2001 From: haosicheng <47998305+HarperCy@users.noreply.github.com> Date: Tue, 29 Nov 2022 15:38:52 +0800 Subject: [PATCH 024/154] add floor fp32 op *test=kunlun (#48458) --- .../fluid/platform/device/xpu/xpu2_op_list.h | 1 + paddle/phi/kernels/xpu/activation_kernel.cc | 15 +++++++++++ .../unittests/xpu/test_activation_op_xpu.py | 26 +++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h index b1838a0f714c86..cdd86479f44b99 100644 --- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h +++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h @@ -282,6 +282,7 @@ XPUOpMap& get_kl2_ops() { pOpKernelType(vartype::INT32, XPUPlace()), pOpKernelType(vartype::INT8, XPUPlace()), pOpKernelType(vartype::FP32, XPUPlace())})}, + {"floor", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"gather_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), pOpKernelType(vartype::FP16, XPUPlace())})}, diff --git a/paddle/phi/kernels/xpu/activation_kernel.cc b/paddle/phi/kernels/xpu/activation_kernel.cc index 0d41afeeacc2a6..39f928eb114733 100644 --- a/paddle/phi/kernels/xpu/activation_kernel.cc +++ b/paddle/phi/kernels/xpu/activation_kernel.cc @@ -426,7 +426,21 @@ struct XPUTanhFunctor : public funcs::BaseActivationFunctor { } }; +template +struct XPUFloorFunctor : public funcs::BaseActivationFunctor { + using XPUType = typename XPUTypeTrait::Type; + template + void operator()(const Context& dev_ctx, + const DenseTensor& x, + DenseTensor* out) const { + int r = xpu_activation_func( + dev_ctx, x, out, xpu::floor); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "floor"); + } +}; + DEFINE_XPU_ACTIVATION_KERNEL(Exp, XPUExpFunctor) +DEFINE_XPU_ACTIVATION_KERNEL(Floor, XPUFloorFunctor) DEFINE_XPU_ACTIVATION_KERNEL(Log, XPULogFunctor) DEFINE_XPU_ACTIVATION_KERNEL(Reciprocal, XPUReciprocalFunctor) DEFINE_XPU_ACTIVATION_KERNEL(Relu, XPUReluFunctor) @@ -483,6 +497,7 @@ PD_REGISTER_KERNEL( square, XPU, ALL_LAYOUT, phi::SquareKernel, float, phi::dtype::float16) {} PD_REGISTER_ACTIVATION_KERNEL(exp, ExpKernel) // no grad +PD_REGISTER_ACTIVATION_KERNEL(floor, FloorKernel) PD_REGISTER_ACTIVATION_KERNEL(log, LogKernel) PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel) PD_REGISTER_ACTIVATION_KERNEL(hard_sigmoid, HardSigmoidKernel) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py index 8c4c722cbfac00..c30a472618c1c9 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py +++ 
b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py @@ -177,6 +177,32 @@ def set_case(self): create_test_class(globals(), XPUTestSqrtOP, stype) +class XPUTestFloorOP(XPUOpTestWrapper): + def __init__(self): + self.op_name = 'floor' + self.use_dynamic_create_class = False + + class XPUTestSqrt(TestActivationOPBase): + def set_case(self): + self.op_type = "floor" + self.dtype = self.in_type + + x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + out = np.floor(x) + + self.attrs = {'use_xpu': True} + self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.outputs = {'Out': out} + + def test_check_grad(self): + self.check_output_with_place(self.place) + + +support_types = get_xpu_op_support_types('floor') +for stype in support_types: + create_test_class(globals(), XPUTestFloorOP, stype) + + class XPUTestAbsOP(XPUOpTestWrapper): def __init__(self): self.op_name = 'abs' From 6dbfbfa5bf4413f7aea1b5e9aea66972ecf5fc9a Mon Sep 17 00:00:00 2001 From: kangguangli Date: Tue, 29 Nov 2022 15:44:53 +0800 Subject: [PATCH 025/154] [Control Flow] replace executor in while op with InterpreterCore (#47573) * fix:add no support for cuda_arch<700 * replace Executor in while op with InterpreterCore * cache InterpreterCore as the member of WhileOp * fix bug: tensor place changed because of assign op in while loop * refine code * refine code * refine code * hot fix * fix compile * merge develop * follow comments * add log for test * remove LoDTensor * set flag control_flow_use_new_executor false Co-authored-by: fengshuai Co-authored-by: zhiqiu --- .../framework/new_executor/interpretercore.cc | 21 ++- .../operators/controlflow/CMakeLists.txt | 3 +- .../controlflow/conditional_block_op.cc | 163 +++++++--------- .../controlflow/control_flow_op_helper.h | 58 ++++++ .../fluid/operators/controlflow/while_op.cc | 175 ++++++++++++++++-- 5 files changed, 305 insertions(+), 115 deletions(-) create mode 100644 paddle/fluid/operators/controlflow/control_flow_op_helper.h diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index c792aa393b5196..070230af4d7867 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -638,9 +638,20 @@ void InterpreterCore::Convert( if (var_desc && ins.count(item.first) && !info.IsInArgBufferNeeded(var_desc->Name())) { continue; - } else if (!block_.HasVar(var_scope_.GetNameById(id))) { - VLOG(10) << "[gc_check_inputs] skip gc: " - << var_scope_.GetNameById(id); + } + // skip when this var is not in block and not a data_transferred var, + // which means this var is managed by other block + const auto& var_name = var_scope_.GetNameById(id); + bool not_owned = !block_.HasVar(var_name); + const auto& transferred_vars = var_scope_.DataTransferAddedVars(); + bool not_transferred = + std::all_of(transferred_vars.begin(), + transferred_vars.end(), + [&](const std::pair& elem) { + return elem.first != var_name; + }); + if (not_owned && not_transferred) { + VLOG(10) << "[gc_check_inputs] skip gc: " << var_name; continue; } gc_check_vars.insert(id); @@ -759,7 +770,7 @@ void InterpreterCore::RunOperator(const Instruction& instr_node) { auto place = instr_node.DeviceContext().GetPlace(); Scope* local_scope = HasLocalScope() ? 
var_scope_.GetMutableLocalScope() : var_scope_.GetMutableScope(); - VLOG(4) << "Start run " << place << " " << op->DebugStringEx(local_scope_); + VLOG(4) << "Start run " << place << " " << op->DebugStringEx(local_scope); SetDeviceId(place); @@ -873,7 +884,7 @@ void InterpreterCore::RunOperator(const Instruction& instr_node) { VLOG(4) << "Check nan/inf"; framework::details::CheckOpHasNanOrInf( *op, - *local_scope_, + *local_scope, place); // TODO(xiongkun03) change it to inner scope. } } diff --git a/paddle/fluid/operators/controlflow/CMakeLists.txt b/paddle/fluid/operators/controlflow/CMakeLists.txt index 07d72297b2b704..7f953f031b0164 100644 --- a/paddle/fluid/operators/controlflow/CMakeLists.txt +++ b/paddle/fluid/operators/controlflow/CMakeLists.txt @@ -3,7 +3,8 @@ if(WITH_UNITY_BUILD) # Load Unity Build rules for operators in paddle/fluid/operators/controlflow. include(unity_build_rule.cmake) endif() -register_operators(EXCLUDES conditional_block_op DEPS naive_executor) +register_operators(EXCLUDES conditional_block_op DEPS naive_executor + standalone_executor) cc_library( conditional_block_op diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.cc b/paddle/fluid/operators/controlflow/conditional_block_op.cc index 6425c3519e94c4..1efc5085165776 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include "paddle/fluid/framework/new_executor/standalone_executor.h" #include "paddle/fluid/operators/assign_op.h" +#include "paddle/fluid/operators/controlflow/control_flow_op_helper.h" #include "paddle/fluid/platform/flags.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -39,43 +40,6 @@ using ExecutorPrepareContext = framework::ExecutorPrepareContext; using InterpreterCore = framework::InterpreterCore; -namespace details { -static void BuildScopeForConditionalBlockOp( - const paddle::framework::InterpreterCore &interpreter_core, - const paddle::framework::BlockDesc &block, - paddle::framework::Scope *scope) { - for (auto &var_desc : block.AllVars()) { - auto var_name = var_desc->Name(); - if (var_name == framework::kEmptyVarName) { - continue; - } - VLOG(5) << "[BuildScopeForConditionalBlockOp]" - << "start:" << var_name; - if (var_desc->Persistable()) { - VLOG(5) << "[BuildScopeForConditionalBlockOp]" - << "Don't process persistent: " << var_name; - } else { - auto *ptr = scope->Var(var_name); - InitializeVariable(ptr, var_desc->GetType()); - VLOG(5) << "[BuildScopeForConditionalBlockOp]" - << "Not Found locally and created: " << var_name; - } - } - - auto &data_transfer_added_vars = - interpreter_core.GetVariableScope()->DataTransferAddedVars(); - for (size_t i = 0; i < data_transfer_added_vars.size(); i++) { - auto *ptr = scope->Var(data_transfer_added_vars[i].first); - InitializeVariable(ptr, - static_cast( - data_transfer_added_vars[i].second)); - VLOG(10) << "[BuildScopeForConditionalBlockOp]" - << "Initialize Transfer Added Variable " - << data_transfer_added_vars[i].first; - } -} -} // namespace details - class ConditionalBlockOp : public ConditionalOp { public: ConditionalBlockOp(const std::string &type, @@ -141,51 +105,53 @@ class ConditionalBlockOp : public ConditionalOp { Attr>(ConditionalOp::kSkipEagerDeletionVars); if (FLAGS_control_flow_use_new_executor) { - std::set skip_gc_vars(skip_vars.begin(), skip_vars.end()); - - if (!core || !platform::is_same_place(core->GetPlace(), dev_place)) { - VLOG(10) << 
"[interpreterCore cache]" << core.get(); - VLOG_IF(10, core) - << platform::is_same_place(core->GetPlace(), dev_place); - core.reset(new InterpreterCore(dev_place, - *block, - skip_gc_vars, - &cur_scope, - /* used_for_jit */ false, - /* used_for_control_flow_op */ true)); + LOG_FIRST_N(INFO, 1) + << "[ControlFlow][ConditionalBlock] New Executor is Running."; + if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) { + std::set skip_gc_vars(skip_vars.begin(), + skip_vars.end()); + VLOG(10) << "[interpreterCore cache]" << core_.get(); + VLOG_IF(10, core_) + << platform::is_same_place(core_->GetPlace(), dev_place); + core_.reset(new InterpreterCore(dev_place, + *block, + skip_gc_vars, + &cur_scope, + /* used_for_jit */ false, + /* used_for_control_flow_op */ true)); VLOG(10) << "[interpreterCore cache]" - << "new created:" << core; + << "new created:" << core_; } else { - details::BuildScopeForConditionalBlockOp(*core, *block, &cur_scope); - core->reset_scope(&cur_scope); + BuildScopeForControlFlowOp(*core_, *block, &cur_scope); + core_->reset_scope(&cur_scope); } - core->Run({}, false); + core_->Run({}, false); } else { - if (!exec || !platform::is_same_place(exec->GetPlace(), dev_place)) { + if (!exec_ || !platform::is_same_place(exec_->GetPlace(), dev_place)) { auto &pdesc = *block->Program(); - exec.reset(new Executor(dev_place)); - if (FLAGS_use_mkldnn) exec->EnableMKLDNN(pdesc); - ctx = exec->Prepare(pdesc, block->ID(), skip_vars, false); + exec_.reset(new Executor(dev_place)); + if (FLAGS_use_mkldnn) exec_->EnableMKLDNN(pdesc); + ctx_ = exec_->Prepare(pdesc, block->ID(), skip_vars, false); #ifdef PADDLE_WITH_MKLDNN - platform::AttachPointerHashToMKLDNNKey(exec.get(), dev_place); - platform::RegisterModelLayout(ctx->ops_, dev_place); + platform::AttachPointerHashToMKLDNNKey(exec_.get(), dev_place); + platform::RegisterModelLayout(ctx_->ops_, dev_place); #endif } - exec->RunPreparedContext(ctx.get(), - &cur_scope, - /* create_local_scope */ false, - /* create_vars */ true, - /* keep_kids */ true); + exec_->RunPreparedContext(ctx_.get(), + &cur_scope, + /* create_local_scope */ false, + /* create_vars */ true, + /* keep_kids */ true); } } } private: - mutable std::shared_ptr exec{nullptr}; - mutable std::unique_ptr ctx{nullptr}; - mutable std::shared_ptr core{nullptr}; + mutable std::shared_ptr exec_{nullptr}; + mutable std::unique_ptr ctx_{nullptr}; + mutable std::shared_ptr core_{nullptr}; }; class ConditionalBlockInferShape : public framework::InferShapeBase { @@ -251,43 +217,44 @@ class ConditionalBlockGradOp : public ConditionalOp { << ", scope = " << &cur_scope; if (FLAGS_control_flow_use_new_executor) { - std::set skip_gc_vars(inside_grads.begin(), - inside_grads.end()); - - if (!core || !platform::is_same_place(core->GetPlace(), dev_place)) { - VLOG(10) << "[interpreterCore cache]" << core.get(); - VLOG_IF(10, core) - << platform::is_same_place(core->GetPlace(), dev_place); - core.reset(new InterpreterCore(dev_place, - *block, - skip_gc_vars, - &cur_scope, - /* used_for_jit */ false, - /* used_for_control_flow_op */ true)); + LOG_FIRST_N(INFO, 1) + << "[ControlFlow][ConditionalGradBlock] New Executor is Running."; + if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) { + VLOG(10) << "[interpreterCore cache]" << core_.get(); + VLOG_IF(10, core_) + << platform::is_same_place(core_->GetPlace(), dev_place); + std::set skip_gc_vars(inside_grads.begin(), + inside_grads.end()); + core_.reset(new InterpreterCore(dev_place, + *block, + skip_gc_vars, + 
&cur_scope, + /* used_for_jit */ false, + /* used_for_control_flow_op */ true)); VLOG(10) << "[interpreterCore cache]" - << "new created:" << core; + << "new created:" << core_; } else { - details::BuildScopeForConditionalBlockOp(*core, *block, &cur_scope); - core->reset_scope(&cur_scope); + BuildScopeForControlFlowOp(*core_, *block, &cur_scope); + core_->reset_scope(&cur_scope); } - core->Run({}, false); + core_->Run({}, false); } else { - if (!exec || !platform::is_same_place(exec->GetPlace(), dev_place)) { + if (!exec_ || !platform::is_same_place(exec_->GetPlace(), dev_place)) { auto &pdesc = *block->Program(); - exec.reset(new Executor(dev_place)); - if (FLAGS_use_mkldnn) exec->EnableMKLDNN(pdesc); - ctx = exec->Prepare(pdesc, block->ID(), inside_grads, false); + exec_.reset(new Executor(dev_place)); + if (FLAGS_use_mkldnn) exec_->EnableMKLDNN(pdesc); + ctx_ = exec_->Prepare(pdesc, block->ID(), inside_grads, false); #ifdef PADDLE_WITH_MKLDNN - platform::AttachPointerHashToMKLDNNKey(exec.get(), dev_place); - platform::RegisterModelLayout(ctx->ops_, dev_place); + platform::AttachPointerHashToMKLDNNKey(exec_.get(), dev_place); + platform::RegisterModelLayout(ctx_->ops_, dev_place); #endif } - exec->RunPreparedContext(ctx.get(), - &cur_scope, - /* create_local_scope */ false, - /* create_vars */ true, - /* keep_kids */ true); + exec_->RunPreparedContext(ctx_.get(), + &cur_scope, + /* create_local_scope */ false, + /* create_vars */ true, + /* keep_kids */ true); } AssignLocalGradientToParentScope( @@ -299,9 +266,9 @@ class ConditionalBlockGradOp : public ConditionalOp { } private: - mutable std::shared_ptr exec{nullptr}; - mutable std::unique_ptr ctx{nullptr}; - mutable std::shared_ptr core{nullptr}; + mutable std::shared_ptr exec_{nullptr}; + mutable std::unique_ptr ctx_{nullptr}; + mutable std::shared_ptr core_{nullptr}; private: void AssignLocalGradientToParentScope( diff --git a/paddle/fluid/operators/controlflow/control_flow_op_helper.h b/paddle/fluid/operators/controlflow/control_flow_op_helper.h new file mode 100644 index 00000000000000..82b57831f93561 --- /dev/null +++ b/paddle/fluid/operators/controlflow/control_flow_op_helper.h @@ -0,0 +1,58 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "paddle/fluid/framework/new_executor/standalone_executor.h" + +namespace paddle { +namespace operators { + +static void BuildScopeForControlFlowOp( + const framework::InterpreterCore &interpreter_core, + const framework::BlockDesc &block, + framework::Scope *scope) { + for (auto &var_desc : block.AllVars()) { + auto var_name = var_desc->Name(); + if (var_name == framework::kEmptyVarName) { + continue; + } + VLOG(5) << "[BuildScopeForControlFlowOp]" + << "start:" << var_name; + if (var_desc->Persistable()) { + VLOG(5) << "[BuildScopeForControlFlowOp]" + << "Don't process persistent: " << var_name; + } else { + auto *ptr = scope->Var(var_name); + InitializeVariable(ptr, var_desc->GetType()); + VLOG(5) << "[BuildScopeForControlFlowOp]" + << "Not Found locally and created: " << var_name; + } + } + + auto &data_transfer_added_vars = + interpreter_core.GetVariableScope()->DataTransferAddedVars(); + for (size_t i = 0; i < data_transfer_added_vars.size(); i++) { + auto *ptr = scope->Var(data_transfer_added_vars[i].first); + InitializeVariable(ptr, + static_cast( + data_transfer_added_vars[i].second)); + VLOG(5) << "[BuildScopeForControlFlowOp]" + << "Initialize Transfer Added Variable " + << data_transfer_added_vars[i].first; + } +} + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index cc6bb72324e57c..5fe51425dc44e7 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/framework/executor.h" +#include "paddle/fluid/framework/new_executor/standalone_executor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/operators/controlflow/control_flow_op_helper.h" #include "paddle/fluid/operators/controlflow/while_op_helper.h" #ifdef PADDLE_WITH_MKLDNN @@ -44,6 +46,41 @@ static std::string GetSkipEagerDeletionVarsDebugString( } return str; } + +static void TransferVariablePlace(const framework::Scope *scope, + const std::string &var_name, + const phi::Place &dst_place, + const platform::DeviceContext &dev_ctx) { + framework::Variable *var = scope->FindVar(var_name); + if (var == nullptr) { + VLOG(4) << "[TransferVariablePlace]" + << "lost in_var: " << var_name; + return; + } + if (var->Type() != framework::proto::VarType::LOD_TENSOR) { + VLOG(10) << "[TransferVariablePlace]" << var_name << " type changed:" + << framework::TransToPhiDataType( + framework::ToVarType(var->Type())); + return; + } + phi::DenseTensor *t = var->GetMutable(); + if (t->place() == dst_place) { + VLOG(10) << "[TransferVariablePlace]" + << "no need transfer: " << var_name; + return; + } + + phi::DenseTensor *new_t = new phi::DenseTensor; + framework::TensorCopy(*t, dst_place, new_t); + dev_ctx.Wait(); + + t->set_meta(new_t->meta()); + t->ResetHolder(new_t->Holder()); + + VLOG(4) << "[TransferVariablePlace]" << var_name + << " place: " << new_t->place(); +} + } // namespace class WhileOp : public framework::OperatorBase { @@ -77,9 +114,12 @@ class WhileOp : public framework::OperatorBase { // Executors (executors declared inside control ops) platform::DontClearMKLDNNCache(dev_place); #endif - framework::Executor executor(dev_place); auto *block = Attr(kStepBlock); + // get device context from pool + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = 
*pool.Get(dev_place); + auto *program = block->Program(); bool is_test = Attr("is_test"); @@ -134,7 +174,53 @@ class WhileOp : public framework::OperatorBase { auto &skip_vars = Attr>(kSkipEagerDeletionVars); VLOG(2) << GetSkipEagerDeletionVarsDebugString(skip_vars); - auto ctx = executor.Prepare(*program, block->ID(), skip_vars); + // note(lvyongkang): The assign op in while loop may change the place of + // variable. However, InterpreterCore fix the kernel of every ops during its + // first run. A cpu tensor may become gpu tensor after first run. This will + // lead to segmetation fault when it's used in a cpu kernel. Here we record + // the place of every inputs and restore their place after + // InterpreterCore.run(). + std::map input_var_original_places; + for (const auto &in_name : Inputs(kX)) { + framework::Variable *var = scope.FindVar(in_name); + if (var == nullptr) { + VLOG(4) << "[while op]" + << "input not found:" << in_name; + } + + if (var->Type() == framework::proto::VarType::LOD_TENSOR) { + input_var_original_places[in_name] = + (var->Get()).place(); + } else { + VLOG(10) << "[while op]" + << "skip backup input " << in_name << " type:" + << framework::TransToPhiDataType( + framework::ToVarType(var->Type())); + } + } + + if (FLAGS_control_flow_use_new_executor) { + LOG_FIRST_N(INFO, 1) << "[ControlFlow][WhileOp] New Executor is Running."; + if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) { + std::set skip_gc_vars(skip_vars.begin(), skip_vars.end()); + framework::Scope placeholder; // Don't care if it's valid, just for + // initialize InterpreterCore + core_.reset(new framework::InterpreterCore( + dev_place, + *block, + skip_gc_vars, + &placeholder, + /* used_for_jit */ false, + /* used_for_control_flow_op */ true)); + } + } else { + if (!executor_ || + !platform::is_same_place(executor_->GetPlace(), dev_place)) { + executor_.reset(new framework::Executor(dev_place)); + ctx_ = executor_->Prepare(*program, block->ID(), skip_vars); + } + } + if (!is_test) { while (cond_data) { auto ¤t_scope = scope.NewScope(); @@ -158,8 +244,23 @@ class WhileOp : public framework::OperatorBase { } } } - executor.RunPreparedContext( - ctx.get(), ¤t_scope, false, true, true); + if (FLAGS_control_flow_use_new_executor) { + BuildScopeForControlFlowOp(*core_, *block, ¤t_scope); + core_->reset_scope(¤t_scope); + core_->Run({}, false); + + // restore inputs place + for (const auto &n : input_var_original_places) { + const std::string &in_name = n.first; + const phi::Place &original_place = n.second; + // input vars exist in `scope` not `current_scope` + TransferVariablePlace(&scope, in_name, original_place, dev_ctx); + } + + } else { + executor_->RunPreparedContext( + ctx_.get(), ¤t_scope, false, true, true); + } for (auto &var_rename : rename_vars) { std::string input_var_name = @@ -171,7 +272,14 @@ class WhileOp : public framework::OperatorBase { } } else { auto ¤t_scope = scope.NewScope(); - executor.CreateVariables(*program, ¤t_scope, block->ID()); + + if (FLAGS_control_flow_use_new_executor) { + BuildScopeForControlFlowOp(*core_, *block, ¤t_scope); + core_->reset_scope(¤t_scope); + } else { + executor_->CreateVariables(*program, ¤t_scope, block->ID()); + } + while (cond_data) { for (auto &name : current_scope.LocalVarNames()) { auto *var = current_scope.Var(name); @@ -186,14 +294,25 @@ class WhileOp : public framework::OperatorBase { t->clear(); } } - executor.RunPreparedContext( - ctx.get(), ¤t_scope, false, false, false); + + if (FLAGS_control_flow_use_new_executor) { + 
core_->Run({}, false); + } else { + executor_->RunPreparedContext( + ctx_.get(), ¤t_scope, false, false, false); + } + cond_data = GetCondData( scope.FindVar(Input(kCondition))->Get()); } scope.DeleteScope(¤t_scope); } } + + private: + mutable std::shared_ptr executor_{nullptr}; + mutable std::unique_ptr ctx_{nullptr}; + mutable std::shared_ptr core_{nullptr}; }; class WhileOpMaker : public framework::OpProtoAndCheckerMaker { @@ -245,13 +364,12 @@ class WhileGradOp : public framework::OperatorBase { // get device context from pool platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(dev_place); - framework::Executor executor(dev_place); + auto *block = Attr(kStepBlock); auto *program = block->Program(); auto &skip_vars = Attr>(kSkipEagerDeletionVars); VLOG(2) << GetSkipEagerDeletionVarsDebugString(skip_vars); - auto ctx = executor.Prepare(*program, block->ID(), skip_vars); auto *step_scopes = scope.FindVar(Input(kStepScopes))->GetMutable(); @@ -271,6 +389,29 @@ class WhileGradOp : public framework::OperatorBase { outside_og_names.size(), inside_og_names.size())); + if (FLAGS_control_flow_use_new_executor) { + LOG_FIRST_N(INFO, 1) + << "[ControlFlow][WhileGradOp] New Executor is Running."; + if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) { + std::set skip_gc_vars(skip_vars.begin(), skip_vars.end()); + framework::Scope placeholder; // Don't care if it's valid, just for + // initialize InterpreterCore + core_.reset(new framework::InterpreterCore( + dev_place, + *block, + skip_gc_vars, + &placeholder, + /* used_for_jit */ false, + /* used_for_control_flow_op */ true)); + } + } else { + if (!executor_ || + !platform::is_same_place(executor_->GetPlace(), dev_place)) { + executor_.reset(new framework::Executor(dev_place)); + ctx_ = executor_->Prepare(*program, block->ID(), skip_vars); + } + } + for (auto cur_scope_iter = step_scopes->rbegin(); cur_scope_iter != step_scopes->rend(); ++cur_scope_iter) { @@ -330,8 +471,15 @@ class WhileGradOp : public framework::OperatorBase { "WhileGradOp.")); } } - executor.RunPreparedContext( - ctx.get(), *cur_scope_iter, false, true, true); + + if (FLAGS_control_flow_use_new_executor) { + BuildScopeForControlFlowOp(*core_, *block, *cur_scope_iter); + core_->reset_scope(*cur_scope_iter); + core_->Run({}, false); + } else { + executor_->RunPreparedContext( + ctx_.get(), *cur_scope_iter, false, true, true); + } // The Outputs(kXGRAD) contains the names of the gradient of parameters // and inputs. @@ -446,6 +594,11 @@ class WhileGradOp : public framework::OperatorBase { } step_scopes->clear(); } + + private: + mutable std::shared_ptr executor_{nullptr}; + mutable std::unique_ptr ctx_{nullptr}; + mutable std::shared_ptr core_{nullptr}; }; template From f5c520bb8d65e1f0df1115dc657e257e3c58fde6 Mon Sep 17 00:00:00 2001 From: Infinity_lee Date: Tue, 29 Nov 2022 16:21:13 +0800 Subject: [PATCH 026/154] fix some typo errors (#48185) * fix some typo errors function rotate: format errors. 
function to_grayscale: missing one sentences to explain num_output_chanels * Update functional.py * test=docs_preview * Update functional.py * Update functional.py * Update functional.py * Update functional.py * Update functional.py Co-authored-by: Ligoml <39876205+Ligoml@users.noreply.github.com> --- python/paddle/vision/transforms/functional.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/python/paddle/vision/transforms/functional.py b/python/paddle/vision/transforms/functional.py index 45e50712c9e75d..f813142144c8ce 100644 --- a/python/paddle/vision/transforms/functional.py +++ b/python/paddle/vision/transforms/functional.py @@ -269,18 +269,18 @@ def center_crop(img, output_size): PIL.Image or np.array: Cropped image. Examples: - .. code-block:: python + .. code-block:: python - import numpy as np - from PIL import Image - from paddle.vision.transforms import functional as F + import numpy as np + from PIL import Image + from paddle.vision.transforms import functional as F - fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8') + fake_img = (np.random.rand(256, 300, 3) * 255.).astype('uint8') - fake_img = Image.fromarray(fake_img) + fake_img = Image.fromarray(fake_img) - cropped_img = F.center_crop(fake_img, (150, 100)) - print(cropped_img.size) + cropped_img = F.center_crop(fake_img, (150, 100)) + print(cropped_img.size) """ if not ( _is_pil_image(img) or _is_numpy_image(img) or _is_tensor_image(img) @@ -764,7 +764,7 @@ def rotate( Origin is the upper left corner. Default is the center of the image. fill (3-list|3-tuple or int): RGB pixel fill value for area outside the rotated image. - If int, it is used for all channels respectively. + If int, it is used for all channels respectively. Default value is 0. Returns: @@ -919,7 +919,8 @@ def to_grayscale(img, num_output_channels=1): Args: img (PIL.Image|np.array): Image to be converted to grayscale. - + num_output_channels (int, optional): The number of channels for the output + image. Single channel. Default: 1. Returns: PIL.Image or np.array: Grayscale version of the image. if num_output_channels = 1 : returned image is single channel From c928a35e664ce1cec96af80daa7371783e347505 Mon Sep 17 00:00:00 2001 From: wanghuancoder Date: Tue, 29 Nov 2022 16:24:41 +0800 Subject: [PATCH 027/154] set_state_dict return missing_keys and unexpected_keys (#48436) * refine set_state_dict --- python/paddle/fluid/dygraph/layers.py | 17 ++++++++++++-- .../unittests/test_state_dict_convert.py | 22 +++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index 1593cc78e6a2c5..02b0e2bcfe1be3 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -1600,7 +1600,8 @@ def set_state_dict(self, state_dict, use_structured_name=True): use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key. Default: True Returns: - None + missing_keys(list):A list of str containing the missing keys + unexpected_keys(list):A list of str containing the unexpected keys Examples: .. 
code-block:: python @@ -1615,15 +1616,20 @@ def set_state_dict(self, state_dict, use_structured_name=True): emb.set_state_dict(para_state_dict) ''' + missing_keys = [] + match_keys = set() + unexpected_keys = [] def _check_match(key, param): state = state_dict.get(key, None) if state is None: + missing_keys.append(key) raise ValueError( "{} is not found in the provided dict.".format(key) ) if isinstance(state, dict) or isinstance(state, list): if len(state) != len(param): + missing_keys.append(key) raise ValueError( "{} receieves the length of {}, " "but the expected shape is {}".format( @@ -1631,6 +1637,7 @@ def _check_match(key, param): ) ) else: + match_keys.add(key) return param, state else: state_shape = ( @@ -1640,11 +1647,13 @@ def _check_match(key, param): ) if list(state_shape) != list(param.shape): + missing_keys.append(key) raise ValueError( "{} receives a shape {}, but the expected shape is {}.".format( key, list(state_shape), list(param.shape) ) ) + match_keys.add(key) return param, state matched_param_state = [] @@ -1655,7 +1664,9 @@ def _check_match(key, param): matched_param_state.append(match_res) except ValueError as err: warnings.warn(("Skip loading for {}. ".format(key) + str(err))) - + for key in state_dict.keys(): + if key not in match_keys: + unexpected_keys.append(key) if _non_static_mode(): for param, state in matched_param_state: param.set_value(state) @@ -1693,6 +1704,8 @@ def _set_var(var, ndarray): "This error might happens in dy2static, while calling 'set_state_dict' dynamicly in 'forward', which is not supported. If you only need call 'set_state_dict' once, move it to '__init__'." ) + return missing_keys, unexpected_keys + def to(self, device=None, dtype=None, blocking=None): ''' Cast the parameters and buffers of Layer by the give device, dtype and blocking. 
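With this change, `Layer.set_state_dict` additionally reports which parameters received no value and which provided keys matched nothing. A minimal usage sketch of the new return values (the layer and key names here are illustrative only; the exact strings depend on the layer's structured parameter names, and the unit test in the next hunk exercises the same behavior):

    import paddle
    import paddle.nn as nn

    layer = nn.Linear(100, 300)
    # Deliberately pass a state dict whose only key matches no parameter of `layer`.
    bad_state = {"not_a_param": paddle.to_tensor(1.0)}

    missing_keys, unexpected_keys = layer.set_state_dict(bad_state)
    print(missing_keys)     # parameters that were not loaded, e.g. ['weight', 'bias']
    print(unexpected_keys)  # provided keys that matched nothing, e.g. ['not_a_param']
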
diff --git a/python/paddle/fluid/tests/unittests/test_state_dict_convert.py b/python/paddle/fluid/tests/unittests/test_state_dict_convert.py index f62f983e90320e..77a181613374f2 100644 --- a/python/paddle/fluid/tests/unittests/test_state_dict_convert.py +++ b/python/paddle/fluid/tests/unittests/test_state_dict_convert.py @@ -53,6 +53,15 @@ def set_state_dict(self, state_dict, use_structured_name=True): return super().set_state_dict(state_dict) +class MyModel2(nn.Layer): + def __init__(self): + super().__init__() + self.linear = nn.Linear(100, 300) + + def forward(self, x): + return self.linear(x) + + def is_state_dict_equal(model1, model2): st1 = model1.state_dict() st2 = model2.state_dict() @@ -73,5 +82,18 @@ def test_main(self): self.assertTrue(is_state_dict_equal(model1, model2)) +class TestStateDictReturn(unittest.TestCase): + def test_missing_keys_and_unexpected_keys(self): + model1 = MyModel2() + tmp_dict = dict() + tmp_dict["unexpected_keys"] = paddle.to_tensor(1) + missing_keys, unexpected_keys = model1.set_state_dict(tmp_dict) + self.assertEqual(len(missing_keys), 2) + self.assertEqual(missing_keys[0], "linear.weight") + self.assertEqual(missing_keys[1], "linear.bias") + self.assertEqual(len(unexpected_keys), 1) + self.assertEqual(unexpected_keys[0], "unexpected_keys") + + if __name__ == "__main__": unittest.main() From f41ccbd549549e2f9accc467ade1b01a86eb6d2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awomir=20Siwek?= Date: Tue, 29 Nov 2022 09:42:30 +0100 Subject: [PATCH 028/154] [PHI] Migrate matmul kernel (#48162) * cleanup unused code * unify is_int8 is_bfloat16 * Simplify matmul_v2 FWD kernel * remove RunKernel methods * remove import namespace * remove headers * clean fluid/phi cross imports * remove fluid axpy_handler * delete fluid methods * activations * OneDNNMemDesc * MKLDNNFormatForSize * MatchShapeToLayout * MKLDNNMemoryFormat * MKLDNNFormat * ReorderMKLDNNHandler * to_void_cast * review suggestions * interpolate * remove fluid depedency * init * ExecuteMatMulV2 * rm fluid kernel * matmul_grad * remove mutable_data * mul_grad * matmul fwd * add extra attr * temp disable passes * re-enable passes * workaround for matmul+act * fix for matmul+eltwise_add * fix typo * merge bugfix #48364 * remove merge conflict --- .../operators/mkldnn/matmul_v2_mkldnn_op.cc | 18 +- paddle/fluid/operators/ops_extra_info.h | 4 +- paddle/phi/backends/onednn/onednn_reuse.h | 21 ++- paddle/phi/kernels/onednn/matmul_kernel.cc | 164 ++++++++++++++++++ 4 files changed, 186 insertions(+), 21 deletions(-) create mode 100644 paddle/phi/kernels/onednn/matmul_kernel.cc diff --git a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc index b4d782da78f024..be965c4abb8956 100644 --- a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc @@ -381,7 +381,7 @@ void ExecuteMatMulV2(const ExecutionContext &ctx, } template -class MatMulV2MKLDNNKernel : public paddle::framework::OpKernel { +class MatMulMKLDNNKernel : public paddle::framework::OpKernel { public: void Compute(const ExecutionContext &ctx) const override { if (ctx.HasAttr("head_number")) { @@ -696,21 +696,13 @@ class MatMulGradMKLDNNKernel : public paddle::framework::OpKernel { REGISTER_OP_KERNEL(matmul, MKLDNN, ::paddle::platform::CPUPlace, - MatMulV2MKLDNNKernel, - MatMulV2MKLDNNKernel, - MatMulV2MKLDNNKernel, - MatMulV2MKLDNNKernel); + MatMulMKLDNNKernel, + MatMulMKLDNNKernel, + MatMulMKLDNNKernel, + MatMulMKLDNNKernel); 
REGISTER_OP_KERNEL(matmul_grad, MKLDNN, ::paddle::platform::CPUPlace, MatMulGradMKLDNNKernel, MatMulGradMKLDNNKernel); - -REGISTER_OP_KERNEL(matmul_v2, - MKLDNN, - ::paddle::platform::CPUPlace, - MatMulV2MKLDNNKernel, - MatMulV2MKLDNNKernel, - MatMulV2MKLDNNKernel, - MatMulV2MKLDNNKernel); diff --git a/paddle/fluid/operators/ops_extra_info.h b/paddle/fluid/operators/ops_extra_info.h index b16e4ed58f3fe2..77c0aa7a33fb3c 100644 --- a/paddle/fluid/operators/ops_extra_info.h +++ b/paddle/fluid/operators/ops_extra_info.h @@ -98,6 +98,7 @@ const std::unordered_map {"fuse_alpha", ExtraAttrProperty::ONEDNN}, {"fuse_beta", ExtraAttrProperty::ONEDNN}, {"fuse_relu", ExtraAttrProperty::ONEDNN}, + {"fused_output_scale", ExtraAttrProperty::ONEDNN}, {"fuse_residual_connection", ExtraAttrProperty::ONEDNN}, {"fuse_with_relu", ExtraAttrProperty::ONEDNN}, {"fused_reshape_Out", ExtraAttrProperty::ONEDNN}, @@ -221,7 +222,8 @@ class ExtraInfoUtils { std::unordered_map> g_extra_input_names_map_ = {{"conv2d", {"Bias", "ResidualData"}}, {"conv2d_transpose", {"Bias"}}, - {"conv2d_grad", {"Bias"}}}; + {"conv2d_grad", {"Bias"}}, + {"matmul_v2", {"ResidualData"}}}; std::vector empty_extra_input_names_; }; diff --git a/paddle/phi/backends/onednn/onednn_reuse.h b/paddle/phi/backends/onednn/onednn_reuse.h index f4577dab5aa476..7f64f8668c91bd 100644 --- a/paddle/phi/backends/onednn/onednn_reuse.h +++ b/paddle/phi/backends/onednn/onednn_reuse.h @@ -1874,9 +1874,11 @@ class MatmulOneDNNHandler : public OneDNNHandlerNoCachingT { if (scale_out != 1.0f) { matmul_attrs.set_output_scales(0, {scale_out}); } + const auto* residual_data = dev_ctx.HasDnnInput("ResidualData") + ? dev_ctx.GetDnnInput("ResidualData") + : nullptr; - if (dev_ctx.HasDnnInput("ResidualData")) { - auto* residual_data = dev_ctx.GetDnnInput("ResidualData"); + if (residual_data) { auto residual_data_tz = vectorize(residual_data->dims()); auto residual_data_md = memory::desc(residual_data_tz, OneDNNGetDataType(), @@ -1893,9 +1895,11 @@ class MatmulOneDNNHandler : public OneDNNHandlerNoCachingT { AppendActivation(dev_ctx, post_operations); - if (dev_ctx.HasDnnAttr("fused_output_scale")) { - float scale_alpha = - PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("fused_output_scale")); + const float scale_alpha = + dev_ctx.HasDnnAttr("fused_output_scale") + ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("fused_output_scale")) + : 1.0f; + if (scale_alpha != 1.0f) { post_operations.append_eltwise( 1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f); } @@ -2014,8 +2018,11 @@ void ExecuteMatmul(const OneDNNContext& dev_ctx, {DNNL_ARG_WEIGHTS, *weights_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; - if (dev_ctx.HasDnnInput("ResidualData")) { - auto* residual_data = dev_ctx.GetDnnInput("ResidualData"); + const auto* residual_data = dev_ctx.HasDnnInput("ResidualData") + ? dev_ctx.GetDnnInput("ResidualData") + : nullptr; + + if (residual_data) { const auto residual_data_memory_p = handler.AcquireSrcMemory(residual_data); matmul_args.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(0) | DNNL_ARG_SRC_1, *residual_data_memory_p}); diff --git a/paddle/phi/kernels/onednn/matmul_kernel.cc b/paddle/phi/kernels/onednn/matmul_kernel.cc new file mode 100644 index 00000000000000..30a1735c5184aa --- /dev/null +++ b/paddle/phi/kernels/onednn/matmul_kernel.cc @@ -0,0 +1,164 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/matmul_kernel.h" + +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { + +DDim GetDimsForInput(const OneDNNContext &dev_ctx, + DDim input_dims, + std::string input_name) { + auto shape = + dev_ctx.HasDnnAttr("fused_reshape_" + input_name) + ? PADDLE_GET_CONST(std::vector, + dev_ctx.GetDnnAttr("fused_reshape_" + input_name)) + : std::vector(); + auto axis = dev_ctx.HasDnnAttr("fused_transpose_" + input_name) + ? PADDLE_GET_CONST( + std::vector, + dev_ctx.GetDnnAttr("fused_transpose_" + input_name)) + : std::vector(); + if (!shape.empty() && !axis.empty()) { + return input_dims.reshape(shape).transpose(axis); + } + return input_dims; +} + +void CalculateMatrixDims(const std::vector &x_dims, + const std::vector &y_dims, + std::vector *x_bd_dims, + std::vector *y_bd_dims, + DenseTensor *out, + const bool is_output_fused) { + if (x_dims.size() == 1) { + (*x_bd_dims)[(*x_bd_dims).size() - 1] = x_dims[0]; + } else if (x_dims.size() == 2) { + (*x_bd_dims)[(*x_bd_dims).size() - 1] = x_dims[1]; + (*x_bd_dims)[(*x_bd_dims).size() - 2] = x_dims[0]; + } else { + for (size_t i = 0; i < x_dims.size(); ++i) { + (*x_bd_dims)[(*x_bd_dims).size() - x_dims.size() + i] = x_dims[i]; + } + } + if (y_dims.size() == 1) { + (*y_bd_dims)[(*x_bd_dims).size() - 2] = y_dims[0]; + } else if (y_dims.size() == 2) { + (*y_bd_dims)[(*y_bd_dims).size() - 1] = y_dims[1]; + (*y_bd_dims)[(*y_bd_dims).size() - 2] = y_dims[0]; + } else { + for (size_t i = 0; i < y_dims.size(); ++i) { + (*y_bd_dims)[(*y_bd_dims).size() - y_dims.size() + i] = y_dims[i]; + } + } + + if (!is_output_fused && x_dims.size() > 2 && y_dims.size() > 2) { + auto out_dims = vectorize(out->dims()); + for (size_t i = 0; i < (*x_bd_dims).size() - 2; ++i) { + PADDLE_ENFORCE_EQ( + (*x_bd_dims)[i] == (*y_bd_dims)[i] || (*x_bd_dims)[i] == 1 || + (*y_bd_dims)[i] == 1, + true, + errors::InvalidArgument( + "Tensor dimensions are incorrect for broadcasting." + "Dimensions in X and Y must be same or equal to 1, but " + "received x_dim[%d]=%d and y_dims[%d]= %d", + i, + (*x_bd_dims)[i], + i, + (*y_bd_dims)[i])); + (out_dims)[i] = std::max((*x_bd_dims)[i], (*y_bd_dims)[i]); + } + out->Resize(make_ddim((out_dims))); + } +} + +template +void MatmulKernel(const Context &dev_ctx, + const DenseTensor &x, + const DenseTensor &y, + bool transpose_x, + bool transpose_y, + DenseTensor *out) { + if (dev_ctx.HasDnnAttr("head_number")) { + const auto head_number = + PADDLE_GET_CONST(int, dev_ctx.GetDnnAttr("head_number")); + PADDLE_ENFORCE_EQ( + head_number, + 1, + errors::Unimplemented( + "oneDNN matmul doesn't support multiple heads. Expected " + "head_number=1. But received `head_number` is %d", + head_number)); + } + + constexpr bool is_int8 = funcs::is_int8(); + constexpr bool is_bfloat16 = funcs::is_bfloat16(); + const bool force_fp32_output = + dev_ctx.HasDnnAttr("force_fp32_output") + ? 
PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("force_fp32_output")) + : false; + + bool fuse_relu = false; + if (dev_ctx.HasDnnAttr("fuse_activation")) { + auto act_type = + PADDLE_GET_CONST(std::string, dev_ctx.GetDnnAttr("fuse_activation")); + if (act_type == "relu" || act_type == "relu6") { + fuse_relu = true; + } + } + + auto x_dims = vectorize(GetDimsForInput(dev_ctx, x.dims(), "X")); + auto y_dims = vectorize(GetDimsForInput(dev_ctx, y.dims(), "Y")); + + int ndims = std::max(x_dims.size(), y_dims.size()); + ndims = std::max(ndims, 3); + + std::vector x_bd_dims(ndims, 1); + std::vector y_bd_dims(ndims, 1); + + CalculateMatrixDims(x_dims, + y_dims, + &x_bd_dims, + &y_bd_dims, + out, + funcs::IsOutputFused(dev_ctx)); + + if (force_fp32_output || ((!is_int8) && (!is_bfloat16))) { + funcs::ExecuteMatmul( + dev_ctx, x, y, x_bd_dims, y_bd_dims, transpose_x, transpose_y, out); + } else if (is_bfloat16) { + funcs::ExecuteMatmul( + dev_ctx, x, y, x_bd_dims, y_bd_dims, transpose_x, transpose_y, out); + } else if (fuse_relu) { + funcs::ExecuteMatmul( + dev_ctx, x, y, x_bd_dims, y_bd_dims, transpose_x, transpose_y, out); + } else { + funcs::ExecuteMatmul( + dev_ctx, x, y, x_bd_dims, y_bd_dims, transpose_x, transpose_y, out); + } +} + +} // namespace phi + +PD_REGISTER_KERNEL(matmul, + OneDNN, + ONEDNN, + phi::MatmulKernel, + float, + phi::dtype::bfloat16, + int8_t, + uint8_t) {} From d926b30b45809a67c5c3ff815087167d60f4ca16 Mon Sep 17 00:00:00 2001 From: Aurelius84 Date: Tue, 29 Nov 2022 16:45:17 +0800 Subject: [PATCH 029/154] [Fluid Clean]Migrate if/while/return/break transformer into paddle.jit (#48449) * [Fluid Clean]Migrate if/while/return/break transformer into paddle.jit * migrate call_transformer * migrate call_transformer --- python/paddle/fluid/compiler.py | 2 +- .../dygraph/dygraph_to_static/__init__.py | 9 - .../dygraph_to_static/call_transformer.py | 2 +- .../dygraph_to_static/convert_call_func.py | 342 ------- .../dygraph_to_static/convert_operators.py | 837 ----------------- .../fluid/dygraph/dygraph_to_static/utils.py | 6 +- python/paddle/fluid/dygraph/io.py | 2 +- python/paddle/fluid/layers/control_flow.py | 2 +- python/paddle/fluid/layers/math_op_patch.py | 2 +- .../dygraph_to_static/test_convert_call.py | 2 +- .../unittests/dygraph_to_static/test_len.py | 6 +- .../unittests/dygraph_to_static/test_loop.py | 2 +- .../test_save_inference_model.py | 2 +- python/paddle/jit/api.py | 2 +- .../jit/dy2static/assert_transformer.py | 2 + .../paddle/jit/dy2static/ast_transformer.py | 12 +- .../dy2static}/break_continue_transformer.py | 0 .../paddle/jit/dy2static/convert_call_func.py | 330 ++++++- .../paddle/jit/dy2static/convert_operators.py | 841 +++++++++++++++++- .../dy2static}/early_return_transformer.py | 2 + .../dy2static}/ifelse_transformer.py | 10 +- .../dy2static}/loop_transformer.py | 11 +- .../dy2static}/partial_program.py | 2 +- .../jit/dy2static/program_translator.py | 2 +- .../dy2static}/return_transformer.py | 5 +- 25 files changed, 1179 insertions(+), 1256 deletions(-) delete mode 100644 python/paddle/fluid/dygraph/dygraph_to_static/convert_call_func.py delete mode 100644 python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/break_continue_transformer.py (100%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/early_return_transformer.py (98%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/ifelse_transformer.py (98%) rename 
python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/loop_transformer.py (98%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/partial_program.py (99%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/return_transformer.py (98%) diff --git a/python/paddle/fluid/compiler.py b/python/paddle/fluid/compiler.py index 4dc495edc1a4fc..81af46c468adbd 100644 --- a/python/paddle/fluid/compiler.py +++ b/python/paddle/fluid/compiler.py @@ -697,7 +697,7 @@ def patch_program_cache(ipu_strategy): MAX_TRACED_PROGRAM_COUNT, ) from ..fluid.dygraph.dygraph_to_static import logging_utils - from ..fluid.dygraph.dygraph_to_static.partial_program import ( + from paddle.jit.dy2static.partial_program import ( partial_program_from, ) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/__init__.py b/python/paddle/fluid/dygraph/dygraph_to_static/__init__.py index ad3fc3d18411b7..15013fb36d806f 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/__init__.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/__init__.py @@ -15,23 +15,14 @@ from . import static_analysis from .static_analysis import * -from . import loop_transformer -from .loop_transformer import * - from . import variable_trans_func from .variable_trans_func import * -from . import convert_call_func -from .convert_call_func import * - -from . import convert_operators from . import logging_utils from .logging_utils import * __all__ = [] -__all__ += loop_transformer.__all__ __all__ += static_analysis.__all__ __all__ += variable_trans_func.__all__ -__all__ += convert_call_func.__all__ __all__ += logging_utils.__all__ diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/call_transformer.py b/python/paddle/fluid/dygraph/dygraph_to_static/call_transformer.py index a29fe8f9d6b1ef..043102b0f661aa 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/call_transformer.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/call_transformer.py @@ -51,7 +51,7 @@ def _no_need_convert_call(self, node): func_str = ast_to_source_code(node.func).strip() try: - from paddle.fluid.dygraph.dygraph_to_static.convert_call_func import ( + from paddle.jit.dy2static.convert_call_func import ( is_builtin, ) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/convert_call_func.py b/python/paddle/fluid/dygraph/dygraph_to_static/convert_call_func.py deleted file mode 100644 index 86496d72f26339..00000000000000 --- a/python/paddle/fluid/dygraph/dygraph_to_static/convert_call_func.py +++ /dev/null @@ -1,342 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import collections -import copy -import functools -import logging -import inspect -import pdb -import re -import types - -import numpy -import builtins - -from paddle.fluid.dygraph.container import Sequential -from paddle.fluid.dygraph.dygraph_to_static.convert_operators import ( - convert_len, - convert_zip, -) -from paddle.fluid.dygraph.dygraph_to_static.convert_operators import ( - convert_range, - convert_enumerate, -) -from paddle.fluid.dygraph.dygraph_to_static.logging_utils import ( - TranslatorLogger, -) - -from paddle.fluid.dygraph.dygraph_to_static.utils import is_paddle_func, unwrap -from paddle.fluid.dygraph.layers import Layer - -__all__ = ["convert_call"] - - -# The api(s) should be considered as plain function and convert -# them into static layer code. -PADDLE_NEED_CONVERT_APIS = [Sequential] - -translator_logger = TranslatorLogger() - -CONVERSION_OPTIONS = "An attribute for a function that indicates conversion flags of the function in dynamic-to-static." - - -class ConversionOptions: - """ - A container for conversion flags of a function in dynamic-to-static. - - Attributes: - not_convert(bool): An attribute indicates that the function won't be converted in dynamic-to-static. - - NOTE(liym27): More attributes and methods can be added in this class. - """ - - def __init__(self, not_convert=False): - self.not_convert = not_convert - - -def is_builtin(func, name=None): - """predict whether a function is a builtin function with name={name}. - if name == None, then any builtin function will return True - """ - - def name_judge(): - return name is None or func.__name__ == name - - if isinstance(func, types.BuiltinFunctionType) and name_judge(): - return True - elif func in builtins.__dict__.values() and name_judge(): - return True - else: - return False - - -def builtin_modules(): - """ - Return builtin modules. - """ - modules = [ - collections, - pdb, - copy, - inspect, - re, - numpy, - logging, - ] - try: - import six - - modules.append(six) - except ImportError: - pass # do nothing - - return modules - - -BUILTIN_LIKELY_MODULES = builtin_modules() - - -def is_unsupported(func): - """ - Checks whether the func is supported by dygraph to static graph. - """ - - for m in BUILTIN_LIKELY_MODULES: - for v in m.__dict__.values(): - func_in_dict = func == v - if isinstance(func_in_dict, (list, numpy.ndarray)): - func_in_dict = numpy.array(func_in_dict).any() - if func_in_dict: - translator_logger.log( - 2, - "Whitelist: {} is part of built-in module and does not have to be transformed.".format( - func - ), - ) - return True - - # NOTE: should be placed before `is_paddle_func` - if type(func) in PADDLE_NEED_CONVERT_APIS: - return False - - if is_paddle_func(func): - translator_logger.log( - 2, - "Whitelist: {} is part of Paddle module and does not have to be transformed.".format( - func - ), - ) - return True - - -def convert_call(func): - """ - Converts a function call which needs to be transformed to static function. - - Args: - func (callable): A callable function or method to convert. - - Returns: - Callable: A converted function. - - Examples: - .. 
code-block:: python - - import paddle - from paddle.jit.dy2static import convert_call - - paddle.enable_static() - def dyfunc(x): - if paddle.mean(x) < 0: - x_v = x - 1 - else: - x_v = x + 1 - return x_v - - new_func = convert_call(dyfunc) - x = paddle.tensor.manipulation.fill_constant(shape=[3, 3], value=0, dtype='float64') - x_v = new_func(x) - - exe = paddle.static.Executor(paddle.CPUPlace()) - out = exe.run(fetch_list=[x_v]) - print(out[0]) - # [[1. 1. 1.] - # [1. 1. 1.] - # [1. 1. 1.]] - - """ - # NOTE(Aurelius84): Fix it after all files migrating into jit. - from paddle.jit.dy2static.program_translator import ( - convert_to_static, - unwrap_decorators, - StaticFunction, - ) - - translator_logger.log( - 1, "Convert callable object: convert {}.".format(func) - ) - func_self = None - converted_call = None - - # Function in convert_call may be decorated by another `@to_static`, - # in this case, unwraps it into a raw method or function. - _, func = unwrap_decorators(func) - - options = getattr(func, CONVERSION_OPTIONS, None) - if options is not None and options.not_convert: - translator_logger.log( - 2, - "{} is not converted when it is decorated by 'paddle.jit.not_to_static'.".format( - func - ), - ) - return func - - if is_builtin(func, "len"): - return convert_len - - if is_builtin(func, "zip"): - return convert_zip - - if is_builtin(func, "range"): - return convert_range - - if is_builtin(func, "enumerate"): - return convert_enumerate - - if is_builtin(func) or is_unsupported(func): - return func - - if inspect.isgeneratorfunction(func): - # NOTE(xiongkun03): inspect.isfunction() will return True even though func is a generator function. - # If we don't deal generatorfunction here, we will regard it as normal function and get errors in some - # occasion. - number_of_stars = 30 - translator_logger.warn( - "\n\n" - + "*" * number_of_stars - + "\nYour function:`{}` doesn't support to transform to static function because it is a generator function, it will be run as-is.".format( - func.__name__ - ) - + "\n" - + "*" * number_of_stars - + "\n\n" - ) - return func - - if inspect.isfunction(func): - # TODO(liym27): If func is a lambda function, special conversion is needed. - if func.__name__ == '': - return func - try: - # Note(Aurelius84): Because `@declarative` returns a class instance instead of - # a function. This will modify the value referring to itself in `__globals__`. - - # For example: - # - # @declarative - # def foo(x): - # return x - # - # `foo` will be converted into a wrapper class, suppose as `StaticFunction`. - # And `foo.__globals__['foo']` will still return this `StaticFunction` instead of - # `foo` function. So `isinstance(fn, StaticFunction)` is added here. - _origfunc = unwrap(func) - global_functions = set() - for fn in _origfunc.__globals__.values(): - if inspect.isfunction(fn): - global_functions.add(fn) - elif isinstance(fn, StaticFunction): - _, fn = unwrap_decorators(fn) - global_functions.add(fn) - elif inspect.isclass(fn): - if isinstance( - fn.__dict__.get(func.__name__, None), staticmethod - ): - global_functions.add( - func - ) # Add func to ensure that we will convert - - if func in global_functions: - converted_call = convert_to_static(func) - func_self = getattr(func, '__self__', None) - else: - # NOTE: - # If func is not in __globals__, it does not need to be transformed - # because it has been transformed before. 
- translator_logger.warn( - "{} doesn't have to be transformed to static function because it has been transformed before, it will be run as-is.".format( - func - ) - ) - converted_call = func - except AttributeError: - # NOTE: - # If func is not in __globals__, it does not need to be transformed - # because it has been transformed before. - converted_call = None - except (IOError, OSError): - # NOTE: - # If func has been decorated, its source code can not be get - # so that it can not be transformed to static function. - converted_call = None - elif inspect.ismethod(func): - try: - converted_call = convert_to_static(func) - func_self = getattr(func, '__self__', None) - except (IOError, OSError): - # NOTE: func may have been decorated. - converted_call = None - - elif hasattr(func, '__class__') and hasattr(func.__class__, '__call__'): - if hasattr(func, 'forward') and isinstance(func, Layer): - try: - _, forward_func = unwrap_decorators(func.forward) - func._original_funcs['forward'] = forward_func.__func__ - forward_func = convert_to_static(forward_func) - # Bound mothod will be convert into plain function after `convert_to_static`. - # So descriptor mechanism is used to bound `self` instance on function to - # keep it as bound method. - setattr(func, 'forward', forward_func.__get__(func)) - except (IOError, OSError, TypeError): - # NOTE: func.forward may have been decorated. - func_self = None if func_self else func_self - converted_call = func - else: - try: - call_func = func.__class__.__call__ - converted_call = convert_to_static(call_func) - func_self = func - except (IOError, OSError, TypeError): - # NOTE: - # If `func` is a class which is being initialized, for example `convert_call(Foo)()`, - # it doesn't need to be transformed - func_self = None if func_self else func_self - else: - raise NotImplementedError( - "Callable {} can not be transformed at present.".format(func) - ) - - if converted_call is None: - translator_logger.warn( - "{} doesn't have to be transformed to static function, and it will be run as-is.".format( - func - ) - ) - return func - - if func_self: - converted_call = functools.partial(converted_call, func_self) - return converted_call diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py b/python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py deleted file mode 100644 index ee8fc9e4c7242b..00000000000000 --- a/python/paddle/fluid/dygraph/dygraph_to_static/convert_operators.py +++ /dev/null @@ -1,837 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import re -import paddle -from paddle.fluid.data_feeder import convert_dtype -from paddle.fluid.dygraph.dygraph_to_static.variable_trans_func import ( - to_static_variable, -) -from paddle.fluid.framework import core, Variable -from paddle.fluid.layers import Assert, Print -from paddle.fluid.layers import ( - array_length, - array_read, - array_write, - create_array, -) -from paddle.fluid.layers import ( - assign, - fill_constant, - reduce_all, - reduce_any, -) -from paddle.fluid.layers import ( - cast, - control_flow, - nn, -) -from paddle.fluid.layers.control_flow import ( - cond, - while_loop, - less_than, - increment, -) -from paddle.fluid.dygraph.dygraph_to_static.return_transformer import ( - RETURN_NO_VALUE_VAR_NAME, -) -from paddle.fluid.dygraph.dygraph_to_static.utils import ( - UndefinedVar, - Dygraph2StaticException, -) -from paddle.fluid.dygraph.dygraph_to_static.utils import GetterSetterHelper -from paddle.fluid.layers.utils import copy_mutable_vars - - -def convert_attr(x, attr): - if isinstance(x, Variable) and attr == "size": - return x.size() - else: - return getattr(x, attr) - - -def indexable(x, code=None): - if isinstance(x, Variable): - return x - if hasattr(x, '__len__') and hasattr(x, '__getitem__'): - return x - if hasattr(x, '__iter__'): - return [i for i in x] - else: - raise RuntimeError("X can't be convert into indexable.") - - -def unpack_by_structure(target, structure): - """unified unpack interface for paddle and python.""" - if isinstance(target, Variable): - return _unpack_by_structure_paddle(target, structure) - else: - return _unpack_by_structure_python(target, structure) - - -def _unpack_by_structure_python(target, structure): - """TODO(xiongkun): analysis the differences between python and paddle unpack.""" - return _unpack_by_structure_paddle(target, structure) - - -def _unpack_by_structure_paddle(target, structure): - if structure == 1: - return target - ret = [] - for idx, ele in enumerate(structure): - if ele == 1: - ret.append(target[idx]) - continue - if isinstance(ele, list): - ret.append(unpack_by_structure(target[idx], ele)) - continue - assert False, "structure element must be 1 or list" - return ret - - -def convert_while_loop( - cond, body, getter, setter, return_name_ids=None, push_pop_names=None -): - """ - A function representation of a Python ``while`` statement. - - Args: - cond(Callable): A callable object that returns a boolean variable to control whether to execute the loop body. It takes ``loop_vars`` as arguments. - body(Callable): A callable object that returns a tuple or list of variables with the same arguments ``loops_vars`` as ``cond`` . - get_args(callable): Get all arguments that needed in true_fn and false_fn. - set_args(callable): Update arguments that modified in trure_fn and false_fn. - return_name_ids(list[string], optional): the returned names. - push_pop_names(list[string], optional): the names on which called .append() or .pop(). - - Returns: - A list or tuple of variables which returned by ``body``. - """ - - # NOTE: It may be slower if cond is very expensive, but usually cond is just O(1). - # If loop_vars is changed during cond callable, then it causes bug, but current logical_and/logical_not/... doesn't change the loop_vars. 
- pred = cond() - if isinstance(pred, Variable): - _run_paddle_while( - cond, body, getter, setter, return_name_ids, push_pop_names - ) - else: - _run_py_while(cond, body, getter, setter) - - -def _convert_tensor_arrray_if_necessary(setterhelper, push_pop_names): - push_pop_vars = setterhelper.get(push_pop_names) - if push_pop_vars is None: - return - - def maybe_to_tensor_array(v): - if isinstance(v, list): - return create_array("float32", initialized_list=v) - else: - return v - - setterhelper.set( - push_pop_names, [maybe_to_tensor_array(v) for v in push_pop_vars] - ) - - -def _run_paddle_while( - cond, body, getter, setter, return_name_ids, push_pop_names -): - # NOTE: loop_vars of Paddle op `control_flow.while_loop` must be Paddle Tensors. - helper = GetterSetterHelper(getter, setter, return_name_ids, push_pop_names) - _convert_tensor_arrray_if_necessary(helper, push_pop_names) - - def new_body_fn(*args): - """wrap the body() and add return value for `while_loop` - the args may be differ from getter(). - """ - mutable_loop_vars = args - helper.set(return_name_ids, mutable_loop_vars) - body() - return helper.get(return_name_ids) - - def new_cond_fn(*args): - """cond is a zero-args function, which is not - compatible with `while_loop`. - """ - return cond() - - # UndefinedVar will become data layer not check variable with value=NO_VALUE_MAGIC. - loop_vars = [ - to_static_variable(var) if not isinstance(var, UndefinedVar) else var - for var in helper.get(return_name_ids) - ] - helper.set( - return_name_ids, loop_vars - ) # change the non-local var to variable - # variable maybe modified to inner var. change it into - loop_vars = control_flow.while_loop(new_cond_fn, new_body_fn, loop_vars) - helper.set(return_name_ids, loop_vars) - return loop_vars - - -def _run_py_while(cond, body, getter, setter): - while True: - pred = cond() - if isinstance(pred, Variable): - raise Dygraph2StaticException( - "python while pred change from bool to variable." - ) - if not pred: - break - body() - - -def convert_logical_and(x_func, y_func): - """ - A function representation of a Python ``and`` statement. - - Args: - x_func(callable): x_func() is the left hand operand of ``and`` operator. x_func() is bool or Tensor. - y_func(callable): y_func() is the right hand operand of ``and`` operator. y_func() is bool or Tensor. - - Returns: - A python bool variable or a bool Tensor. - - NOTE(liym27): - 1) The operands are executed sequentially according to the running logic of Python. So here the arguments - should be callable. - 2) If the left hand operand is False, the right hand operand should be executed. - - For example: - a = x > 1 and y < 1 - Transformed code: - a = paddle.jit.dy2static.convert_logical_and(lambda:x>1, lambda:y<1) - - In `convert_logical_and(lambda:x>1, lambda:y<1)`, `lambda:y<1` must be run after `lambda:x>1`. And - if `x>1` is False, `y<1` should NOT be run. - """ - x_value = x_func() - if not isinstance(x_value, Variable): - return _run_py_logical_and(lambda: x_value, y_func) - - y_value = y_func() - if not isinstance(y_value, Variable): - return _run_py_logical_and(lambda: y_value, lambda: x_value) - - return _run_paddle_logical_and(x_value, y_value) - - -def _run_paddle_logical_and(x, y): - x = cast_bool_if_necessary(x) - y = cast_bool_if_necessary(y) - return paddle.logical_and(x, y) - - -def _run_py_logical_and(x_func, y_func): - x_value = x_func() - assert not isinstance(x_value, Variable) - - # NOTE(liym27): - # 1. Returns y_func() if x_value is False; - # 2. 
If x_value is False, y_func() should not be run. - return x_value and y_func() - - -def convert_logical_or(x_func, y_func): - """ - A function representation of a Python ``or`` statement. - - Args: - x_func(callable): x_func() is the left hand operand of ``or`` operator. x_func() is bool or Tensor. - y_func(callable): y_func() is the right hand operand of ``or`` operator. y_func() is bool or Tensor. - - Returns: - A python bool variable or a bool Tensor. - - NOTE(liym27): - 1) The operands are executed sequentially according to the running logic of Python. So here the arguments - should be callable. - 2) If the left hand operand is True, the right hand operand should be executed. - - For example: - a = x > 1 or y < 1 - Transformed code: - a = paddle.jit.dy2static.convert_logical_or(lambda:x>1, lambda:y<1) - - In `convert_logical_or(lambda:x>1, lambda:y<1)`, `lambda:y<1` must be run after `lambda:x>1`. And - if `x>1` is True, `y<1` should NOT be run. - """ - x_value = x_func() - if not isinstance(x_value, Variable): - return _run_py_logical_or(lambda: x_value, y_func) - - y_value = y_func() - if not isinstance(y_value, Variable): - return _run_py_logical_or(lambda: y_value, lambda: x_value) - - return _run_paddle_logical_or(x_value, y_value) - - -def _run_paddle_logical_or(x, y): - x = cast_bool_if_necessary(x) - y = cast_bool_if_necessary(y) - return paddle.logical_or(x, y) - - -def _run_py_logical_or(x_func, y_func): - x_value = x_func() - assert not isinstance(x_value, Variable) - - # NOTE(liym27): - # 1. Returns y_func() if x_value is False; - # 2. If x_value is True, y_func() should not be run. - return x_value or y_func() - - -def convert_logical_not(x): - """ - A function representation of a Python ``not`` statement. - - Args: - x(bool|Tensor): Operand of ``not`` operator. - - Returns: - A python bool variable or a bool Tensor. - """ - - if isinstance(x, Variable): - return _run_paddle_logical_not(x) - else: - return _run_py_logical_not(x) - - -def _run_paddle_logical_not(x): - x = cast_bool_if_necessary(x) - return paddle.logical_not(x) - - -def _run_py_logical_not(x): - return not x - - -def convert_ifelse( - pred, - true_fn, - false_fn, - get_args, - set_args, - return_name_ids, - push_pop_names=None, -): - """ - A function representation of a Python ``if/else`` statement. - - Args: - pred(bool|Tensor): A boolean Tensor which determines whether to return the result of ``true_fn`` or ``false_fn`` . - true_fn(callable): A callable to be performed if ``pred`` is true. - false_fn(callable): A callable to be performed if ``pred`` is false. - get_args(callable): Get all arguments that needed in true_fn and false_fn. - set_args(callable): Update arguments that modified in trure_fn and false_fn. - return_name_ids(list[string], optional): the returned names. - push_pop_names(list[string], optional): the names on which called .append() or .pop(). - - Returns: - ``true_fn()`` if the predicate ``pred`` is true else ``false_fn()`` . - - """ - if isinstance(pred, Variable): - out = _run_paddle_cond( - pred, - true_fn, - false_fn, - get_args, - set_args, - return_name_ids, - push_pop_names, - ) - else: - out = _run_py_ifelse( - pred, true_fn, false_fn, get_args, set_args, return_name_ids - ) - - return out - - -def _run_paddle_cond( - pred, true_fn, false_fn, get_args, set_args, return_name_ids, push_pop_names -): - """ - Paddle cond API will evaluate both true_fn and false_fn codes. 
- """ - helper = GetterSetterHelper( - get_args, set_args, return_name_ids, push_pop_names - ) - _convert_tensor_arrray_if_necessary(helper, push_pop_names) - pred = cast_bool_if_necessary(pred) - init_args = helper.get(return_name_ids) - - def new_true_fn(): - # init args may contain mutable python container like [var, 2], we copy then like in while_loop - helper.set(return_name_ids, copy_mutable_vars(init_args)) - ret = true_fn() - # IfExpr will return a non-None return value, so we just return ret. - # We assume normal return has no return value. - if ret is None: - return helper.get(return_name_ids) - else: - return ret - - def new_false_fn(): - # init args may contain mutable python container like [var, 2], we copy then like in while_loop - helper.set(return_name_ids, copy_mutable_vars(init_args)) - ret = false_fn() - if ret is None: - return helper.get(return_name_ids) - else: - return ret - - try: - cond_outs = control_flow.cond( - pred, new_true_fn, new_false_fn, None, return_name_ids - ) - except Exception as e: - if re.search( - "Unsupported return type of true_fn and false_fn in cond", str(e) - ): - raise Dygraph2StaticException( - "Your if/else have different return type. TODO: add link to modifty. {}".format( - str(e) - ) - ) - if re.search("Incompatible return values of", str(e)): - raise Dygraph2StaticException( - "Your if/else have different number of return value. TODO: add link to modifty. {}".format( - str(e) - ) - ) - raise e - get_args = lambda: helper.get(return_name_ids) - set_args = lambda vs: helper.set(return_name_ids, vs) - return _recover_args_state(cond_outs, get_args, set_args, return_name_ids) - - -def _run_py_ifelse( - pred, true_fn, false_fn, get_args, set_args, return_name_ids -): - """ - Evaluate python original branch function if-else. - """ - py_outs = true_fn() if pred else false_fn() - return py_outs - - -def _remove_no_value_return_var(out): - if isinstance(out, tuple) and len(out) > 0: - processed_out = out - align_ret = out[0] - if isinstance(align_ret, tuple): - for index, item in enumerate(align_ret): - if isinstance(item, Variable) and ( - RETURN_NO_VALUE_VAR_NAME in item.name - ): - # return None - if index == 0: - processed_out = (None,) + out[1:] - elif index == 1: - processed_out = align_ret[:1] + out[1:] - else: - processed_out = (align_ret[:index],) + out[1:] - break - - for index, item in enumerate(processed_out): - if isinstance(item, Variable) and ( - RETURN_NO_VALUE_VAR_NAME in item.name - ): - processed_out = processed_out[:index] - - if not processed_out: - return None - elif len(processed_out) == 1: - return processed_out[0] - else: - return processed_out - - else: - return out - - -def _check_no_undefined_var(outs, names, branch_name): - if names is None: - return - if not isinstance(outs, (list, tuple)): - outs = [outs] - for var, name in zip(list(outs), names): - if isinstance(var, UndefinedVar): - raise ValueError( - "Required '{}' must be initialized both in if-else branch, but found it not initialized in '{}'.".format( - name, branch_name - ) - ) - - -def _recover_args_state(outs, get_args, set_args, return_name_ids): - """ - Currently we support variant length of early return statement by padding - _no_return_value. - - # TODO(dev): We shall consider to evaluate whether should support this for Python if-else? 
- """ - # IfExpr's return_name_ids maybe None - if return_name_ids is None: - return outs - - init_args = get_args() - # recover args state - num_outs = len(return_name_ids) - num_args = len(init_args) - assert num_outs <= num_args - - if num_args == 1: - final_outs = ( - (outs,) if not isinstance(outs, (list, tuple)) else tuple(outs) - ) - else: - outs = (outs,) if num_outs == 1 else tuple(outs) - final_outs = outs + init_args[num_outs:] - - set_args(final_outs) - return final_outs - - -def convert_len(var): - """ - Returns variable(length) from shape ops based on var.type - - Note: In addition to some ast transformations, some block-related - operations are added in `len` transformation, such as appending - `shape_op` in var.block. - """ - if isinstance(var, Variable): - assert var.ndim > 0, "len() of a 0D tensor is wrong" - if var.type in [ - core.VarDesc.VarType.LOD_TENSOR, - core.VarDesc.VarType.SELECTED_ROWS, - ]: - # Note: Length of var may be known ahead of time in dygraph, - # but it probably represents batch size which can be variant. - # so we return a variable dynamically inferred from var.shape. - if var.shape[0] > 0 and var.type == core.VarDesc.VarType.LOD_TENSOR: - return var.shape[0] - return nn.shape(var)[0] - elif var.type == core.VarDesc.VarType.LOD_TENSOR_ARRAY: - return control_flow.array_length(var) - else: - raise TypeError( - 'len(var) only supports LoDTensor/LoDTensorArray/SelectedRows, but received %s.' - % type(var) - ) - else: - if isinstance(var, VariableTuple): - return var.__len__() - return len(var) - - -def convert_zip(*args): - for i, arg in enumerate(args): - if isinstance(arg, Variable) and arg.shape[0] == -1: - raise RuntimeError( - "Not support zip(tensor, ...) when tensor.shape[0] == -1, " - "but found args[{}].shape[0] == -1 in 'zip'".format(str(i)) - ) - return zip(*args) - - -# TODO(xiongkun): delete when list is ready. -class VariableTuple: - """ - this class will cause enumerate can't be wrapped by other iterator change function. - this will be fixed when list is producted. - VariableTuple can only deal with variables which is fixed. - """ - - def __init__(self, var, start=0): - self.var = var - self.len = convert_len(var) - if isinstance(self.len, Variable): - self.rag = paddle.arange(start, start + self.len, 1, paddle.int64) - else: - self.rag = range(start, start + self.len) - - def __getitem__(self, idx): - return self.rag[idx], self.var[idx] - - def __len__(self): - return self.len - - -def convert_enumerate(*args): - has_variable = any(map(lambda x: isinstance(x, Variable), args)) - if has_variable: - return VariableTuple(*args) - return enumerate(*args) - - -def convert_range(*args): - has_variable = any(map(lambda x: isinstance(x, Variable), args)) - if has_variable: - if len(args) == 1: - return paddle.arange(0, args[0], 1, paddle.int64) - if len(args) == 2: - return paddle.arange(args[0], args[1], 1, paddle.int64) - if len(args) == 3: - return paddle.arange(args[0], args[1], args[2], paddle.int64) - return range(*args) - - -def convert_shape(x): - """ - A function representation of the shape of variable. 
- """ - - def has_negative(list_shape): - return any([x < 0 for x in list_shape]) - - # When `x` is Variable: - # (1) if x.shape contains -1, such as [2, -1, 64], returns [2, var, 64], - # where var = paddle.shape(x)[1] - - # (2) if x.shape does not contains -1, return lsit(x.shape) directly - - if isinstance(x, Variable): - values = list(x.shape) - if has_negative(values): - shape_tensor = nn.shape(x) - for i, v in enumerate(values): - if v is None or v < 0: - values[i] = shape_tensor[i] - return values - else: - return x.shape - - -def convert_shape_compare(left, *args): - """ - A function handles comparison difference between Paddle and Python. - For example, if x and y are Tensors, x.shape == y.shape will return single - boolean Value (True/False). However, paddle.shape(x) == paddle.shape(y) is - an element-wise comparison. The difference can cause dy2stat error. So we - create this function to handle the difference. - - Args: - left: variable - *args: compare_op(str), variable, compare_op(str), variable, where - compare_op means "<", ">", "==", "!=", etc. - Returns: - If the variables to compare are NOT Paddle Variables, we will return as - Python like "a op1 b and b op2 c and ... ". - If the variables to compare are Paddle Variables, we will do elementwise - comparsion first and then reduce to a boolean whose numel is 1. - - """ - args_len = len(args) - assert ( - args_len >= 2 - ), "convert_shape_compare needs at least one right compare variable" - assert ( - args_len % 2 == 0 - ), "Illegal input for convert_shape_compare, *args should be op(str), var, op(str), var ..." - num_cmp = args_len // 2 - if isinstance(left, Variable): - - def reduce_compare(x, op_str, y): - element_wise_result = eval("x " + op_str + " y") - if op_str == "!=": - return reduce_any(element_wise_result) - elif ( - op_str == "is" - or op_str == "is not" - or op_str == "in" - or op_str == "not in" - ): - return element_wise_result - else: - return reduce_all(element_wise_result) - - final_result = reduce_compare(left, args[0], args[1]) - for i in range(1, num_cmp): - cmp_left = args[i * 2 - 1] - cmp_op = args[i * 2] - cmp_right = args[i * 2 + 1] - cur_result = reduce_compare(cmp_left, cmp_op, cmp_right) - final_result = convert_logical_and( - lambda: final_result, lambda: cur_result - ) - return final_result - else: - cmp_left = left - final_result = None - for i in range(num_cmp): - cmp_op = args[i * 2] - cmp_right = args[i * 2 + 1] - cur_result = eval("cmp_left " + cmp_op + " cmp_right") - if final_result is None: - final_result = cur_result - else: - final_result = final_result and cur_result - - if final_result is False: - return False - cmp_left = cmp_right - return final_result - - -def cast_bool_if_necessary(var): - assert isinstance(var, Variable) - if convert_dtype(var.dtype) not in ['bool']: - var = cast(var, dtype="bool") - return var - - -def convert_var_dtype(var, dtype): - if isinstance(var, Variable): - src_dtype = convert_dtype(var.dtype) - assert src_dtype in [ - 'bool', - 'float16', - 'float32', - 'float64', - 'int32', - 'int64', - 'uint8', - ], "The dtype of var {} is {}, which is not supported in the cast op.".format( - var.name, src_dtype - ) - assert dtype in [ - 'bool', - 'int', - 'float', - ], "The casted target dtype is {}, which is not supported in type casting.".format( - dtype - ) - cast_map = { - 'bool': 'bool', - 'int': 'int32', - 'float': 'float32', - } - return cast(var, dtype=cast_map[dtype]) - else: - return eval('{}(var)'.format(dtype)) - - -def convert_assert(cond, 
message=""): - """ - A function representation of a Python ``assert`` statement. - """ - if isinstance(cond, Variable): - cond = cast(cond, "bool") - # NOTE: message is not used because Paddle Assert has no corresponding parameter to use. - return Assert(cond) - else: - assert cond, message - - -def convert_print(*args): - """ - A function representing Python ``print`` statement. Note: this is a basic - python function so we haven't handle sep, end, file and flush parameters of - python function. - """ - for var in args: - if isinstance(var, Variable): - var = Print(var) - else: - print(var) - - -def convert_pop(target, *args): - """ - A function representation of a Python pop statement for a list or dict. - - Args: - target(list|dict|Tensor): A variable to pop item from. - *args(tuple): index or default value to parse. - - Returns: - A item poped from target. - """ - - is_variable = isinstance(target, Variable) - if is_variable: - is_tensor_array = target.type == core.VarDesc.VarType.LOD_TENSOR_ARRAY - - if is_variable and is_tensor_array: - return _run_paddle_pop(target, *args) - else: - return _run_python_pop(target, *args) - - -def _run_paddle_pop(array, *args): - if len(args) == 0: - idx = -1 - else: - idx = args[0] - - assert isinstance(idx, int) - - def cond(i, new_array): - return less_than(i, arr_len) - - def body(i, new_array): - item = array_read(array=array, i=i) - array_write(item, array_length(new_array), new_array) - i = increment(i) - return i, new_array - - arr_len = array_length(array) - if idx < 0: - idx = idx + arr_len - else: - idx = fill_constant(shape=[1], dtype="int64", value=idx) - - pop_item = array_read(array, idx) - - new_array = _slice_tensor_array(array, 0, idx) - i = idx + 1 - _, new_array = while_loop(cond, body, [i, new_array]) - assign(input=new_array, output=array) - - return pop_item - - -# TODO(liym27): A better way to slice tensor array. -# Maybe support start == end for slice op. -def _slice_tensor_array(array, start, end): - def true_fn(): - null_array = create_array("float32") - return null_array - - def false_fn(array, start, end): - new_array = paddle.slice(array, starts=[start], ends=[end], axes=[0]) - return new_array - - new_array = cond(start == end, true_fn, lambda: false_fn(array, start, end)) - return new_array - - -def _run_python_pop(target, *args): - # 1. pop for a dict - if len(args) == 2: - idx, default = args - return target.pop(idx, default) - - # 2. pop for a list or dict - else: - idx = args[0] if args else -1 - return target.pop(idx) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/utils.py b/python/paddle/fluid/dygraph/dygraph_to_static/utils.py index f5d425ca0ac063..23579720f3804c 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/utils.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/utils.py @@ -145,7 +145,7 @@ def data_layer_not_check(name, shape, dtype='float32', lod_level=0): def create_undefined_variable(): - from paddle.fluid.dygraph.dygraph_to_static.return_transformer import ( + from paddle.jit.dy2static.return_transformer import ( RETURN_NO_VALUE_MAGIC_NUM, ) @@ -1212,13 +1212,13 @@ def post_func(): """NOTE: why we need merge w_vars and push_pop_vars here ? because we do ifelse_transformer after loop_transformer. Loops will changed into functioons. but we know this function will be called in if. so we add w_vars to father function scope. 
""" - from paddle.fluid.dygraph.dygraph_to_static.loop_transformer import ( + from paddle.jit.dy2static.loop_transformer import ( WHILE_CONDITION_PREFIX, WHILE_BODY_PREFIX, FOR_CONDITION_PREFIX, FOR_BODY_PREFIX, ) - from paddle.fluid.dygraph.dygraph_to_static.ifelse_transformer import ( + from paddle.jit.dy2static.ifelse_transformer import ( TRUE_FUNC_PREFIX, FALSE_FUNC_PREFIX, ) diff --git a/python/paddle/fluid/dygraph/io.py b/python/paddle/fluid/dygraph/io.py index ba00deed977a97..1350493bae332d 100644 --- a/python/paddle/fluid/dygraph/io.py +++ b/python/paddle/fluid/dygraph/io.py @@ -30,7 +30,7 @@ _is_enable_standalone_executor, _is_dy2st_enable_standalone_executor, ) -from paddle.fluid.dygraph.dygraph_to_static.partial_program import ( +from paddle.jit.dy2static.partial_program import ( add_build_strategy_for, LazyInitialized, ) diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 6e7a53db2d36ba..c47e9babea7fdb 100755 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -2589,7 +2589,7 @@ def expand_undefined_var(nest1, nest2, names): In this case, we should not expand recursively. """ from paddle.fluid.dygraph.dygraph_to_static.utils import UndefinedVar - from paddle.fluid.dygraph.dygraph_to_static.return_transformer import ( + from paddle.jit.dy2static.return_transformer import ( RETURN_VALUE_PREFIX, ) diff --git a/python/paddle/fluid/layers/math_op_patch.py b/python/paddle/fluid/layers/math_op_patch.py index f9ba6498671617..5b5be84ac6b0ac 100644 --- a/python/paddle/fluid/layers/math_op_patch.py +++ b/python/paddle/fluid/layers/math_op_patch.py @@ -275,7 +275,7 @@ def pop(self, *args): Returns: Variable: self[index] """ - from paddle.fluid.dygraph.dygraph_to_static.convert_operators import ( + from paddle.jit.dy2static.convert_operators import ( _run_paddle_pop, ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py index 2a7d15e8c296e6..3922c60bafc2f0 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py @@ -20,7 +20,7 @@ import paddle import paddle.fluid as fluid from paddle.jit import ProgramTranslator -from paddle.fluid.dygraph.dygraph_to_static.convert_call_func import ( +from paddle.jit.dy2static.convert_call_func import ( CONVERSION_OPTIONS, ) from test_program_translator import get_source_code diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py index 1d0ccada5128e4..6ab1fbc9f20657 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py @@ -18,7 +18,7 @@ import paddle import paddle.fluid as fluid from paddle.jit.api import declarative -from paddle.fluid.dygraph.dygraph_to_static import convert_call +from paddle.jit.dy2static import Call SEED = 2020 np.random.seed(SEED) @@ -90,11 +90,11 @@ def len_with_selected_rows(place): ) # y is Variable(SelectedRows) y = fluid.layers.merge_selected_rows(var) - y_len = convert_call(len)(y) + y_len = Call(len)(y) # z is inner tensor with shape [4, 2] z = fluid.layers.get_tensor_from_selected_rows(y) - z_len = convert_call(len)(z) + z_len = Call(len)(z) # set data for selected_rows x_rows = [0, 2, 2, 4, 19] diff --git 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_loop.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_loop.py index 771d9ce59ad5ae..c6c8a392274f2f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_loop.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_loop.py @@ -19,7 +19,7 @@ import paddle.fluid as fluid import unittest -from paddle.fluid.dygraph.dygraph_to_static.loop_transformer import NameVisitor +from paddle.jit.dy2static.loop_transformer import NameVisitor from paddle.jit.api import declarative SEED = 2020 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py index bacf03ace0a559..043ad587fe77b3 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py @@ -21,7 +21,7 @@ import paddle.fluid as fluid from paddle.jit import ProgramTranslator from paddle.jit.api import declarative -from paddle.fluid.dygraph.dygraph_to_static.partial_program import ( +from paddle.jit.dy2static.partial_program import ( partial_program_from, ) from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX diff --git a/python/paddle/jit/api.py b/python/paddle/jit/api.py index 8a2ed4ce1b8b87..0387460c26a5d7 100644 --- a/python/paddle/jit/api.py +++ b/python/paddle/jit/api.py @@ -35,7 +35,7 @@ switch_to_static_graph, ) from paddle.fluid.dygraph.dygraph_to_static import logging_utils -from paddle.fluid.dygraph.dygraph_to_static.convert_call_func import ( +from paddle.jit.dy2static.convert_call_func import ( ConversionOptions, CONVERSION_OPTIONS, ) diff --git a/python/paddle/jit/dy2static/assert_transformer.py b/python/paddle/jit/dy2static/assert_transformer.py index ca6f1e652e944e..96b2abca217cda 100644 --- a/python/paddle/jit/dy2static/assert_transformer.py +++ b/python/paddle/jit/dy2static/assert_transformer.py @@ -22,6 +22,8 @@ BaseTransformer, ) +__all__ = ['AssertTransformer'] + class AssertTransformer(BaseTransformer): """ diff --git a/python/paddle/jit/dy2static/ast_transformer.py b/python/paddle/jit/dy2static/ast_transformer.py index 975b2200ef0eb4..f99c85c94a58e1 100644 --- a/python/paddle/jit/dy2static/ast_transformer.py +++ b/python/paddle/jit/dy2static/ast_transformer.py @@ -21,7 +21,7 @@ from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( BaseTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.early_return_transformer import ( +from .early_return_transformer import ( EarlyReturnTransformer, ) from .assert_transformer import ( @@ -30,10 +30,8 @@ from paddle.fluid.dygraph.dygraph_to_static.basic_api_transformer import ( BasicApiTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.break_continue_transformer import ( +from .break_continue_transformer import ( BreakContinueTransformer, -) -from paddle.fluid.dygraph.dygraph_to_static.break_continue_transformer import ( BreakTransformOptimizer, ) from paddle.fluid.dygraph.dygraph_to_static.call_transformer import ( @@ -45,19 +43,19 @@ from paddle.fluid.dygraph.dygraph_to_static.typehint_transformer import ( TypeHintTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.ifelse_transformer import ( +from .ifelse_transformer import ( IfElseTransformer, ) from paddle.fluid.dygraph.dygraph_to_static.logical_transformer import ( LogicalTransformer, ) -from 
paddle.fluid.dygraph.dygraph_to_static.loop_transformer import ( +from .loop_transformer import ( LoopTransformer, ) from paddle.fluid.dygraph.dygraph_to_static.print_transformer import ( PrintTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.return_transformer import ( +from .return_transformer import ( ReturnTransformer, ) from paddle.fluid.dygraph.dygraph_to_static.create_variable_transformer import ( diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/break_continue_transformer.py b/python/paddle/jit/dy2static/break_continue_transformer.py similarity index 100% rename from python/paddle/fluid/dygraph/dygraph_to_static/break_continue_transformer.py rename to python/paddle/jit/dy2static/break_continue_transformer.py diff --git a/python/paddle/jit/dy2static/convert_call_func.py b/python/paddle/jit/dy2static/convert_call_func.py index 17e33f53bca727..325ffb206011fd 100644 --- a/python/paddle/jit/dy2static/convert_call_func.py +++ b/python/paddle/jit/dy2static/convert_call_func.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,8 +12,330 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...fluid.dygraph.dygraph_to_static.convert_call_func import ( # noqa: F401 - convert_call, +import collections +import copy +import functools +import logging +import inspect +import pdb +import re +import types + +import numpy +import builtins + +from paddle.fluid.dygraph.container import Sequential +from .convert_operators import ( + convert_len, + convert_zip, + convert_range, + convert_enumerate, +) + +from paddle.fluid.dygraph.dygraph_to_static.logging_utils import ( + TranslatorLogger, ) -__all__ = [] +from paddle.fluid.dygraph.dygraph_to_static.utils import is_paddle_func, unwrap +from paddle.fluid.dygraph.layers import Layer + +__all__ = ["convert_call"] + + +# The api(s) should be considered as plain function and convert +# them into static layer code. +PADDLE_NEED_CONVERT_APIS = [Sequential] + +translator_logger = TranslatorLogger() + +CONVERSION_OPTIONS = "An attribute for a function that indicates conversion flags of the function in dynamic-to-static." + + +class ConversionOptions: + """ + A container for conversion flags of a function in dynamic-to-static. + + Attributes: + not_convert(bool): An attribute indicates that the function won't be converted in dynamic-to-static. + + NOTE(liym27): More attributes and methods can be added in this class. + """ + + def __init__(self, not_convert=False): + self.not_convert = not_convert + + +def is_builtin(func, name=None): + """predict whether a function is a builtin function with name={name}. + if name == None, then any builtin function will return True + """ + + def name_judge(): + return name is None or func.__name__ == name + + if isinstance(func, types.BuiltinFunctionType) and name_judge(): + return True + elif func in builtins.__dict__.values() and name_judge(): + return True + else: + return False + + +def builtin_modules(): + """ + Return builtin modules. 
+ """ + modules = [ + collections, + pdb, + copy, + inspect, + re, + numpy, + logging, + ] + try: + import six + + modules.append(six) + except ImportError: + pass # do nothing + + return modules + + +BUILTIN_LIKELY_MODULES = builtin_modules() + + +def is_unsupported(func): + """ + Checks whether the func is supported by dygraph to static graph. + """ + + for m in BUILTIN_LIKELY_MODULES: + for v in m.__dict__.values(): + func_in_dict = func == v + if isinstance(func_in_dict, (list, numpy.ndarray)): + func_in_dict = numpy.array(func_in_dict).any() + if func_in_dict: + translator_logger.log( + 2, + "Whitelist: {} is part of built-in module and does not have to be transformed.".format( + func + ), + ) + return True + + # NOTE: should be placed before `is_paddle_func` + if type(func) in PADDLE_NEED_CONVERT_APIS: + return False + + if is_paddle_func(func): + translator_logger.log( + 2, + "Whitelist: {} is part of Paddle module and does not have to be transformed.".format( + func + ), + ) + return True + + +def convert_call(func): + """ + Converts a function call which needs to be transformed to static function. + + Args: + func (callable): A callable function or method to convert. + + Returns: + Callable: A converted function. + + Examples: + .. code-block:: python + + import paddle + from paddle.jit.dy2static import Call + + paddle.enable_static() + def dyfunc(x): + if paddle.mean(x) < 0: + x_v = x - 1 + else: + x_v = x + 1 + return x_v + + new_func = Call(dyfunc) + x = paddle.tensor.manipulation.fill_constant(shape=[3, 3], value=0, dtype='float64') + x_v = new_func(x) + + exe = paddle.static.Executor(paddle.CPUPlace()) + out = exe.run(fetch_list=[x_v]) + print(out[0]) + # [[1. 1. 1.] + # [1. 1. 1.] + # [1. 1. 1.]] + + """ + # NOTE(Aurelius84): Fix it after all files migrating into jit. + from paddle.jit.dy2static.program_translator import ( + convert_to_static, + unwrap_decorators, + StaticFunction, + ) + + translator_logger.log( + 1, "Convert callable object: convert {}.".format(func) + ) + func_self = None + converted_call = None + + # Function in convert_call may be decorated by another `@to_static`, + # in this case, unwraps it into a raw method or function. + _, func = unwrap_decorators(func) + + options = getattr(func, CONVERSION_OPTIONS, None) + if options is not None and options.not_convert: + translator_logger.log( + 2, + "{} is not converted when it is decorated by 'paddle.jit.not_to_static'.".format( + func + ), + ) + return func + + if is_builtin(func, "len"): + return convert_len + + if is_builtin(func, "zip"): + return convert_zip + + if is_builtin(func, "range"): + return convert_range + + if is_builtin(func, "enumerate"): + return convert_enumerate + + if is_builtin(func) or is_unsupported(func): + return func + + if inspect.isgeneratorfunction(func): + # NOTE(xiongkun03): inspect.isfunction() will return True even though func is a generator function. + # If we don't deal generatorfunction here, we will regard it as normal function and get errors in some + # occasion. + number_of_stars = 30 + translator_logger.warn( + "\n\n" + + "*" * number_of_stars + + "\nYour function:`{}` doesn't support to transform to static function because it is a generator function, it will be run as-is.".format( + func.__name__ + ) + + "\n" + + "*" * number_of_stars + + "\n\n" + ) + return func + + if inspect.isfunction(func): + # TODO(liym27): If func is a lambda function, special conversion is needed. 
+ if func.__name__ == '': + return func + try: + # Note(Aurelius84): Because `@declarative` returns a class instance instead of + # a function. This will modify the value referring to itself in `__globals__`. + + # For example: + # + # @declarative + # def foo(x): + # return x + # + # `foo` will be converted into a wrapper class, suppose as `StaticFunction`. + # And `foo.__globals__['foo']` will still return this `StaticFunction` instead of + # `foo` function. So `isinstance(fn, StaticFunction)` is added here. + _origfunc = unwrap(func) + global_functions = set() + for fn in _origfunc.__globals__.values(): + if inspect.isfunction(fn): + global_functions.add(fn) + elif isinstance(fn, StaticFunction): + _, fn = unwrap_decorators(fn) + global_functions.add(fn) + elif inspect.isclass(fn): + if isinstance( + fn.__dict__.get(func.__name__, None), staticmethod + ): + global_functions.add( + func + ) # Add func to ensure that we will convert + + if func in global_functions: + converted_call = convert_to_static(func) + func_self = getattr(func, '__self__', None) + else: + # NOTE: + # If func is not in __globals__, it does not need to be transformed + # because it has been transformed before. + translator_logger.warn( + "{} doesn't have to be transformed to static function because it has been transformed before, it will be run as-is.".format( + func + ) + ) + converted_call = func + except AttributeError: + # NOTE: + # If func is not in __globals__, it does not need to be transformed + # because it has been transformed before. + converted_call = None + except (IOError, OSError): + # NOTE: + # If func has been decorated, its source code can not be get + # so that it can not be transformed to static function. + converted_call = None + elif inspect.ismethod(func): + try: + converted_call = convert_to_static(func) + func_self = getattr(func, '__self__', None) + except (IOError, OSError): + # NOTE: func may have been decorated. + converted_call = None + + elif hasattr(func, '__class__') and hasattr(func.__class__, '__call__'): + if hasattr(func, 'forward') and isinstance(func, Layer): + try: + _, forward_func = unwrap_decorators(func.forward) + func._original_funcs['forward'] = forward_func.__func__ + forward_func = convert_to_static(forward_func) + # Bound mothod will be convert into plain function after `convert_to_static`. + # So descriptor mechanism is used to bound `self` instance on function to + # keep it as bound method. + setattr(func, 'forward', forward_func.__get__(func)) + except (IOError, OSError, TypeError): + # NOTE: func.forward may have been decorated. 
+ func_self = None if func_self else func_self + converted_call = func + else: + try: + call_func = func.__class__.__call__ + converted_call = convert_to_static(call_func) + func_self = func + except (IOError, OSError, TypeError): + # NOTE: + # If `func` is a class which is being initialized, for example `convert_call(Foo)()`, + # it doesn't need to be transformed + func_self = None if func_self else func_self + else: + raise NotImplementedError( + "Callable {} can not be transformed at present.".format(func) + ) + + if converted_call is None: + translator_logger.warn( + "{} doesn't have to be transformed to static function, and it will be run as-is.".format( + func + ) + ) + return func + + if func_self: + converted_call = functools.partial(converted_call, func_self) + return converted_call diff --git a/python/paddle/jit/dy2static/convert_operators.py b/python/paddle/jit/dy2static/convert_operators.py index 54210302c41787..e5b35d0b4c0d11 100644 --- a/python/paddle/jit/dy2static/convert_operators.py +++ b/python/paddle/jit/dy2static/convert_operators.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,23 +12,826 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...fluid.dygraph.dygraph_to_static.convert_operators import ( # noqa: F401 - cast_bool_if_necessary, - convert_assert, - convert_ifelse, - convert_len, - convert_logical_and, - convert_logical_not, - convert_logical_or, - convert_pop, - convert_print, - convert_shape_compare, - convert_var_dtype, - convert_shape, - convert_while_loop, - unpack_by_structure, - indexable, - convert_attr, +import re +import paddle +from paddle.fluid.data_feeder import convert_dtype +from paddle.fluid.dygraph.dygraph_to_static.variable_trans_func import ( + to_static_variable, ) +from paddle.fluid.framework import core, Variable +from paddle.fluid.layers import Assert, Print +from paddle.fluid.layers import ( + array_length, + array_read, + array_write, + create_array, +) +from paddle.fluid.layers import ( + assign, + fill_constant, + reduce_all, + reduce_any, +) +from paddle.fluid.layers import ( + cast, + control_flow, + nn, +) +from paddle.fluid.layers.control_flow import ( + cond, + while_loop, + less_than, + increment, +) +from .return_transformer import ( + RETURN_NO_VALUE_VAR_NAME, +) +from paddle.fluid.dygraph.dygraph_to_static.utils import ( + UndefinedVar, + Dygraph2StaticException, +) +from paddle.fluid.dygraph.dygraph_to_static.utils import GetterSetterHelper +from paddle.fluid.layers.utils import copy_mutable_vars + + +def convert_attr(x, attr): + if isinstance(x, Variable) and attr == "size": + return x.size() + else: + return getattr(x, attr) + + +def indexable(x, code=None): + if isinstance(x, Variable): + return x + if hasattr(x, '__len__') and hasattr(x, '__getitem__'): + return x + if hasattr(x, '__iter__'): + return [i for i in x] + else: + raise RuntimeError("X can't be convert into indexable.") + + +def unpack_by_structure(target, structure): + """unified unpack interface for paddle and python.""" + if isinstance(target, Variable): + return _unpack_by_structure_paddle(target, structure) + else: + return _unpack_by_structure_python(target, structure) + + +def _unpack_by_structure_python(target, structure): + """TODO(xiongkun): analysis 
the differences between python and paddle unpack.""" + return _unpack_by_structure_paddle(target, structure) + + +def _unpack_by_structure_paddle(target, structure): + if structure == 1: + return target + ret = [] + for idx, ele in enumerate(structure): + if ele == 1: + ret.append(target[idx]) + continue + if isinstance(ele, list): + ret.append(unpack_by_structure(target[idx], ele)) + continue + assert False, "structure element must be 1 or list" + return ret + + +def convert_while_loop( + cond, body, getter, setter, return_name_ids=None, push_pop_names=None +): + """ + A function representation of a Python ``while`` statement. + + Args: + cond(Callable): A callable object that returns a boolean variable to control whether to execute the loop body. It takes ``loop_vars`` as arguments. + body(Callable): A callable object that returns a tuple or list of variables with the same arguments ``loops_vars`` as ``cond`` . + get_args(callable): Get all arguments that needed in true_fn and false_fn. + set_args(callable): Update arguments that modified in trure_fn and false_fn. + return_name_ids(list[string], optional): the returned names. + push_pop_names(list[string], optional): the names on which called .append() or .pop(). + + Returns: + A list or tuple of variables which returned by ``body``. + """ + + # NOTE: It may be slower if cond is very expensive, but usually cond is just O(1). + # If loop_vars is changed during cond callable, then it causes bug, but current logical_and/logical_not/... doesn't change the loop_vars. + pred = cond() + if isinstance(pred, Variable): + _run_paddle_while( + cond, body, getter, setter, return_name_ids, push_pop_names + ) + else: + _run_py_while(cond, body, getter, setter) + + +def _convert_tensor_arrray_if_necessary(setterhelper, push_pop_names): + push_pop_vars = setterhelper.get(push_pop_names) + if push_pop_vars is None: + return + + def maybe_to_tensor_array(v): + if isinstance(v, list): + return create_array("float32", initialized_list=v) + else: + return v + + setterhelper.set( + push_pop_names, [maybe_to_tensor_array(v) for v in push_pop_vars] + ) + + +def _run_paddle_while( + cond, body, getter, setter, return_name_ids, push_pop_names +): + # NOTE: loop_vars of Paddle op `control_flow.while_loop` must be Paddle Tensors. + helper = GetterSetterHelper(getter, setter, return_name_ids, push_pop_names) + _convert_tensor_arrray_if_necessary(helper, push_pop_names) + + def new_body_fn(*args): + """wrap the body() and add return value for `while_loop` + the args may be differ from getter(). + """ + mutable_loop_vars = args + helper.set(return_name_ids, mutable_loop_vars) + body() + return helper.get(return_name_ids) + + def new_cond_fn(*args): + """cond is a zero-args function, which is not + compatible with `while_loop`. + """ + return cond() + + # UndefinedVar will become data layer not check variable with value=NO_VALUE_MAGIC. + loop_vars = [ + to_static_variable(var) if not isinstance(var, UndefinedVar) else var + for var in helper.get(return_name_ids) + ] + helper.set( + return_name_ids, loop_vars + ) # change the non-local var to variable + # variable maybe modified to inner var. change it into + loop_vars = control_flow.while_loop(new_cond_fn, new_body_fn, loop_vars) + helper.set(return_name_ids, loop_vars) + return loop_vars + + +def _run_py_while(cond, body, getter, setter): + while True: + pred = cond() + if isinstance(pred, Variable): + raise Dygraph2StaticException( + "python while pred change from bool to variable." 
+ ) + if not pred: + break + body() + + +def convert_logical_and(x_func, y_func): + """ + A function representation of a Python ``and`` statement. + + Args: + x_func(callable): x_func() is the left hand operand of ``and`` operator. x_func() is bool or Tensor. + y_func(callable): y_func() is the right hand operand of ``and`` operator. y_func() is bool or Tensor. + + Returns: + A python bool variable or a bool Tensor. + + NOTE(liym27): + 1) The operands are executed sequentially according to the running logic of Python. So here the arguments + should be callable. + 2) If the left hand operand is False, the right hand operand should be executed. + + For example: + a = x > 1 and y < 1 + Transformed code: + a = paddle.jit.dy2static.convert_logical_and(lambda:x>1, lambda:y<1) + + In `convert_logical_and(lambda:x>1, lambda:y<1)`, `lambda:y<1` must be run after `lambda:x>1`. And + if `x>1` is False, `y<1` should NOT be run. + """ + x_value = x_func() + if not isinstance(x_value, Variable): + return _run_py_logical_and(lambda: x_value, y_func) + + y_value = y_func() + if not isinstance(y_value, Variable): + return _run_py_logical_and(lambda: y_value, lambda: x_value) + + return _run_paddle_logical_and(x_value, y_value) + + +def _run_paddle_logical_and(x, y): + x = cast_bool_if_necessary(x) + y = cast_bool_if_necessary(y) + return paddle.logical_and(x, y) + + +def _run_py_logical_and(x_func, y_func): + x_value = x_func() + assert not isinstance(x_value, Variable) + + # NOTE(liym27): + # 1. Returns y_func() if x_value is False; + # 2. If x_value is False, y_func() should not be run. + return x_value and y_func() + + +def convert_logical_or(x_func, y_func): + """ + A function representation of a Python ``or`` statement. + + Args: + x_func(callable): x_func() is the left hand operand of ``or`` operator. x_func() is bool or Tensor. + y_func(callable): y_func() is the right hand operand of ``or`` operator. y_func() is bool or Tensor. + + Returns: + A python bool variable or a bool Tensor. + + NOTE(liym27): + 1) The operands are executed sequentially according to the running logic of Python. So here the arguments + should be callable. + 2) If the left hand operand is True, the right hand operand should be executed. + + For example: + a = x > 1 or y < 1 + Transformed code: + a = paddle.jit.dy2static.convert_logical_or(lambda:x>1, lambda:y<1) + + In `convert_logical_or(lambda:x>1, lambda:y<1)`, `lambda:y<1` must be run after `lambda:x>1`. And + if `x>1` is True, `y<1` should NOT be run. + """ + x_value = x_func() + if not isinstance(x_value, Variable): + return _run_py_logical_or(lambda: x_value, y_func) + + y_value = y_func() + if not isinstance(y_value, Variable): + return _run_py_logical_or(lambda: y_value, lambda: x_value) + + return _run_paddle_logical_or(x_value, y_value) + + +def _run_paddle_logical_or(x, y): + x = cast_bool_if_necessary(x) + y = cast_bool_if_necessary(y) + return paddle.logical_or(x, y) + + +def _run_py_logical_or(x_func, y_func): + x_value = x_func() + assert not isinstance(x_value, Variable) + + # NOTE(liym27): + # 1. Returns y_func() if x_value is False; + # 2. If x_value is True, y_func() should not be run. + return x_value or y_func() + + +def convert_logical_not(x): + """ + A function representation of a Python ``not`` statement. + + Args: + x(bool|Tensor): Operand of ``not`` operator. + + Returns: + A python bool variable or a bool Tensor. 
+ """ + + if isinstance(x, Variable): + return _run_paddle_logical_not(x) + else: + return _run_py_logical_not(x) + + +def _run_paddle_logical_not(x): + x = cast_bool_if_necessary(x) + return paddle.logical_not(x) + + +def _run_py_logical_not(x): + return not x + + +def convert_ifelse( + pred, + true_fn, + false_fn, + get_args, + set_args, + return_name_ids, + push_pop_names=None, +): + """ + A function representation of a Python ``if/else`` statement. + + Args: + pred(bool|Tensor): A boolean Tensor which determines whether to return the result of ``true_fn`` or ``false_fn`` . + true_fn(callable): A callable to be performed if ``pred`` is true. + false_fn(callable): A callable to be performed if ``pred`` is false. + get_args(callable): Get all arguments that needed in true_fn and false_fn. + set_args(callable): Update arguments that modified in trure_fn and false_fn. + return_name_ids(list[string], optional): the returned names. + push_pop_names(list[string], optional): the names on which called .append() or .pop(). + + Returns: + ``true_fn()`` if the predicate ``pred`` is true else ``false_fn()`` . + + """ + if isinstance(pred, Variable): + out = _run_paddle_cond( + pred, + true_fn, + false_fn, + get_args, + set_args, + return_name_ids, + push_pop_names, + ) + else: + out = _run_py_ifelse( + pred, true_fn, false_fn, get_args, set_args, return_name_ids + ) + + return out + + +def _run_paddle_cond( + pred, true_fn, false_fn, get_args, set_args, return_name_ids, push_pop_names +): + """ + Paddle cond API will evaluate both true_fn and false_fn codes. + """ + helper = GetterSetterHelper( + get_args, set_args, return_name_ids, push_pop_names + ) + _convert_tensor_arrray_if_necessary(helper, push_pop_names) + pred = cast_bool_if_necessary(pred) + init_args = helper.get(return_name_ids) + + def new_true_fn(): + # init args may contain mutable python container like [var, 2], we copy then like in while_loop + helper.set(return_name_ids, copy_mutable_vars(init_args)) + ret = true_fn() + # IfExpr will return a non-None return value, so we just return ret. + # We assume normal return has no return value. + if ret is None: + return helper.get(return_name_ids) + else: + return ret + + def new_false_fn(): + # init args may contain mutable python container like [var, 2], we copy then like in while_loop + helper.set(return_name_ids, copy_mutable_vars(init_args)) + ret = false_fn() + if ret is None: + return helper.get(return_name_ids) + else: + return ret + + try: + cond_outs = control_flow.cond( + pred, new_true_fn, new_false_fn, None, return_name_ids + ) + except Exception as e: + if re.search( + "Unsupported return type of true_fn and false_fn in cond", str(e) + ): + raise Dygraph2StaticException( + "Your if/else have different return type. TODO: add link to modifty. {}".format( + str(e) + ) + ) + if re.search("Incompatible return values of", str(e)): + raise Dygraph2StaticException( + "Your if/else have different number of return value. TODO: add link to modifty. {}".format( + str(e) + ) + ) + raise e + get_args = lambda: helper.get(return_name_ids) + set_args = lambda vs: helper.set(return_name_ids, vs) + return _recover_args_state(cond_outs, get_args, set_args, return_name_ids) + + +def _run_py_ifelse( + pred, true_fn, false_fn, get_args, set_args, return_name_ids +): + """ + Evaluate python original branch function if-else. 
+ """ + py_outs = true_fn() if pred else false_fn() + return py_outs + + +def _remove_no_value_return_var(out): + if isinstance(out, tuple) and len(out) > 0: + processed_out = out + align_ret = out[0] + if isinstance(align_ret, tuple): + for index, item in enumerate(align_ret): + if isinstance(item, Variable) and ( + RETURN_NO_VALUE_VAR_NAME in item.name + ): + # return None + if index == 0: + processed_out = (None,) + out[1:] + elif index == 1: + processed_out = align_ret[:1] + out[1:] + else: + processed_out = (align_ret[:index],) + out[1:] + break + + for index, item in enumerate(processed_out): + if isinstance(item, Variable) and ( + RETURN_NO_VALUE_VAR_NAME in item.name + ): + processed_out = processed_out[:index] + + if not processed_out: + return None + elif len(processed_out) == 1: + return processed_out[0] + else: + return processed_out + + else: + return out + + +def _check_no_undefined_var(outs, names, branch_name): + if names is None: + return + if not isinstance(outs, (list, tuple)): + outs = [outs] + for var, name in zip(list(outs), names): + if isinstance(var, UndefinedVar): + raise ValueError( + "Required '{}' must be initialized both in if-else branch, but found it not initialized in '{}'.".format( + name, branch_name + ) + ) + + +def _recover_args_state(outs, get_args, set_args, return_name_ids): + """ + Currently we support variant length of early return statement by padding + _no_return_value. + + # TODO(dev): We shall consider to evaluate whether should support this for Python if-else? + """ + # IfExpr's return_name_ids maybe None + if return_name_ids is None: + return outs + + init_args = get_args() + # recover args state + num_outs = len(return_name_ids) + num_args = len(init_args) + assert num_outs <= num_args + + if num_args == 1: + final_outs = ( + (outs,) if not isinstance(outs, (list, tuple)) else tuple(outs) + ) + else: + outs = (outs,) if num_outs == 1 else tuple(outs) + final_outs = outs + init_args[num_outs:] + + set_args(final_outs) + return final_outs + + +def convert_len(var): + """ + Returns variable(length) from shape ops based on var.type + + Note: In addition to some ast transformations, some block-related + operations are added in `len` transformation, such as appending + `shape_op` in var.block. + """ + if isinstance(var, Variable): + assert var.ndim > 0, "len() of a 0D tensor is wrong" + if var.type in [ + core.VarDesc.VarType.LOD_TENSOR, + core.VarDesc.VarType.SELECTED_ROWS, + ]: + # Note: Length of var may be known ahead of time in dygraph, + # but it probably represents batch size which can be variant. + # so we return a variable dynamically inferred from var.shape. + if var.shape[0] > 0 and var.type == core.VarDesc.VarType.LOD_TENSOR: + return var.shape[0] + return nn.shape(var)[0] + elif var.type == core.VarDesc.VarType.LOD_TENSOR_ARRAY: + return control_flow.array_length(var) + else: + raise TypeError( + 'len(var) only supports LoDTensor/LoDTensorArray/SelectedRows, but received %s.' + % type(var) + ) + else: + if isinstance(var, VariableTuple): + return var.__len__() + return len(var) + + +def convert_zip(*args): + for i, arg in enumerate(args): + if isinstance(arg, Variable) and arg.shape[0] == -1: + raise RuntimeError( + "Not support zip(tensor, ...) when tensor.shape[0] == -1, " + "but found args[{}].shape[0] == -1 in 'zip'".format(str(i)) + ) + return zip(*args) + + +# TODO(xiongkun): delete when list is ready. +class VariableTuple: + """ + this class will cause enumerate can't be wrapped by other iterator change function. 
+ this will be fixed when list is producted. + VariableTuple can only deal with variables which is fixed. + """ + + def __init__(self, var, start=0): + self.var = var + self.len = convert_len(var) + if isinstance(self.len, Variable): + self.rag = paddle.arange(start, start + self.len, 1, paddle.int64) + else: + self.rag = range(start, start + self.len) + + def __getitem__(self, idx): + return self.rag[idx], self.var[idx] + + def __len__(self): + return self.len + + +def convert_enumerate(*args): + has_variable = any(map(lambda x: isinstance(x, Variable), args)) + if has_variable: + return VariableTuple(*args) + return enumerate(*args) + + +def convert_range(*args): + has_variable = any(map(lambda x: isinstance(x, Variable), args)) + if has_variable: + if len(args) == 1: + return paddle.arange(0, args[0], 1, paddle.int64) + if len(args) == 2: + return paddle.arange(args[0], args[1], 1, paddle.int64) + if len(args) == 3: + return paddle.arange(args[0], args[1], args[2], paddle.int64) + return range(*args) + + +def convert_shape(x): + """ + A function representation of the shape of variable. + """ + + def has_negative(list_shape): + return any([x < 0 for x in list_shape]) + + # When `x` is Variable: + # (1) if x.shape contains -1, such as [2, -1, 64], returns [2, var, 64], + # where var = paddle.shape(x)[1] + + # (2) if x.shape does not contains -1, return lsit(x.shape) directly + + if isinstance(x, Variable): + values = list(x.shape) + if has_negative(values): + shape_tensor = nn.shape(x) + for i, v in enumerate(values): + if v is None or v < 0: + values[i] = shape_tensor[i] + return values + else: + return x.shape + + +def convert_shape_compare(left, *args): + """ + A function handles comparison difference between Paddle and Python. + For example, if x and y are Tensors, x.shape == y.shape will return single + boolean Value (True/False). However, paddle.shape(x) == paddle.shape(y) is + an element-wise comparison. The difference can cause dy2stat error. So we + create this function to handle the difference. + + Args: + left: variable + *args: compare_op(str), variable, compare_op(str), variable, where + compare_op means "<", ">", "==", "!=", etc. + Returns: + If the variables to compare are NOT Paddle Variables, we will return as + Python like "a op1 b and b op2 c and ... ". + If the variables to compare are Paddle Variables, we will do elementwise + comparsion first and then reduce to a boolean whose numel is 1. + + """ + args_len = len(args) + assert ( + args_len >= 2 + ), "convert_shape_compare needs at least one right compare variable" + assert ( + args_len % 2 == 0 + ), "Illegal input for convert_shape_compare, *args should be op(str), var, op(str), var ..." 
+ num_cmp = args_len // 2 + if isinstance(left, Variable): + + def reduce_compare(x, op_str, y): + element_wise_result = eval("x " + op_str + " y") + if op_str == "!=": + return reduce_any(element_wise_result) + elif ( + op_str == "is" + or op_str == "is not" + or op_str == "in" + or op_str == "not in" + ): + return element_wise_result + else: + return reduce_all(element_wise_result) + + final_result = reduce_compare(left, args[0], args[1]) + for i in range(1, num_cmp): + cmp_left = args[i * 2 - 1] + cmp_op = args[i * 2] + cmp_right = args[i * 2 + 1] + cur_result = reduce_compare(cmp_left, cmp_op, cmp_right) + final_result = convert_logical_and( + lambda: final_result, lambda: cur_result + ) + return final_result + else: + cmp_left = left + final_result = None + for i in range(num_cmp): + cmp_op = args[i * 2] + cmp_right = args[i * 2 + 1] + cur_result = eval("cmp_left " + cmp_op + " cmp_right") + if final_result is None: + final_result = cur_result + else: + final_result = final_result and cur_result + + if final_result is False: + return False + cmp_left = cmp_right + return final_result + + +def cast_bool_if_necessary(var): + assert isinstance(var, Variable) + if convert_dtype(var.dtype) not in ['bool']: + var = cast(var, dtype="bool") + return var + + +def convert_var_dtype(var, dtype): + if isinstance(var, Variable): + src_dtype = convert_dtype(var.dtype) + assert src_dtype in [ + 'bool', + 'float16', + 'float32', + 'float64', + 'int32', + 'int64', + 'uint8', + ], "The dtype of var {} is {}, which is not supported in the cast op.".format( + var.name, src_dtype + ) + assert dtype in [ + 'bool', + 'int', + 'float', + ], "The casted target dtype is {}, which is not supported in type casting.".format( + dtype + ) + cast_map = { + 'bool': 'bool', + 'int': 'int32', + 'float': 'float32', + } + return cast(var, dtype=cast_map[dtype]) + else: + return eval('{}(var)'.format(dtype)) + + +def convert_assert(cond, message=""): + """ + A function representation of a Python ``assert`` statement. + """ + if isinstance(cond, Variable): + cond = cast(cond, "bool") + # NOTE: message is not used because Paddle Assert has no corresponding parameter to use. + return Assert(cond) + else: + assert cond, message + + +def convert_print(*args): + """ + A function representing Python ``print`` statement. Note: this is a basic + python function so we haven't handle sep, end, file and flush parameters of + python function. + """ + for var in args: + if isinstance(var, Variable): + var = Print(var) + else: + print(var) + + +def convert_pop(target, *args): + """ + A function representation of a Python pop statement for a list or dict. + + Args: + target(list|dict|Tensor): A variable to pop item from. + *args(tuple): index or default value to parse. + + Returns: + A item poped from target. 
+ """ + + is_variable = isinstance(target, Variable) + if is_variable: + is_tensor_array = target.type == core.VarDesc.VarType.LOD_TENSOR_ARRAY + + if is_variable and is_tensor_array: + return _run_paddle_pop(target, *args) + else: + return _run_python_pop(target, *args) + + +def _run_paddle_pop(array, *args): + if len(args) == 0: + idx = -1 + else: + idx = args[0] + + assert isinstance(idx, int) + + def cond(i, new_array): + return less_than(i, arr_len) + + def body(i, new_array): + item = array_read(array=array, i=i) + array_write(item, array_length(new_array), new_array) + i = increment(i) + return i, new_array + + arr_len = array_length(array) + if idx < 0: + idx = idx + arr_len + else: + idx = fill_constant(shape=[1], dtype="int64", value=idx) + + pop_item = array_read(array, idx) + + new_array = _slice_tensor_array(array, 0, idx) + i = idx + 1 + _, new_array = while_loop(cond, body, [i, new_array]) + assign(input=new_array, output=array) + + return pop_item + + +# TODO(liym27): A better way to slice tensor array. +# Maybe support start == end for slice op. +def _slice_tensor_array(array, start, end): + def true_fn(): + null_array = create_array("float32") + return null_array + + def false_fn(array, start, end): + new_array = paddle.slice(array, starts=[start], ends=[end], axes=[0]) + return new_array + + new_array = cond(start == end, true_fn, lambda: false_fn(array, start, end)) + return new_array + + +def _run_python_pop(target, *args): + # 1. pop for a dict + if len(args) == 2: + idx, default = args + return target.pop(idx, default) -__all__ = [] + # 2. pop for a list or dict + else: + idx = args[0] if args else -1 + return target.pop(idx) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/early_return_transformer.py b/python/paddle/jit/dy2static/early_return_transformer.py similarity index 98% rename from python/paddle/fluid/dygraph/dygraph_to_static/early_return_transformer.py rename to python/paddle/jit/dy2static/early_return_transformer.py index 1ce75b277864e2..864ca52155d72e 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/early_return_transformer.py +++ b/python/paddle/jit/dy2static/early_return_transformer.py @@ -20,6 +20,8 @@ BaseTransformer, ) +__all__ = ['EarlyReturnTransformer'] + class EarlyReturnTransformer(BaseTransformer): """ diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/ifelse_transformer.py b/python/paddle/jit/dy2static/ifelse_transformer.py similarity index 98% rename from python/paddle/fluid/dygraph/dygraph_to_static/ifelse_transformer.py rename to python/paddle/jit/dy2static/ifelse_transformer.py index 8120e79c1da308..86d4f4d4054d6b 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/ifelse_transformer.py +++ b/python/paddle/jit/dy2static/ifelse_transformer.py @@ -13,7 +13,6 @@ # limitations under the License. import copy -import textwrap from collections import defaultdict # gast is a generic AST to represent Python2 and Python3's Abstract Syntax Tree(AST). 
@@ -28,18 +27,11 @@ ast_to_source_code, ) from paddle.fluid.dygraph.dygraph_to_static.utils import ( - create_assign_node, FunctionNameLivenessAnalysis, ) -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( - StaticAnalysisVisitor, -) from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( AstNodeWrapper, ) -from paddle.fluid.dygraph.dygraph_to_static.variable_trans_func import ( - create_undefined_var, -) from paddle.fluid.dygraph.dygraph_to_static.utils import ( create_nonlocal_stmt_nodes, ) @@ -65,6 +57,8 @@ create_name_str, ) +__all__ = ['IfElseTransformer'] + TRUE_FUNC_PREFIX = 'true_fn' FALSE_FUNC_PREFIX = 'false_fn' GET_ARGS_FUNC_PREFIX = 'get_args' diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/loop_transformer.py b/python/paddle/jit/dy2static/loop_transformer.py similarity index 98% rename from python/paddle/fluid/dygraph/dygraph_to_static/loop_transformer.py rename to python/paddle/jit/dy2static/loop_transformer.py index b61f9f6e43c201..3d109398c81e7d 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/loop_transformer.py +++ b/python/paddle/jit/dy2static/loop_transformer.py @@ -25,11 +25,7 @@ StaticAnalysisVisitor, ) from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code -from paddle.fluid.dygraph.dygraph_to_static.utils import generate_name_node from paddle.fluid.dygraph.dygraph_to_static.utils import get_attribute_full_name -from paddle.fluid.dygraph.dygraph_to_static.variable_trans_func import ( - create_undefined_var, -) from paddle.fluid.dygraph.dygraph_to_static.utils import ( create_nonlocal_stmt_nodes, create_get_args_node, @@ -38,13 +34,10 @@ from paddle.fluid.dygraph.dygraph_to_static.utils import ( FunctionNameLivenessAnalysis, ) -from paddle.fluid.dygraph.dygraph_to_static.ifelse_transformer import ARGS_NAME +from .ifelse_transformer import ARGS_NAME from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( BaseTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( - RenameTransformer, -) from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( ForLoopTuplePreTransformer, ) @@ -217,7 +210,7 @@ def get_loop_var_names(self, node): # If this var is a basic variable and read-only and not # condition var, it may not be loop_var else it should # be in loop_var as input - if (not name in condition_names) and (not name in write_names): + if (name not in condition_names) and (name not in write_names): continue loop_var_names.add(name) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py b/python/paddle/jit/dy2static/partial_program.py similarity index 99% rename from python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py rename to python/paddle/jit/dy2static/partial_program.py index bc371cc99c9174..ad5afaff7cdd51 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py +++ b/python/paddle/jit/dy2static/partial_program.py @@ -23,7 +23,7 @@ from paddle.fluid.dygraph import layers from paddle.fluid.dygraph.base import switch_to_static_graph from paddle.fluid.dygraph.dygraph_to_static import logging_utils -from paddle.fluid.dygraph.dygraph_to_static.return_transformer import ( +from .return_transformer import ( RETURN_NO_VALUE_MAGIC_NUM, ) from paddle.fluid.layers.utils import flatten diff --git a/python/paddle/jit/dy2static/program_translator.py b/python/paddle/jit/dy2static/program_translator.py index ea4ecdbe03ca53..0e912954ae6950 100644 --- a/python/paddle/jit/dy2static/program_translator.py +++ 
b/python/paddle/jit/dy2static/program_translator.py @@ -37,7 +37,7 @@ from paddle.fluid.dygraph.dygraph_to_static.origin_info import ( update_op_callstack_with_origin_info, ) -from paddle.fluid.dygraph.dygraph_to_static.partial_program import ( +from .partial_program import ( partial_program_from, ) from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_func diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/return_transformer.py b/python/paddle/jit/dy2static/return_transformer.py similarity index 98% rename from python/paddle/fluid/dygraph/dygraph_to_static/return_transformer.py rename to python/paddle/jit/dy2static/return_transformer.py index e00e322cda3393..cb18dfd33adf39 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/return_transformer.py +++ b/python/paddle/jit/dy2static/return_transformer.py @@ -16,12 +16,9 @@ from paddle.fluid import unique_name from paddle.fluid.dygraph.dygraph_to_static.utils import index_in_list -from paddle.fluid.dygraph.dygraph_to_static.break_continue_transformer import ( +from .break_continue_transformer import ( ForToWhileTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.variable_trans_func import ( - create_fill_constant_node, -) from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( BaseTransformer, From de443726c837797175d0aabd5e4493c6595b8b41 Mon Sep 17 00:00:00 2001 From: GGBond8488 <33050871+GGBond8488@users.noreply.github.com> Date: Tue, 29 Nov 2022 17:03:53 +0800 Subject: [PATCH 030/154] remove paddle.nn.Sequential to fix dygraph to static error (#48477) --- python/paddle/nn/__init__.py | 2 +- python/paddle/nn/layer/container.py | 73 ----------------------------- 2 files changed, 1 insertion(+), 74 deletions(-) diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py index a4aaa18ea5552b..ad966683aede23 100644 --- a/python/paddle/nn/__init__.py +++ b/python/paddle/nn/__init__.py @@ -17,7 +17,7 @@ from ..fluid.dygraph.layers import Layer # noqa: F401 from .layer.container import LayerList # noqa: F401 from .layer.container import ParameterList # noqa: F401 -from .layer.container import Sequential # noqa: F401 +from ..fluid.dygraph.container import Sequential # noqa: F401 from .clip import ClipGradByGlobalNorm # noqa: F401 from .clip import ClipGradByNorm # noqa: F401 diff --git a/python/paddle/nn/layer/container.py b/python/paddle/nn/layer/container.py index 0a8e5ec009d024..6a54a20787c18e 100644 --- a/python/paddle/nn/layer/container.py +++ b/python/paddle/nn/layer/container.py @@ -299,79 +299,6 @@ def update(self, sublayers): self.add_sublayer(kv[0], kv[1]) -class Sequential(Layer): - """Sequential container. - Sub layers will be added to this container in the order of argument in the constructor. - The argument passed to the constructor can be iterable Layers or iterable name Layer pairs. - - Parameters: - layers(Layer|list|tuple): Layer or list/tuple of iterable name Layer pair. - - Examples: - .. 
code-block:: python - - import paddle - - data = paddle.uniform(shape=[30, 10], dtype='float32') - # create Sequential with iterable Layers - model1 = paddle.nn.Sequential( - paddle.nn.Linear(10, 1), paddle.nn.Linear(1, 2) - ) - model1[0] # access the first layer - res1 = model1(data) # sequential execution - - # create Sequential with name Layer pairs - model2 = paddle.nn.Sequential( - ('l1', paddle.nn.Linear(10, 2)), - ('l2', paddle.nn.Linear(2, 3)) - ) - model2['l1'] # access l1 layer - model2.add_sublayer('l3', paddle.nn.Linear(3, 3)) # add sublayer - res2 = model2(data) # sequential execution - - """ - - def __init__(self, *layers): - super().__init__() - if len(layers) > 0 and isinstance(layers[0], (list, tuple)): - for name, layer in layers: - self.add_sublayer(name, layer) - else: - for idx, layer in enumerate(layers): - self.add_sublayer(str(idx), layer) - - def __getitem__(self, name): - if isinstance(name, slice): - return self.__class__(*(list(self._sub_layers.values())[name])) - elif isinstance(name, str): - return self._sub_layers[name] - else: - if name >= len(self._sub_layers): - raise IndexError('index {} is out of range'.format(name)) - elif name < 0 and name >= -len(self._sub_layers): - name += len(self._sub_layers) - elif name < -len(self._sub_layers): - raise IndexError('index {} is out of range'.format(name)) - return list(self._sub_layers.values())[name] - - def __setitem__(self, name, layer): - assert isinstance(layer, Layer) - setattr(self, str(name), layer) - - def __delitem__(self, name): - name = str(name) - assert name in self._sub_layers - del self._sub_layers[name] - - def __len__(self): - return len(self._sub_layers) - - def forward(self, input): - for layer in self._sub_layers.values(): - input = layer(input) - return input - - class ParameterList(Layer): """ParameterList Container. From 9e9b705aa842542d9bcf69b9039b555cae982b80 Mon Sep 17 00:00:00 2001 From: Vvsmile <450864116@qq.com> Date: Tue, 29 Nov 2022 17:09:02 +0800 Subject: [PATCH 031/154] Optimize the implementation of the argsort operator. (#47738) Optimize the implementation of the argsort operator --- paddle/phi/kernels/gpu/argsort_kernel.cu | 424 +++++++++++++++++------ 1 file changed, 314 insertions(+), 110 deletions(-) diff --git a/paddle/phi/kernels/gpu/argsort_kernel.cu b/paddle/phi/kernels/gpu/argsort_kernel.cu index 6a9c1e275998b8..1c3825b90e2101 100644 --- a/paddle/phi/kernels/gpu/argsort_kernel.cu +++ b/paddle/phi/kernels/gpu/argsort_kernel.cu @@ -64,8 +64,10 @@ struct SegmentOffsetIter { int num_cols_; }; +#define PADDLE_CUDA_NUM_THREADS 1024 + template -static __global__ void FillIndex(T* indices, T num_rows, T num_cols) { +static __global__ void FillIndex(T *indices, T num_rows, T num_cols) { int col_id = threadIdx.x; int row_id = blockIdx.x; @@ -78,23 +80,246 @@ static __global__ void FillIndex(T* indices, T num_rows, T num_cols) { // Sort by flag descending, True: descending. False: Ascending. // Default is false. -template -void ArgFullSort(const phi::GPUContext& ctx, - const DenseTensor* input, - DenseTensor* output, - DenseTensor* indices, - const IndType num_rows, - const IndType num_cols, +static __global__ void FillIndexAndSegmentKernel(int2 *data, + int numel, + int nsort) { + CUDA_KERNEL_LOOP(idx, numel) { + auto segment = static_cast(idx / nsort); + auto sort = static_cast(idx % nsort); + data[idx] = int2{segment, sort}; + } +} + +#define CUB_WRAPPER(func, ctx, ...) 
\ + do { \ + size_t temp_storage_bytes = 0; \ + gpuError_t err; \ + err = func(nullptr, temp_storage_bytes, __VA_ARGS__); \ + PADDLE_ENFORCE_GPU_SUCCESS(err); \ + DenseTensor temp_storage; \ + int64_t temp_size = temp_storage_bytes; \ + temp_storage.Resize({temp_size}); \ + ctx.template Alloc(&temp_storage); \ + err = func(temp_storage.data(), temp_storage_bytes, __VA_ARGS__); \ + PADDLE_ENFORCE_GPU_SUCCESS(err); \ + } while (false) + +template +static void RadixSortPairs(const phi::GPUContext &ctx, + const KT *keys_in, + const VT *values_in, + KT *keys_out, + VT *values_out, + int64_t n, + bool descending = false, + int64_t begin_bit = 0, + int64_t end_bit = sizeof(KT) * 8) { + if (keys_out == nullptr) { + DenseTensor key_out_owner; + key_out_owner.Resize({n}); + ctx.template Alloc(&key_out_owner); + keys_out = key_out_owner.data(); + } + + if (descending) { + CUB_WRAPPER(cub::DeviceRadixSort::SortPairsDescending, + ctx, + keys_in, + keys_out, + values_in, + values_out, + n, + begin_bit, + end_bit, + ctx.stream()); + } else { + CUB_WRAPPER(cub::DeviceRadixSort::SortPairs, + ctx, + keys_in, + keys_out, + values_in, + values_out, + n, + begin_bit, + end_bit, + ctx.stream()); + } +} + +template +static void RadixSortKeys(const phi::GPUContext &ctx, + const KT *keys_in, + KT *keys_out, + int64_t n, + bool descending, + int64_t begin_bit, + int64_t end_bit) { + if (descending) { + CUB_WRAPPER(cub::DeviceRadixSort::SortKeysDescending, + ctx, + keys_in, + keys_out, + n, + begin_bit, + end_bit, + ctx.stream()); + } else { + CUB_WRAPPER(cub::DeviceRadixSort::SortKeys, + ctx, + keys_in, + keys_out, + n, + begin_bit, + end_bit, + ctx.stream()); + } +} + +template +static __global__ void SortPostprocessKernel(const T *in, + const int2 *i_s_ptr, + T *out, + int64_t *index, + int nsegments, + int nsort) { + CUDA_KERNEL_LOOP(i, nsegments * nsort) { + int segment = i / nsort; // segment_id + int j = i % nsort; + + int offset = segment * nsort; + const T *in_ = in + offset; + T *out_ = out + offset; + int64_t *index_ = index + offset; + const int2 *i_s_ptr_ = i_s_ptr + offset; + + int idx = i_s_ptr_[j].y; + index_[j] = idx; + out_[j] = in_[idx]; + } +} + +template +inline void SegmentedSortPairsByFullSort(const phi::GPUContext &ctx, + const T *const self_ptr, + T *const values_ptr, + int64_t *const indices_ptr, + const int64_t nsegments, + const int64_t nsort, + const int64_t n, + const bool descending) { + int64_t segment_bits = std::max( + 1L, static_cast(std::ceil(std::log2(nsegments)))); + + const auto numel = nsort * nsegments; + + DenseTensor indices_and_segment; + int64_t indices_and_segment_size = numel; + indices_and_segment.Resize({indices_and_segment_size * 2}); + ctx.template Alloc(&indices_and_segment); + auto i_s_ptr_base = indices_and_segment.data(); + auto i_s_ptr = reinterpret_cast(i_s_ptr_base); + + dim3 block = PADDLE_CUDA_NUM_THREADS; + auto block_num = (numel - 1) / PADDLE_CUDA_NUM_THREADS + 1; + dim3 grid = static_cast(block_num); + + auto cu_stream = ctx.stream(); + + FillIndexAndSegmentKernel<<>>( + i_s_ptr, numel, nsort); + + DenseTensor indices_and_segment2; + int64_t indices_and_segment2_size = numel; + indices_and_segment2.Resize({indices_and_segment2_size * 2}); + ctx.template Alloc(&indices_and_segment2); + auto i_s_ptr2_base = indices_and_segment2.data(); + auto i_s_ptr2 = reinterpret_cast(i_s_ptr2_base); + + RadixSortPairs( + ctx, self_ptr, i_s_ptr, nullptr, i_s_ptr2, n, descending); + + RadixSortKeys(ctx, + reinterpret_cast(i_s_ptr2), + reinterpret_cast(i_s_ptr), + n, + 
false, + 0, + segment_bits); + + SortPostprocessKernel<<>>( + self_ptr, i_s_ptr, values_ptr, indices_ptr, nsegments, nsort); +} + +// The method is called when # of the rows of the input is less than or equal to +// 4 +template +void ArgFullSortForTinyRows(const phi::GPUContext &ctx, + const DenseTensor *input, + DenseTensor *output, + DenseTensor *indices, + const IndexType num_rows, + const IndexType num_cols, + const bool descending) { + auto gpu_stream = ctx.stream(); + size_t temp_storage_bytes = -1; + + IndexType numel = num_rows * num_cols; + if (numel == 0) { + return; + } + + IndexType numel_or_intmax = + std::min(numel, static_cast(std::numeric_limits::max())); + IndexType nsort = num_cols; + IndexType nbatch = (numel_or_intmax / nsort) * nsort; + + T *sorted_out_ptr; + IndexType *sorted_indices_ptr; + const T *input_data = input->data(); + T *out = ctx.template Alloc(output); + IndexType *ind = ctx.template Alloc(indices); + sorted_out_ptr = out; + sorted_indices_ptr = ind; + + int64_t remaining = numel; + + while (remaining > 0) { + int64_t n = std::min(remaining, nbatch); + IndexType nsegments = n / nsort; + + SegmentedSortPairsByFullSort(ctx, + input_data, + sorted_out_ptr, + sorted_indices_ptr, + nsegments, + nsort, + n, + descending); + + remaining -= n; + input_data += n; + sorted_out_ptr += n; + sorted_indices_ptr += n; + } +} + +template +void ArgFullSort(const phi::GPUContext &ctx, + const DenseTensor *input, + DenseTensor *output, + DenseTensor *indices, + const IndexType num_rows, + const IndexType num_cols, const bool descending) { auto cu_stream = ctx.stream(); DenseTensor input_indices; - const std::vector dims = {num_rows, num_cols}; + const std::vector dims = {num_rows, num_cols}; auto dim = phi::make_ddim(dims); input_indices.Resize(dim); - ctx.template Alloc(&input_indices); + ctx.template Alloc(&input_indices); size_t temp_storage_bytes = -1; - auto ComputeBlockSize = [](IndType col) { + auto ComputeBlockSize = [](IndexType col) { if (col > 512) return 1024; else if (col > 256 && col <= 512) @@ -113,111 +338,70 @@ void ArgFullSort(const phi::GPUContext& ctx, int grid_size = num_rows < maxGridDimX ? 
num_rows : maxGridDimX; // Init a index array FillIndex<<>>( - input_indices.data(), num_rows, num_cols); + input_indices.data(), num_rows, num_cols); - T* sorted_out_ptr; - IndType* sorted_indices_ptr; - const T* inp = input->data(); - T* out = ctx.template Alloc(output); - IndType* ind = ctx.template Alloc(indices); + T *sorted_out_ptr; + IndexType *sorted_indices_ptr; + const T *inp = input->data(); + T *out = ctx.template Alloc(output); + IndexType *ind = ctx.template Alloc(indices); sorted_out_ptr = out; sorted_indices_ptr = ind; // create iter for counting input - cub::CountingInputIterator counting_iter(0); + cub::CountingInputIterator counting_iter(0); // segment_offset is used for move to next row - cub::TransformInputIterator> + cub::CountingInputIterator> segment_offsets_t(counting_iter, SegmentOffsetIter(num_cols)); gpuError_t err; if (descending) { - err = cub::DeviceSegmentedRadixSort::SortPairsDescending( - nullptr, - temp_storage_bytes, - inp, - sorted_out_ptr, - input_indices.data(), - sorted_indices_ptr, - num_cols * num_rows, - num_rows, - segment_offsets_t, - segment_offsets_t + 1, - 0, - sizeof(T) * 8, - cu_stream); + CUB_WRAPPER(cub::DeviceSegmentedRadixSort::SortPairsDescending, + ctx, + inp, + sorted_out_ptr, + input_indices.data(), + sorted_indices_ptr, + num_cols * num_rows, + num_rows, + segment_offsets_t, + segment_offsets_t + 1, + 0, + sizeof(T) * 8, + ctx.stream()); } else { - err = - cub::DeviceSegmentedRadixSort::SortPairs(nullptr, - temp_storage_bytes, - inp, - sorted_out_ptr, - input_indices.data(), - sorted_indices_ptr, - num_cols * num_rows, - num_rows, - segment_offsets_t, - segment_offsets_t + 1, - 0, - sizeof(T) * 8, - cu_stream); + CUB_WRAPPER(cub::DeviceSegmentedRadixSort::SortPairs, + ctx, + inp, + sorted_out_ptr, + input_indices.data(), + sorted_indices_ptr, + num_cols * num_rows, + num_rows, + segment_offsets_t, + segment_offsets_t + 1, + 0, + sizeof(T) * 8, + ctx.stream()); } - PADDLE_ENFORCE_GPU_SUCCESS(err); - - DenseTensor temp_storage; - int64_t temp_size = temp_storage_bytes; - temp_storage.Resize({temp_size}); - ctx.template Alloc(&temp_storage); - - if (descending) { - err = cub::DeviceSegmentedRadixSort::SortPairsDescending( - temp_storage.data(), - temp_storage_bytes, - inp, - sorted_out_ptr, - input_indices.data(), - sorted_indices_ptr, - num_cols * num_rows, - num_rows, - segment_offsets_t, - segment_offsets_t + 1, - 0, - sizeof(T) * 8, - cu_stream); - } else { - err = - cub::DeviceSegmentedRadixSort::SortPairs(temp_storage.data(), - temp_storage_bytes, - inp, - sorted_out_ptr, - input_indices.data(), - sorted_indices_ptr, - num_cols * num_rows, - num_rows, - segment_offsets_t, - segment_offsets_t + 1, - 0, - sizeof(T) * 8, - cu_stream); - } - - PADDLE_ENFORCE_GPU_SUCCESS(err); } template -void ArgsortKernel(const Context& dev_ctx, - const DenseTensor& input, +void ArgsortKernel(const Context &dev_ctx, + const DenseTensor &input, int axis, bool descending, - DenseTensor* output, - DenseTensor* indices) { + DenseTensor *output, + DenseTensor *indices) { auto in_dims = input.dims(); axis = (axis < 0) ? 
(in_dims.size() + axis) : axis; - const T* in_data = input.data(); + + const T *in_data = input.data(); auto size = input.numel(); - T* out_data = dev_ctx.template Alloc(output); - int64_t* ids_data = dev_ctx.template Alloc(indices); + T *out_data = dev_ctx.template Alloc(output); + int64_t *ids_data = dev_ctx.template Alloc(indices); // Use thrust for parallel acceleration when the input size is equal to the // length of the ‘axis’ dimension. @@ -239,13 +423,23 @@ void ArgsortKernel(const Context& dev_ctx, const int64_t input_height = phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t input_width = in_dims[in_dims.size() - 1]; - ArgFullSort(dev_ctx, - &input, - output, - indices, - input_height, - input_width, - descending); + if (input_height <= 4) { + ArgFullSortForTinyRows(dev_ctx, + &input, + output, + indices, + input_height, + input_width, + descending); + } else { + ArgFullSort(dev_ctx, + &input, + output, + indices, + input_height, + input_width, + descending); + } } else { // if not full sort, do transpose first std::vector trans; @@ -264,7 +458,7 @@ void ArgsortKernel(const Context& dev_ctx, DenseTensor trans_inp; trans_inp.Resize(trans_dims); - T* trans_inp_data = dev_ctx.template Alloc(&trans_inp); + T *trans_inp_data = dev_ctx.template Alloc(&trans_inp); // Do transpose TransposeKernel(dev_ctx, input, trans, &trans_inp); @@ -282,13 +476,23 @@ void ArgsortKernel(const Context& dev_ctx, dev_ctx.template Alloc(&tmp_indices); dev_ctx.template Alloc(indices); - ArgFullSort(dev_ctx, - &trans_inp, - &tmp_out, - &tmp_indices, - input_height, - input_width, - descending); + if (input_height <= 4) { + ArgFullSortForTinyRows(dev_ctx, + &trans_inp, + &tmp_out, + &tmp_indices, + input_height, + input_width, + descending); + } else { + ArgFullSort(dev_ctx, + &trans_inp, + &tmp_out, + &tmp_indices, + input_height, + input_width, + descending); + } TransposeKernel(dev_ctx, tmp_indices, trans, indices); // transpose back From fa10524d558770d3ca0485a3dbbc361b0f32014e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awomir=20Siwek?= Date: Tue, 29 Nov 2022 10:21:08 +0100 Subject: [PATCH 032/154] eltwise_div + scale [PHI] (#48484) --- paddle/fluid/operators/ops_extra_info.h | 1 + .../phi/kernels/onednn/elementwise_kernel.cc | 6 ++++ .../test_mkldnn_elt_act_fuse_pass.py | 36 +++++++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/paddle/fluid/operators/ops_extra_info.h b/paddle/fluid/operators/ops_extra_info.h index 77c0aa7a33fb3c..94adfaf3b4500b 100644 --- a/paddle/fluid/operators/ops_extra_info.h +++ b/paddle/fluid/operators/ops_extra_info.h @@ -95,6 +95,7 @@ const std::unordered_map {"fuse_activation_alpha", ExtraAttrProperty::ONEDNN}, {"fuse_activation_beta", ExtraAttrProperty::ONEDNN}, {"fuse_activation_scale", ExtraAttrProperty::ONEDNN}, + {"fused_output_scale", ExtraAttrProperty::ONEDNN}, {"fuse_alpha", ExtraAttrProperty::ONEDNN}, {"fuse_beta", ExtraAttrProperty::ONEDNN}, {"fuse_relu", ExtraAttrProperty::ONEDNN}, diff --git a/paddle/phi/kernels/onednn/elementwise_kernel.cc b/paddle/phi/kernels/onednn/elementwise_kernel.cc index 51be7559772d72..e103f23df0dc62 100644 --- a/paddle/phi/kernels/onednn/elementwise_kernel.cc +++ b/paddle/phi/kernels/onednn/elementwise_kernel.cc @@ -43,6 +43,12 @@ void ElementwiseKernel(const OneDNNContext& dev_ctx, dnnl::post_ops post_operations; funcs::AppendActivation(dev_ctx, post_operations); + if (dev_ctx.HasDnnAttr("fused_output_scale")) { + float scale_alpha = + PADDLE_GET_CONST(float, 
dev_ctx.GetDnnAttr("fused_output_scale")); + post_operations.append_eltwise( + 1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f); + } auto* non_const_x = &x; auto* non_const_y = &y; diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py index 068fbe1171d588..ac635436f6200a 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py @@ -356,6 +356,42 @@ def set_params(self): self.act = paddle.nn.functional.sigmoid +class ElementwiseScaleOneDNNFusePassTest_Add( + ElementwiseActivationMkldnnFusePassTest +): + def set_params(self): + self.operand = fluid.layers.elementwise_add + self.act_alpha = 0.6 + self.act = paddle.scale + + +class ElementwiseScaleOneDNNFusePassTest_Sub( + ElementwiseActivationMkldnnFusePassTest +): + def set_params(self): + self.operand = fluid.layers.elementwise_sub + self.act_alpha = 0.6 + self.act = paddle.scale + + +class ElementwiseScaleOneDNNFusePassTest_Mul( + ElementwiseActivationMkldnnFusePassTest +): + def set_params(self): + self.operand = fluid.layers.elementwise_mul + self.act_alpha = 0.6 + self.act = paddle.scale + + +class ElementwiseScaleOneDNNFusePassTest_Div( + ElementwiseActivationMkldnnFusePassTest +): + def set_params(self): + self.operand = fluid.layers.elementwise_div + self.act_alpha = 0.6 + self.act = paddle.scale + + if __name__ == "__main__": paddle.enable_static() unittest.main() From 9896ac1e60135887b4d429c5d2b71c96e93a862c Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <72954905+Asthestarsfalll@users.noreply.github.com> Date: Tue, 29 Nov 2022 17:57:02 +0800 Subject: [PATCH 033/154] [PHI decoupling]migrate enforce_custom.h from fluid to phi (#48422) * migrate enforce_custom.h from fluid to phi * move to backends/custom/ --- paddle/fluid/platform/device/device_wrapper.h | 2 +- paddle/phi/backends/custom/custom_device.cc | 4 +-- .../backends}/custom/enforce_custom.h | 33 ++++++++----------- 3 files changed, 17 insertions(+), 22 deletions(-) rename paddle/{fluid/platform/device => phi/backends}/custom/enforce_custom.h (74%) diff --git a/paddle/fluid/platform/device/device_wrapper.h b/paddle/fluid/platform/device/device_wrapper.h index 6803a39a4fd7fd..f38ff74ae7b63b 100644 --- a/paddle/fluid/platform/device/device_wrapper.h +++ b/paddle/fluid/platform/device/device_wrapper.h @@ -40,8 +40,8 @@ limitations under the License. */ #endif #ifdef PADDLE_WITH_CUSTOM_DEVICE -#include "paddle/fluid/platform/device/custom/enforce_custom.h" #include "paddle/phi/backends/callback_manager.h" +#include "paddle/phi/backends/custom/enforce_custom.h" #include "paddle/phi/backends/device_guard.h" #include "paddle/phi/backends/device_manager.h" #include "paddle/phi/backends/event.h" diff --git a/paddle/phi/backends/custom/custom_device.cc b/paddle/phi/backends/custom/custom_device.cc index 3b438b562ce1d1..2c986df278173f 100644 --- a/paddle/phi/backends/custom/custom_device.cc +++ b/paddle/phi/backends/custom/custom_device.cc @@ -12,16 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/platform/device/custom/enforce_custom.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/phi/common/data_type.h" #include "paddle/phi/backends/callback_manager.h" +#include "paddle/phi/backends/custom/enforce_custom.h" #include "paddle/phi/backends/device_base.h" #include "paddle/phi/backends/device_guard.h" #include "paddle/phi/backends/device_manager.h" #include "paddle/phi/backends/event.h" #include "paddle/phi/backends/stream.h" +#include "paddle/phi/common/data_type.h" static bool operator==(const C_Device_st& d1, const C_Device_st& d2) { return d1.id == d2.id; diff --git a/paddle/fluid/platform/device/custom/enforce_custom.h b/paddle/phi/backends/custom/enforce_custom.h similarity index 74% rename from paddle/fluid/platform/device/custom/enforce_custom.h rename to paddle/phi/backends/custom/enforce_custom.h index ba92b4ac7deaec..c98d4580d3cdb8 100644 --- a/paddle/fluid/platform/device/custom/enforce_custom.h +++ b/paddle/phi/backends/custom/enforce_custom.h @@ -16,12 +16,10 @@ limitations under the License. */ #ifdef PADDLE_WITH_CUSTOM_DEVICE #include -#include "paddle/fluid/platform/enforce.h" #include "paddle/phi/backends/device_ext.h" +#include "paddle/phi/core/enforce.h" -namespace paddle { -namespace platform { -namespace details { +namespace phi { template struct CustomDeviceStatusType {}; @@ -33,7 +31,6 @@ struct CustomDeviceStatusType {}; } DEFINE_CUSTOM_DEVICE_STATUS_TYPE(C_Status, C_SUCCESS); -} // namespace details inline std::string build_custom_device_error_msg(C_Status stat) { std::ostringstream sout; @@ -41,19 +38,17 @@ inline std::string build_custom_device_error_msg(C_Status stat) { return sout.str(); } -#define PADDLE_ENFORCE_CUSTOM_DEVICE_SUCCESS(COND) \ - do { \ - auto __cond__ = (COND); \ - using __CUSTOM_DEVICE_STATUS_TYPE__ = decltype(__cond__); \ - constexpr auto __success_type__ = \ - ::paddle::platform::details::CustomDeviceStatusType< \ - __CUSTOM_DEVICE_STATUS_TYPE__>::kSuccess; \ - if (UNLIKELY(__cond__ != __success_type__)) { \ - auto __summary__ = ::paddle::platform::errors::External( \ - ::paddle::platform::build_custom_device_error_msg(__cond__)); \ - __THROW_ERROR_INTERNAL__(__summary__); \ - } \ +#define PADDLE_ENFORCE_CUSTOM_DEVICE_SUCCESS(COND) \ + do { \ + auto __cond__ = (COND); \ + using __CUSTOM_DEVICE_STATUS_TYPE__ = decltype(__cond__); \ + constexpr auto __success_type__ = ::phi::CustomDeviceStatusType< \ + __CUSTOM_DEVICE_STATUS_TYPE__>::kSuccess; \ + if (UNLIKELY(__cond__ != __success_type__)) { \ + auto __summary__ = ::phi::errors::External( \ + ::phi::build_custom_device_error_msg(__cond__)); \ + __THROW_ERROR_INTERNAL__(__summary__); \ + } \ } while (0) -} // namespace platform -} // namespace paddle +} // namespace phi #endif // PADDLE_WITH_CUSTOM_DEVICE From 9ae6c8540b4da6d52d93d1e611930dad003c1b3f Mon Sep 17 00:00:00 2001 From: xiaoxiaohehe001 <49090790+xiaoxiaohehe001@users.noreply.github.com> Date: Tue, 29 Nov 2022 18:41:09 +0800 Subject: [PATCH 034/154] [Paddle Inference] Add take_along_axis trt converter (#48358) --- .../fluid/inference/api/analysis_predictor.cc | 1 + .../inference/tensorrt/convert/CMakeLists.txt | 1 + .../tensorrt/convert/take_along_axis_op.cc | 62 ++++++ paddle/fluid/inference/tensorrt/op_teller.cc | 32 +++ .../test_trt_convert_take_along_axis.py | 187 ++++++++++++++++++ 5 files changed, 283 insertions(+) create mode 100644 paddle/fluid/inference/tensorrt/convert/take_along_axis_op.cc create mode 100644 
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_take_along_axis.py diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 0a8b3d5eb29ed3..0fa6f243cee79c 100755 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2341,6 +2341,7 @@ USE_TRT_CONVERTER(tanh_shrink) USE_TRT_CONVERTER(logsigmoid) USE_TRT_CONVERTER(lookup_table) USE_TRT_CONVERTER(expand_v2) +USE_TRT_CONVERTER(take_along_axis) #if PADDLE_WITH_CUSPARSELT && IS_TRT_VERSION_GE(8000) USE_TRT_CONVERTER(sparse_fc) USE_TRT_CONVERTER(sparse_multihead_matmul) diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 070e7c2c0fd8e7..b796cf1c2a2308 100755 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -82,6 +82,7 @@ list( celu_op.cc layernorm_shift_partition_op.cc tanhshrink_op.cc + take_along_axis_op.cc logsigmoid_op.cc preln_layernorm_shift_partition_op.cc merge_layernorm_op.cc diff --git a/paddle/fluid/inference/tensorrt/convert/take_along_axis_op.cc b/paddle/fluid/inference/tensorrt/convert/take_along_axis_op.cc new file mode 100644 index 00000000000000..af43d859bb78c5 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/take_along_axis_op.cc @@ -0,0 +1,62 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +/* + * TakeAlongAxis Op + */ +class TakeAlongAxisOpConverter : public OpConverter { + public: + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, + bool test_mode) override { + // AddGatherV2 is supported by the trt version of 8.2. 
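For reference, take_along_axis is an element-wise gather: every position of the index tensor selects one element from the input along the chosen axis, which is the mapping this converter expresses with a GatherV2 layer in GatherMode::kELEMENT plus setGatherAxis. A minimal sketch of the operator's semantics with illustrative values (note the int32 index dtype and the equal rank of input and index, the same conditions the op_teller change below enforces for the TensorRT path):

    import paddle

    x = paddle.to_tensor([[1., 2., 3.],
                          [4., 5., 6.]])
    idx = paddle.to_tensor([[0, 2, 1],
                            [2, 0, 0]], dtype='int32')
    # For axis=1: out[i][j] = x[i][idx[i][j]]
    out = paddle.take_along_axis(x, idx, axis=1)
    # out -> [[1., 3., 2.], [6., 4., 4.]]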
+#if IS_TRT_VERSION_GE(8200) + VLOG(3) << "convert take_along_axis op to tensorrt take_along_axis layer"; + framework::OpDesc op_desc(op, nullptr); + const auto input_tensor = engine_->GetITensor(op_desc.Input("Input")[0]); + const auto index_tensor = engine_->GetITensor(op_desc.Input("Index")[0]); + auto output_name = op_desc.Output("Result")[0]; + + int axis = 0; + if (op_desc.HasAttr("Axis")) { + axis = PADDLE_GET_CONST(int, op_desc.GetAttr("Axis")); + } + auto input_dims = input_tensor->getDimensions(); + int NbDims = input_dims.nbDims; + if (axis < 0) axis = axis + NbDims; + + auto layer = TRT_ENGINE_ADD_LAYER(engine_, + GatherV2, + *input_tensor, + *index_tensor, + nvinfer1::GatherMode::kELEMENT); + layer->setGatherAxis(axis); + + RreplenishLayerAndOutput( + layer, "take_along_axis", {output_name}, test_mode); +#endif + } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(take_along_axis, TakeAlongAxisOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 0e180cd7d65269..58f99ff3d2ff7b 100755 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -596,6 +596,36 @@ struct SimpleOpTypeSetTeller : public Teller { #endif } + if (op_type == "take_along_axis") { +#if IS_TRT_VERSION_GE(8200) + if (!with_dynamic_shape) return false; + auto* block = desc.Block(); + auto input_var_name = desc.Input("Input")[0]; + auto index_var_name = desc.Input("Index")[0]; + auto* input_var_desc = block->FindVar(input_var_name); + auto* index_var_desc = block->FindVar(index_var_name); + + // The index input must be int32 datatype. + if (index_var_desc->GetDataType() != + paddle::framework::proto::VarType_Type::VarType_Type_INT32) { + VLOG(3) << "take_along_axis op Index input data type must be int32"; + return false; + } + + const auto input_shape = input_var_desc->GetShape(); + const auto index_shape = index_var_desc->GetShape(); + if (input_shape.size() != index_shape.size()) { + VLOG(3) << "take_along_axis op Index input dims size [" + << index_shape.size() << " ] not equal to input dims size [" + << input_shape.size() << "]"; + return false; + } +#else + VLOG(3) << "take_along_axis op is only supported by trt8.2 above "; + return false; +#endif + } + if (op_type == "anchor_generator") { if (!with_dynamic_shape) return false; } @@ -2399,6 +2429,7 @@ struct SimpleOpTypeSetTeller : public Teller { "squeeze2", "unsqueeze2", "layernorm_shift_partition", + "take_along_axis", "tanh_shrink", "logsigmoid", "preln_layernorm_shift_partition", @@ -2530,6 +2561,7 @@ struct SimpleOpTypeSetTeller : public Teller { "fused_token_prune", "layernorm_shift_partition", "tanh_shrink", + "take_along_axis", "logsigmoid", "preln_layernorm_shift_partition", "merge_layernorm", diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_take_along_axis.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_take_along_axis.py new file mode 100644 index 00000000000000..63b340f8807a7e --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_take_along_axis.py @@ -0,0 +1,187 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from functools import partial +from typing import List + +import numpy as np +from program_config import ProgramConfig, TensorConfig +from trt_layer_auto_scan_test import TrtLayerAutoScanTest + +import paddle.inference as paddle_infer + + +class TrtConvertTakeAlongAxisTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + inputs = program_config.inputs + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + if len(inputs['input_data'].shape) <= attrs[0]['Axis']: + return False + if len(inputs['input_data'].shape) != len(inputs['index_data'].shape): + return False + + return True + + def sample_program_configs(self): + def generate_input1(shape): + return np.random.random(shape).astype(np.float32) + + def generate_input2(index): + return np.zeros(index).astype(np.int32) + + def generate_input3(axis): + return np.array([axis]).astype(np.int32) + + for shape in [[32], [3, 64], [1, 64, 16], [1, 64, 16, 32]]: + for index in [[1], [1, 1], [1, 1, 2], [1, 1, 1, 1]]: + for axis in [0, 1, 2, 3]: + self.shape = shape + self.axis = axis + dics = [{"Axis": axis}] + ops_config = [ + { + "op_type": "take_along_axis", + "op_inputs": { + "Input": ["input_data"], + "Index": ["index_data"], + }, + "op_outputs": {"Result": ["output_data"]}, + "op_attrs": dics[0], + } + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data": TensorConfig( + data_gen=partial(generate_input1, shape) + ), + "index_data": TensorConfig( + data_gen=partial(generate_input2, index) + ), + }, + outputs=["output_data"], + ) + + yield program_config + + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + if len(self.shape) == 1: + self.dynamic_shape.min_input_shape = { + "input_data": [4], + "index_data": [1], + } + self.dynamic_shape.max_input_shape = { + "input_data": [128], + "index_data": [4], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [16], + "index_data": [2], + } + elif len(self.shape) == 2: + self.dynamic_shape.min_input_shape = { + "input_data": [3, 64], + "index_data": [1, 1], + } + self.dynamic_shape.max_input_shape = { + "input_data": [3, 64], + "index_data": [1, 1], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [3, 64], + "index_data": [1, 1], + } + elif len(self.shape) == 3: + self.dynamic_shape.min_input_shape = { + "input_data": [1, 64, 16], + "index_data": [1, 1, 2], + } + self.dynamic_shape.max_input_shape = { + "input_data": [1, 64, 16], + "index_data": [1, 1, 2], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [1, 64, 16], + "index_data": [1, 1, 2], + } + elif len(self.shape) == 4: + self.dynamic_shape.min_input_shape = { + "input_data": [1, 64, 16, 32], + "index_data": [1, 1, 1, 1], + } + self.dynamic_shape.max_input_shape = { + "input_data": [1, 64, 16, 32], + "index_data": [1, 1, 1, 1], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [1, 64, 16, 32], + "index_data": [1, 1, 1, 1], + } + + def 
clear_dynamic_shape(): + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(dynamic_shape): + ver = paddle_infer.get_trt_compile_version() + if ( + ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 > 8200 + and dynamic_shape + ): + return 1, 3 + else: + return 0, 4 + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + # for static_shape + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + False + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + False + ), 1e-5 + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num(True), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num(True), 1e-3 + + def add_skip_trt_case(self): + pass + + def test(self): + self.add_skip_trt_case() + self.run_test() + + +if __name__ == "__main__": + unittest.main() From 485de16aa3214e157aad4fa95b7bbf6958ea48e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=82=85=E5=89=91=E5=AF=92?= Date: Tue, 29 Nov 2022 18:47:48 +0800 Subject: [PATCH 035/154] =?UTF-8?q?(fluid=E6=B8=85=E7=90=86=EF=BC=89move?= =?UTF-8?q?=20prelu=20from=20fluid.layers=20to=20static.nn=20(#47894)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/paddle/fluid/layers/nn.py | 107 ------------------ .../ir/inference/test_trt_activation_pass.py | 16 +-- .../test_imperative_load_static_param.py | 4 +- .../unittests/test_inplace_addto_strategy.py | 2 +- .../fluid/tests/unittests/test_layers.py | 19 +--- python/paddle/static/nn/__init__.py | 4 +- python/paddle/static/nn/common.py | 106 +++++++++++++++++ 7 files changed, 120 insertions(+), 138 deletions(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 4743e4b49f2164..e066be12eb7841 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -98,7 +98,6 @@ 'resize_nearest', 'relu', 'log', - 'prelu', 'unique', 'unique_with_counts', 'elementwise_add', @@ -5333,112 +5332,6 @@ def relu(x, name=None): return out -@deprecated(since="2.0.0", update_to="paddle.static.nn.prelu") -def prelu(x, mode, param_attr=None, data_format="NCHW", name=None): - r""" - - prelu activation. - - .. math:: - prelu(x) = max(0, x) + \alpha * min(0, x) - - There are three modes for the activation: - - .. code-block:: text - - all: All elements share same alpha. - channel: Elements in same channel share same alpha. - element: All elements do not share alpha. Each element has its own alpha. - - Parameters: - x (Tensor): The input Tensor or LoDTensor with data type float32. - mode (str): The mode for weight sharing. - param_attr (ParamAttr|None, optional): The parameter attribute for the learnable - weight (alpha), it can be create by ParamAttr. None by default. - For detailed information, please refer to :ref:`api_fluid_ParamAttr`. - data_format(str, optional): Data format that specifies the layout of input. - It may be "NC", "NCL", "NCHW", "NCDHW", "NLC", "NHWC" or "NDHWC". Default: "NCHW". - name (str, optional): Name for the operation (optional, default is None). 
- For more information, please refer to :ref:`api_guide_Name`. - - Returns: - Tensor, A tensor with the same shape and data type as x. - - Examples: - .. code-block:: python - - import paddle - - x = paddle.to_tensor([-1., 2., 3.]) - param = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0.2)) - out = paddle.static.nn.prelu(x, 'all', param) - # [-0.2, 2., 3.] - - """ - check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'prelu') - - helper = LayerHelper('prelu', **locals()) - if mode not in ['all', 'channel', 'element']: - raise ValueError('mode should be one of all, channel, element.') - - alpha_shape = [1] - if mode == 'channel': - - true_data_format = [ - 'NC', - 'NCL', - 'NCHW', - 'NCDHW', - 'NLC', - 'NHWC', - 'NDHWC', - ] - if data_format not in true_data_format: - raise ValueError( - "data_format must be one of 'NC', 'NCL', 'NCHW', 'NCDHW', " - "'NLC', 'NHWC', 'NDHWC' but receive {}".format(data_format) - ) - - data_format = 'NCHW' if data_format[1] == 'C' else 'NHWC' - - assert ( - len(x.shape) >= 2 - ), "The size of input shape should be equal or larger than 2 in prelu() when mode is 'channel'" - # NOTE(zhiqiu): The alpha_shape should be [1, channel] + [1] * len(x.shape[2:]). - # To be consistent with Prelu, it is simplified. - # NOTE(zhiqiu): Revert shape to [1, channel, 1, 1] for compatibility with saved model of old version. - # NOTE(GuoxiaWang): support NHWC data format - if data_format == 'NHWC': - alpha_shape = [1, 1, 1, x.shape[-1]] - else: - alpha_shape = [1, x.shape[1], 1, 1] - - elif mode == 'element': - assert ( - len(x.shape) >= 1 - ), "The size of input shape should be equal or larger than 1 in prelu() when mode is 'element'" - alpha_shape = [1] + list(x.shape)[1:] - dtype = helper.input_dtype(input_param_name='x') - alpha = helper.create_parameter( - attr=helper.param_attr, - shape=alpha_shape, - dtype=dtype, - is_bias=False, - default_initializer=Constant(0.25), - ) - if in_dygraph_mode(): - return _C_ops.prelu(x, alpha, data_format, mode) - - out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="prelu", - inputs={"X": x, 'Alpha': alpha}, - attrs={"mode": mode, "data_format": data_format}, - outputs={"Out": out}, - ) - return out - - from paddle.fluid.framework import convert_np_dtype_to_dtype_ diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py index c2526ca1ee72f5..3597f11c550827 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py @@ -204,17 +204,17 @@ def append_act(self, x): class TensorRTSubgraphPassPreluAllTest(TensorRTSubgraphPassActivationTest): def append_act(self, x): - return fluid.layers.prelu(x, mode='all') + return paddle.static.nn.prelu(x, mode='all') class TensorRTSubgraphPassPreluChannelTest(TensorRTSubgraphPassActivationTest): def append_act(self, x): - return fluid.layers.prelu(x, mode='channel') + return paddle.static.nn.prelu(x, mode='channel') class TensorRTSubgraphPassPreluElementTest(TensorRTSubgraphPassActivationTest): def append_act(self, x): - return fluid.layers.prelu(x, mode='element') + return paddle.static.nn.prelu(x, mode='element') class TensorRTSubgraphPassPreluDynamicTest(TensorRTSubgraphPassActivationTest): @@ -233,7 +233,7 @@ def setUpTensorRTParam(self): ) def append_act(self, x): - return fluid.layers.prelu(x, mode='all') + return 
paddle.static.nn.prelu(x, mode='all') class TensorRTSubgraphPassPreluFp16Test(TensorRTSubgraphPassActivationTest): @@ -244,7 +244,7 @@ def setUpTensorRTParam(self): ) def append_act(self, x): - return fluid.layers.prelu(x, mode='all') + return paddle.static.nn.prelu(x, mode='all') class TensorRTSubgraphPassPreluFp16SerializeTest( @@ -257,7 +257,7 @@ def setUpTensorRTParam(self): ) def append_act(self, x): - return fluid.layers.prelu(x, mode='all') + return paddle.static.nn.prelu(x, mode='all') class TensorRTSubgraphPassPreluFp16DynamicTest( @@ -278,7 +278,7 @@ def setUpTensorRTParam(self): ) def append_act(self, x): - return fluid.layers.prelu(x, mode='all') + return paddle.static.nn.prelu(x, mode='all') class TensorRTSubgraphPassPreluFp16DynamicSerializeTest( @@ -299,7 +299,7 @@ def setUpTensorRTParam(self): ) def append_act(self, x): - return fluid.layers.prelu(x, mode='all') + return paddle.static.nn.prelu(x, mode='all') class TensorRTSubgraphPassGeluTest(TensorRTSubgraphPassActivationTest): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py index 0fb5f40470a092..90e757a5d4eae1 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -82,8 +82,8 @@ def testLoadStaticModel(self): prelu_in = fluid.data( name="prelu_in", shape=[None, 5, 10, 10], dtype='float32' ) - prelu_out_1 = fluid.layers.prelu(prelu_in, "channel") - prelu_out_2 = fluid.layers.prelu(prelu_in, "channel") + prelu_out_1 = paddle.static.nn.prelu(prelu_in, "channel") + prelu_out_2 = paddle.static.nn.prelu(prelu_in, "channel") bilinear_tensor_pro_x = fluid.data( "t1", shape=[None, 5], dtype="float32" diff --git a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py index 60832ae07d298e..252fdf68699b6c 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py @@ -60,7 +60,7 @@ def create_program(data_format="NCHW"): x.stop_gradient = False if data_format == "NHWC": x = paddle.transpose(x, [0, 2, 3, 1]) - x = fluid.layers.prelu(x, mode="channel") + x = paddle.static.nn.prelu(x, mode="channel") conv = ConvBNLayer( num_channels=3, num_filters=3, diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 37f83e47e791d7..c5cf9253f755fe 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -1064,7 +1064,7 @@ def prelu_test(self, mode): dtype="float32", append_batch_size=False, ) - out = layers.prelu( + out = paddle.static.nn.prelu( data_t, mode, param_attr=ParamAttr(initializer=Constant(1.0)) ) static_rlt = self.get_static_graph_result( @@ -2916,7 +2916,6 @@ def setUp(self): { "make_gaussian_random", "make_kldiv_loss", - "make_prelu", "make_sampling_id", "make_uniform_random_batch_size_like", } @@ -3482,22 +3481,6 @@ def make_pad2d(self): out = tmp_pad(input) return out - def make_prelu(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - input = self._get_data( - name="input", shape=[5, 200, 100, 100], dtype="float32" - ) - mode = 'channel' - out = layers.prelu( - input, - mode, - param_attr=ParamAttr(initializer=Constant(1.0)), - name='prelu', - ) - return out - 
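The deleted make_prelu helper above exercised the 'channel' mode; for reference, the mode argument only changes the shape of the learnable alpha weight, as the reimplementation added to paddle/static/nn/common.py below shows. A minimal sketch of the weight shapes for an NCHW input, using the relocated static-graph API:

    import paddle

    paddle.enable_static()
    x = paddle.static.data(name='x', shape=[None, 5, 10, 10], dtype='float32')
    # 'all'     -> a single shared alpha,  weight shape [1]
    # 'channel' -> one alpha per channel,  weight shape [1, 5, 1, 1] (NCHW)
    # 'element' -> one alpha per element,  weight shape [1, 5, 10, 10]
    y = paddle.static.nn.prelu(x, mode='channel')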
def make_mish(self): with program_guard( fluid.default_main_program(), fluid.default_startup_program() diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py index 5dfae6c98092f2..449cd478a2c3b9 100755 --- a/python/paddle/static/nn/__init__.py +++ b/python/paddle/static/nn/__init__.py @@ -31,7 +31,7 @@ from ...fluid.layers import layer_norm # noqa: F401 from ...fluid.layers import multi_box_head # noqa: F401 from .loss import nce # noqa: F401 -from ...fluid.layers import prelu # noqa: F401 +from .common import prelu # noqa: F401 from ...fluid.layers import py_func # noqa: F401 from ...fluid.layers import row_conv # noqa: F401 from ...fluid.layers import spectral_norm # noqa: F401 @@ -78,7 +78,6 @@ 'layer_norm', 'multi_box_head', 'nce', - 'prelu', 'py_func', 'row_conv', 'spectral_norm', @@ -101,4 +100,5 @@ 'sequence_enumerate', 'sequence_reverse', 'StaticRNN', + 'prelu', ] diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index da3b58bb18205a..d70d958016ab4e 100755 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -2083,3 +2083,109 @@ def deform_conv2d( modulated=True, name=name, ) + + +@static_only +def prelu(x, mode, param_attr=None, data_format="NCHW", name=None): + r""" + + prelu activation. + + .. math:: + prelu(x) = max(0, x) + \alpha * min(0, x) + + There are three modes for the activation: + + .. code-block:: text + + all: All elements share same alpha. + channel: Elements in same channel share same alpha. + element: All elements do not share alpha. Each element has its own alpha. + + Parameters: + x (Tensor): The input Tensor or LoDTensor with data type float32. + mode (str): The mode for weight sharing. + param_attr (ParamAttr|None, optional): The parameter attribute for the learnable \ + weight (alpha), it can be create by ParamAttr. None by default. \ + For detailed information, please refer to :ref:`api_paddle_ParamAttr`. + data_format(str, optional): Data format that specifies the layout of input. + It may be "NC", "NCL", "NCHW", "NCDHW", "NLC", "NHWC" or "NDHWC". Default: "NCHW". + name (str, optional): Name for the operation (optional, default is None). \ + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + Tensor: A tensor with the same shape and data type as x. + + Examples: + + .. code-block:: python + + import paddle + paddle.enable_static() + + x = paddle.static.data(name="x", shape=[None,5,10,10], dtype="float32") + mode = 'channel' + output = paddle.static.nn.prelu( + x,mode,param_attr=paddle.ParamAttr(name='alpha')) + + """ + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'prelu') + + helper = LayerHelper('prelu', **locals()) + if mode not in ['all', 'channel', 'element']: + raise ValueError('mode should be one of all, channel, element.') + + alpha_shape = [1] + if mode == 'channel': + + true_data_format = [ + 'NC', + 'NCL', + 'NCHW', + 'NCDHW', + 'NLC', + 'NHWC', + 'NDHWC', + ] + if data_format not in true_data_format: + raise ValueError( + "data_format must be one of 'NC', 'NCL', 'NCHW', 'NCDHW', " + "'NLC', 'NHWC', 'NDHWC' but receive {}".format(data_format) + ) + + data_format = 'NCHW' if data_format[1] == 'C' else 'NHWC' + + assert ( + len(x.shape) >= 2 + ), "The size of input shape should be equal or larger than 2 in prelu() when mode is 'channel'" + # NOTE(zhiqiu): The alpha_shape should be [1, channel] + [1] * len(x.shape[2:]). + # To be consistent with Prelu, it is simplified. 
+ # NOTE(zhiqiu): Revert shape to [1, channel, 1, 1] for compatibility with saved model of old version. + # NOTE(GuoxiaWang): support NHWC data format + if data_format == 'NHWC': + alpha_shape = [1, 1, 1, x.shape[-1]] + else: + alpha_shape = [1, x.shape[1], 1, 1] + + elif mode == 'element': + assert ( + len(x.shape) >= 1 + ), "The size of input shape should be equal or larger than 1 in prelu() when mode is 'element'" + alpha_shape = [1] + list(x.shape)[1:] + dtype = helper.input_dtype(input_param_name='x') + alpha = helper.create_parameter( + attr=helper.param_attr, + shape=alpha_shape, + dtype=dtype, + is_bias=False, + default_initializer=paddle.nn.initializer.Constant(0.25), + ) + + out = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type="prelu", + inputs={"X": x, 'Alpha': alpha}, + attrs={"mode": mode, "data_format": data_format}, + outputs={"Out": out}, + ) + return out From f85def977562e674681b0dda7ca203ff4aefa094 Mon Sep 17 00:00:00 2001 From: Nyakku Shigure Date: Tue, 29 Nov 2022 18:50:04 +0800 Subject: [PATCH 036/154] [CodeStyle][isort] introduce isort (part4) (#48402) * isort all files * revert conflicting files * revert conflicting files * revert conflicting files --- cmake/copyfile.py | 4 +- .../generate_file_structures.py | 2 +- .../generator/codegen_utils.py | 3 +- .../generator/eager_gen.py | 42 ++++++------ .../generator/python_c_gen.py | 13 ++-- .../inference/api/demo_ci/untar_model.py | 2 +- .../api/full_ILSVRC2012_val_preprocess.py | 12 ++-- .../api/full_pascalvoc_test_preprocess.py | 15 +++-- .../api/test_detection_dataset_preprocess.py | 2 +- paddle/fluid/operators/generator/filters.py | 12 ++-- .../fluid/operators/generator/generate_op.py | 12 ++-- .../operators/generator/generate_sparse_op.py | 14 ++-- paddle/fluid/operators/generator/parse_op.py | 1 - .../fluid/operators/generator/parse_utils.py | 3 +- paddle/fluid/operators/generator/tests.py | 3 +- .../fluid/pybind/generate_file_structures.py | 2 +- paddle/infrt/tests/models/abs_model.py | 5 +- .../tests/models/efficientnet-b4/model.py | 6 +- .../efficientnet-b4/net/efficientnet.py | 8 +-- .../tests/models/efficientnet-b4/net/utils.py | 4 +- paddle/infrt/tests/models/linear.py | 1 + paddle/infrt/tests/models/resnet50_model.py | 5 +- paddle/phi/api/yaml/generator/api_base.py | 2 +- paddle/phi/api/yaml/generator/api_gen.py | 4 +- .../api/yaml/generator/backward_api_gen.py | 2 +- .../yaml/generator/intermediate_api_gen.py | 2 +- .../api/yaml/generator/ops_extra_info_gen.py | 5 +- .../phi/api/yaml/generator/sparse_api_gen.py | 4 +- .../api/yaml/generator/sparse_bw_api_gen.py | 4 +- .../phi/api/yaml/generator/strings_api_gen.py | 2 +- .../yaml/generator/wrapped_infermeta_gen.py | 2 +- paddle/scripts/conda_build.py | 5 +- paddle/scripts/installation_validate.py | 2 +- python/paddle/_legacy_C_ops.py | 1 + python/paddle/amp/auto_cast.py | 3 +- python/paddle/amp/grad_scaler.py | 4 +- python/paddle/audio/backends/backend.py | 4 +- python/paddle/audio/backends/init_backend.py | 4 +- python/paddle/audio/backends/wave_backend.py | 9 +-- python/paddle/audio/datasets/dataset.py | 5 +- python/paddle/audio/datasets/esc50.py | 6 +- python/paddle/audio/datasets/tess.py | 6 +- python/paddle/audio/features/layers.py | 7 +- python/paddle/audio/functional/functional.py | 3 +- python/paddle/audio/functional/window.py | 4 +- python/paddle/autograd/backward_mode.py | 5 +- python/paddle/autograd/py_layer.py | 4 +- python/paddle/callbacks.py | 8 +-- python/paddle/cost_model/cost_model.py | 8 ++- 
python/paddle/dataset/cifar.py | 6 +- python/paddle/dataset/common.py | 14 ++-- python/paddle/dataset/conll05.py | 3 +- python/paddle/dataset/flowers.py | 11 ++-- python/paddle/dataset/image.py | 7 +- python/paddle/dataset/imdb.py | 7 +- python/paddle/dataset/imikolov.py | 5 +- python/paddle/dataset/mnist.py | 8 ++- python/paddle/dataset/movielens.py | 8 ++- python/paddle/dataset/tests/cifar_test.py | 3 +- python/paddle/dataset/tests/flowers_test.py | 3 +- python/paddle/dataset/tests/imdb_test.py | 5 +- python/paddle/dataset/tests/imikolov_test.py | 3 +- python/paddle/dataset/tests/mnist_test.py | 3 +- python/paddle/dataset/tests/test_image.py | 2 + python/paddle/dataset/tests/voc2012_test.py | 3 +- python/paddle/dataset/tests/wmt16_test.py | 3 +- python/paddle/dataset/uci_housing.py | 8 ++- python/paddle/dataset/voc2012.py | 8 ++- python/paddle/device/cuda/graphs.py | 7 +- python/paddle/device/cuda/streams.py | 2 +- .../distributed/auto_parallel/callbacks.py | 9 +-- .../distributed/auto_parallel/cluster.py | 6 +- .../distributed/auto_parallel/cluster_v2.py | 4 +- .../distributed/auto_parallel/completion.py | 20 ++++-- .../distributed/auto_parallel/converter.py | 7 +- .../auto_parallel/cost/base_cost.py | 6 +- .../auto_parallel/cost/estimate_cost.py | 7 +- .../auto_parallel/cost/tensor_cost.py | 2 +- .../distributed/auto_parallel/cost_model.py | 4 +- .../auto_parallel/dist_attribute.py | 2 + .../distributed/auto_parallel/dist_context.py | 17 +++-- .../distributed/auto_parallel/dist_loader.py | 5 +- .../distributed/auto_parallel/dist_op.py | 12 ++-- .../distributed/auto_parallel/dist_saver.py | 14 ++-- .../distributed/auto_parallel/dist_tensor.py | 5 +- .../distributed/auto_parallel/engine.py | 51 ++++++++------- .../distributed/auto_parallel/helper.py | 14 ++-- .../distributed/auto_parallel/interface.py | 10 +-- .../distributed/auto_parallel/mapper.py | 10 +-- .../auto_parallel/operators/common.py | 4 +- .../auto_parallel/operators/dist_assign.py | 12 ++-- .../dist_check_finite_and_unscale.py | 22 ++++--- .../auto_parallel/operators/dist_default.py | 36 +++++++---- .../auto_parallel/operators/dist_eltwise.py | 29 ++++++--- .../auto_parallel/operators/dist_embedding.py | 55 ++++++++-------- .../dist_fill_constant_batch_size_like.py | 21 +++--- .../operators/dist_fused_attention.py | 22 ++++--- .../operators/dist_fused_feedforward.py | 22 ++++--- .../auto_parallel/operators/dist_matmul.py | 64 +++++++++++-------- .../auto_parallel/operators/dist_pnorm.py | 30 +++++---- .../operators/dist_reduce_sum_p.py | 15 +++-- .../auto_parallel/operators/dist_reshape.py | 33 ++++++---- .../auto_parallel/operators/dist_scale.py | 12 ++-- .../auto_parallel/operators/dist_shape.py | 12 ++-- .../auto_parallel/operators/dist_slice.py | 13 ++-- .../auto_parallel/operators/dist_softmax.py | 28 ++++---- .../auto_parallel/operators/dist_split.py | 13 ++-- .../auto_parallel/operators/dist_transpose.py | 25 +++++--- .../operators/dist_update_loss_scaling.py | 10 +-- .../distributed/auto_parallel/parallelizer.py | 44 +++++++------ .../auto_parallel/parallelizer_v2.py | 10 +-- .../distributed/auto_parallel/partitioner.py | 11 ++-- .../distributed/auto_parallel/planner.py | 25 +++++--- .../auto_parallel/process_group.py | 5 +- .../distributed/auto_parallel/process_mesh.py | 6 +- .../auto_parallel/process_mesh_v2.py | 1 + .../distributed/auto_parallel/reshard.py | 23 ++++--- .../distributed/auto_parallel/strategy.py | 1 + .../auto_parallel/tuner/algorithms.py | 4 +- .../distributed/auto_parallel/tuner/config.py | 2 +- 
.../auto_parallel/tuner/optimization_tuner.py | 34 +++++----- .../auto_parallel/tuner/parallel_tuner.py | 13 ++-- .../auto_parallel/tuner/profiler.py | 15 ++--- .../auto_parallel/tuner/rule_based_tuner.py | 1 - .../distributed/auto_parallel/tuner/trial.py | 2 +- .../auto_parallel/tuner/tunable_space.py | 6 +- .../paddle/distributed/auto_parallel/utils.py | 32 ++++++---- python/paddle/distributed/cloud_utils.py | 5 +- python/paddle/distributed/collective.py | 21 +++--- .../distributed/communication/all_gather.py | 3 +- .../distributed/communication/all_reduce.py | 2 +- .../distributed/communication/all_to_all.py | 2 +- .../communication/batch_isend_irecv.py | 1 + .../distributed/communication/broadcast.py | 2 +- .../paddle/distributed/communication/group.py | 1 + .../paddle/distributed/communication/recv.py | 2 +- .../distributed/communication/reduce.py | 4 +- .../communication/reduce_scatter.py | 2 +- .../distributed/communication/scatter.py | 2 +- .../paddle/distributed/communication/send.py | 2 +- .../communication/stream/all_gather.py | 2 +- .../communication/stream/all_reduce.py | 4 +- .../communication/stream/all_to_all.py | 2 +- .../communication/stream/broadcast.py | 4 +- .../distributed/communication/stream/recv.py | 4 +- .../communication/stream/reduce.py | 6 +- .../communication/stream/reduce_scatter.py | 2 +- .../communication/stream/scatter.py | 5 +- .../distributed/communication/stream/send.py | 4 +- python/paddle/distributed/communicator.py | 2 +- .../paddle/distributed/fleet/ascend_utils.py | 3 +- .../fleet/base/distributed_strategy.py | 7 +- .../fleet/base/orthogonal_strategy.py | 3 +- .../fleet/base/private_helper_function.py | 2 +- .../distributed/fleet/base/role_maker.py | 5 +- .../distributed/fleet/base/runtime_factory.py | 2 +- .../paddle/distributed/fleet/base/topology.py | 6 +- .../distributed/fleet/base/util_factory.py | 20 +++--- .../paddle/distributed/fleet/cloud_utils.py | 1 + .../distributed/fleet/dataset/dataset.py | 3 +- .../distributed/fleet/elastic/collective.py | 6 +- .../distributed/fleet/elastic/manager.py | 13 ++-- python/paddle/distributed/fleet/fleet.py | 18 +++--- .../distributed/fleet/fleet_executor_utils.py | 2 +- python/paddle/distributed/fleet/launch.py | 36 +++++------ .../paddle/distributed/fleet/launch_utils.py | 18 +++--- .../distributed/fleet/layers/mpu/mp_layers.py | 5 +- .../distributed/fleet/layers/mpu/mp_ops.py | 20 +++--- .../distributed/fleet/layers/mpu/random.py | 9 +-- .../fleet/meta_optimizers/amp_optimizer.py | 1 + .../ascend/ascend_optimizer.py | 11 ++-- .../meta_optimizers/ascend/ascend_parser.py | 6 +- .../fleet/meta_optimizers/asp_optimizer.py | 3 +- .../fleet/meta_optimizers/common.py | 2 + .../fleet/meta_optimizers/dgc_optimizer.py | 9 +-- .../dygraph_sharding_optimizer.py | 1 + .../hybrid_parallel_gradscaler.py | 5 +- .../hybrid_parallel_optimizer.py | 11 ++-- .../sharding_optimizer_stage2.py | 14 ++-- .../fp16_allreduce_optimizer.py | 3 +- .../gradient_merge_optimizer.py | 1 + .../graph_execution_optimizer.py | 8 ++- .../fleet/meta_optimizers/lamb_optimizer.py | 6 +- .../fleet/meta_optimizers/lars_optimizer.py | 7 +- .../meta_optimizers/localsgd_optimizer.py | 10 ++- .../parameter_server_graph_optimizer.py | 3 +- .../parameter_server_optimizer.py | 12 ++-- .../meta_optimizers/pipeline_optimizer.py | 5 +- .../fleet/meta_optimizers/ps_optimizer.py | 16 +++-- .../meta_optimizers/raw_program_optimizer.py | 7 +- .../meta_optimizers/recompute_optimizer.py | 1 + .../meta_optimizers/sharding/fp16_helper.py | 3 +- 
.../sharding/offload_helper.py | 3 +- .../fleet/meta_optimizers/sharding/shard.py | 3 +- .../fleet/meta_optimizers/sharding/utils.py | 17 ++--- .../meta_optimizers/sharding_optimizer.py | 45 ++++++++----- .../tensor_parallel_optimizer.py | 5 +- .../parallel_layers/mp_layers.py | 4 +- .../parallel_layers/pp_layers.py | 7 +- .../meta_parallel/parallel_layers/random.py | 4 +- .../fleet/meta_parallel/pipeline_parallel.py | 18 +++--- .../pp_utils/p2p_communication.py | 10 +-- .../group_sharded_optimizer_stage2.py | 14 ++-- .../sharding/group_sharded_stage2.py | 2 +- .../sharding/group_sharded_stage3.py | 15 +++-- .../sharding/group_sharded_storage.py | 3 +- .../sharding/group_sharded_utils.py | 6 +- .../meta_parallel/sharding/sharding_stage2.py | 9 +-- .../meta_parallel/sharding/sharding_stage3.py | 15 +++-- .../meta_parallel/sharding/sharding_utils.py | 8 +-- .../fleet/meta_parallel/sharding_parallel.py | 2 +- .../fleet/meta_parallel/tensor_parallel.py | 6 +- .../distributed/fleet/metrics/metric.py | 4 +- python/paddle/distributed/fleet/model.py | 11 ++-- python/paddle/distributed/fleet/optimizer.py | 4 +- .../distributed/fleet/recompute/recompute.py | 7 +- .../fleet/recompute/recompute_hybrid.py | 6 +- .../fleet/runtime/collective_runtime.py | 3 +- .../fleet/runtime/parameter_server_runtime.py | 14 ++-- .../distributed/fleet/runtime/the_one_ps.py | 11 ++-- python/paddle/distributed/fleet/scaler.py | 13 ++-- python/paddle/distributed/fleet/utils/fs.py | 12 ++-- .../distributed/fleet/utils/http_server.py | 5 +- .../fleet/utils/hybrid_parallel_inference.py | 9 +-- .../fleet/utils/hybrid_parallel_util.py | 7 +- .../fleet/utils/internal_storage.py | 1 + .../paddle/distributed/fleet/utils/ps_util.py | 3 +- python/paddle/distributed/io.py | 2 +- .../distributed/launch/context/args_envs.py | 2 +- .../distributed/launch/context/device.py | 1 + .../paddle/distributed/launch/context/node.py | 4 +- .../launch/controllers/collective.py | 6 +- .../launch/controllers/controller.py | 4 +- .../launch/controllers/ipu_controller.py | 5 +- .../distributed/launch/controllers/master.py | 12 ++-- .../distributed/launch/controllers/ps.py | 7 +- .../distributed/launch/controllers/rpc.py | 4 +- .../distributed/launch/controllers/watcher.py | 6 +- .../distributed/launch/job/container.py | 6 +- python/paddle/distributed/launch/job/pod.py | 7 +- .../paddle/distributed/launch/plugins/test.py | 3 +- .../distributed/launch/utils/kv_client.py | 3 +- .../distributed/launch/utils/kv_server.py | 8 +-- .../paddle/distributed/launch/utils/nvsmi.py | 4 +- .../launch/utils/process_context.py | 5 +- python/paddle/distributed/metric/metrics.py | 4 +- python/paddle/distributed/models/moe/utils.py | 6 +- python/paddle/distributed/parallel.py | 40 ++++++------ .../paddle/distributed/parallel_with_gloo.py | 7 +- .../distributed/passes/auto_parallel_amp.py | 43 ++++++------- .../distributed/passes/auto_parallel_fp16.py | 37 +++++------ .../passes/auto_parallel_gradient_merge.py | 25 ++++---- .../passes/auto_parallel_quantization.py | 15 +++-- .../passes/auto_parallel_recompute.py | 23 ++++--- .../passes/auto_parallel_sharding.py | 30 ++++----- python/paddle/distributed/passes/cpp_pass.py | 6 +- .../distributed/passes/fuse_all_reduce.py | 4 +- python/paddle/distributed/passes/pass_base.py | 1 + .../distributed/passes/ps_server_pass.py | 25 ++++---- .../distributed/passes/ps_trainer_pass.py | 13 ++-- python/paddle/distributed/ps/coordinator.py | 14 ++-- python/paddle/distributed/ps/the_one_ps.py | 15 +++-- 
.../ps/utils/collective_transpiler.py | 7 +- .../ps/utils/ps_program_builder.py | 3 +- python/paddle/distributed/ps/utils/public.py | 8 +-- python/paddle/distributed/rpc/internal.py | 2 +- python/paddle/distributed/rpc/rpc.py | 8 +-- .../distributed/sharding/group_sharded.py | 27 ++++---- python/paddle/distributed/spawn.py | 18 +++--- .../paddle/distributed/utils/launch_utils.py | 9 +-- python/paddle/distributed/utils/moe_utils.py | 6 +- python/paddle/distribution/categorical.py | 1 + python/paddle/distribution/dirichlet.py | 2 +- python/paddle/distribution/distribution.py | 3 +- python/paddle/distribution/gumbel.py | 5 +- python/paddle/distribution/kl.py | 4 +- python/paddle/distribution/laplace.py | 1 + python/paddle/distribution/multinomial.py | 4 +- python/paddle/distribution/normal.py | 5 +- .../distribution/transformed_distribution.py | 4 +- python/paddle/distribution/uniform.py | 7 +- python/paddle/distribution/variable.py | 3 +- python/paddle/geometric/math.py | 4 +- .../geometric/message_passing/send_recv.py | 8 +-- .../paddle/geometric/message_passing/utils.py | 3 +- python/paddle/geometric/reindex.py | 6 +- python/paddle/geometric/sampling/neighbors.py | 6 +- python/paddle/hapi/callbacks.py | 2 +- python/paddle/hapi/dynamic_flops.py | 13 ++-- python/paddle/hapi/hub.py | 3 +- python/paddle/hapi/logger.py | 2 +- python/paddle/hapi/model.py | 38 +++++------ python/paddle/hapi/model_summary.py | 7 +- python/paddle/hapi/static_flops.py | 4 +- python/paddle/hub.py | 2 +- python/paddle/incubate/autograd/primrules.py | 4 +- python/paddle/incubate/autotune.py | 3 +- .../models/moe/gate/gshard_gate.py | 4 +- .../distributed/models/moe/gate/naive_gate.py | 4 +- .../models/moe/gate/switch_gate.py | 4 +- .../distributed/models/moe/grad_clip.py | 3 +- .../distributed/models/moe/moe_layer.py | 9 +-- .../incubate/distributed/models/moe/utils.py | 6 +- .../distributed/utils/io/dist_load.py | 7 +- .../distributed/utils/io/dist_save.py | 11 ++-- .../distributed/utils/io/save_for_auto.py | 17 ++--- .../nn/functional/fused_matmul_bias.py | 4 +- .../nn/functional/fused_transformer.py | 8 +-- .../paddle/incubate/nn/layer/fused_linear.py | 2 +- .../incubate/nn/layer/fused_transformer.py | 15 +++-- python/paddle/incubate/nn/loss.py | 8 +-- .../incubate/operators/graph_khop_sampler.py | 6 +- .../incubate/operators/graph_reindex.py | 8 +-- .../operators/graph_sample_neighbors.py | 8 +-- .../incubate/operators/graph_send_recv.py | 14 ++-- .../paddle/incubate/operators/resnet_unit.py | 7 +- .../incubate/operators/softmax_mask_fuse.py | 4 +- .../softmax_mask_fuse_upper_triangle.py | 4 +- .../optimizer/distributed_fused_lamb.py | 9 ++- .../incubate/optimizer/functional/bfgs.py | 6 +- .../incubate/optimizer/functional/lbfgs.py | 6 +- .../optimizer/functional/line_search.py | 3 +- .../incubate/optimizer/functional/utils.py | 2 +- python/paddle/incubate/optimizer/lookahead.py | 6 +- .../paddle/incubate/optimizer/modelaverage.py | 11 ++-- python/paddle/incubate/tensor/manipulation.py | 5 +- python/paddle/incubate/tensor/math.py | 6 +- python/paddle/incubate/xpu/resnet_block.py | 9 +-- python/paddle/linalg.py | 30 ++++----- python/paddle/metric/metrics.py | 8 ++- python/paddle/nn/functional/activation.py | 26 ++++---- python/paddle/nn/functional/common.py | 26 +++----- python/paddle/nn/functional/conv.py | 39 +++++------ python/paddle/nn/functional/distance.py | 7 +- python/paddle/nn/functional/extension.py | 24 ++++--- python/paddle/nn/functional/input.py | 7 +- python/paddle/nn/functional/loss.py | 24 ++++--- 
python/paddle/nn/functional/norm.py | 13 ++-- python/paddle/nn/functional/pooling.py | 19 ++++-- .../paddle/nn/functional/sparse_attention.py | 3 +- python/paddle/nn/functional/vision.py | 15 ++--- python/paddle/nn/initializer/assign.py | 1 + python/paddle/nn/initializer/dirac.py | 14 ++-- python/paddle/nn/initializer/normal.py | 3 +- python/paddle/nn/initializer/orthogonal.py | 9 +-- python/paddle/nn/layer/activation.py | 5 +- python/paddle/nn/layer/common.py | 5 +- python/paddle/nn/layer/container.py | 5 +- python/paddle/nn/layer/conv.py | 13 ++-- python/paddle/nn/layer/loss.py | 8 ++- python/paddle/nn/layer/norm.py | 30 ++++----- python/paddle/nn/layer/pooling.py | 2 +- python/paddle/nn/layer/rnn.py | 20 +++--- python/paddle/nn/layer/transformer.py | 14 ++-- python/paddle/nn/layer/vision.py | 3 +- python/paddle/nn/quant/functional_layers.py | 2 +- python/paddle/nn/quant/lsq.py | 9 +-- python/paddle/nn/quant/quant_layers.py | 17 +++-- python/paddle/nn/utils/spectral_norm_hook.py | 5 +- .../paddle/nn/utils/transform_parameters.py | 4 +- python/paddle/nn/utils/weight_norm_hook.py | 5 +- python/paddle/onnx/export.py | 1 + python/paddle/optimizer/adadelta.py | 7 +- python/paddle/optimizer/adagrad.py | 2 +- python/paddle/optimizer/adam.py | 14 ++-- python/paddle/optimizer/adamax.py | 7 +- python/paddle/optimizer/adamw.py | 21 +++--- python/paddle/optimizer/lamb.py | 12 ++-- python/paddle/optimizer/lr.py | 7 +- python/paddle/optimizer/momentum.py | 16 ++--- python/paddle/optimizer/optimizer.py | 27 ++++---- python/paddle/optimizer/rmsprop.py | 2 +- python/paddle/optimizer/sgd.py | 15 ++--- python/paddle/profiler/profiler.py | 18 +++--- python/paddle/profiler/profiler_statistic.py | 3 +- python/paddle/profiler/utils.py | 6 +- python/paddle/reader/decorator.py | 12 ++-- python/paddle/reader/tests/decorator_test.py | 2 +- python/paddle/signal.py | 8 +-- python/paddle/sparse/binary.py | 6 +- python/paddle/sparse/creation.py | 17 +++-- .../paddle/sparse/nn/functional/activation.py | 3 +- python/paddle/sparse/nn/functional/conv.py | 5 +- python/paddle/sparse/nn/functional/pooling.py | 2 +- python/paddle/sparse/nn/layer/activation.py | 3 +- python/paddle/sparse/nn/layer/conv.py | 8 ++- python/paddle/sparse/nn/layer/norm.py | 7 +- python/paddle/sparse/nn/layer/pooling.py | 1 + python/paddle/sparse/unary.py | 4 +- python/paddle/static/input.py | 7 +- python/paddle/static/io.py | 13 ++-- python/paddle/static/nn/loss.py | 14 ++-- .../paddle/tests/dist_hapi_mnist_dynamic.py | 7 +- python/paddle/tests/dist_hapi_mnist_static.py | 7 +- .../tests/dist_hapi_pure_fp16_static.py | 6 +- python/paddle/tests/hapi_mnist_bf16_static.py | 13 ++-- python/paddle/tests/test_async_read_write.py | 5 +- python/paddle/tests/test_audio_backend.py | 5 +- python/paddle/tests/test_audio_datasets.py | 5 +- python/paddle/tests/test_audio_functions.py | 8 +-- .../paddle/tests/test_audio_logmel_feature.py | 8 +-- python/paddle/tests/test_audio_mel_feature.py | 6 +- .../paddle/tests/test_callback_early_stop.py | 11 ++-- .../test_callback_reduce_lr_on_plateau.py | 8 +-- python/paddle/tests/test_callback_visualdl.py | 8 +-- python/paddle/tests/test_callback_wandb.py | 5 +- python/paddle/tests/test_callbacks.py | 11 ++-- python/paddle/tests/test_dataset_cifar.py | 1 + python/paddle/tests/test_dataset_conll05.py | 1 + python/paddle/tests/test_dataset_imdb.py | 1 + python/paddle/tests/test_dataset_imikolov.py | 1 + python/paddle/tests/test_dataset_movielens.py | 1 + .../paddle/tests/test_dataset_uci_housing.py | 3 +- 
python/paddle/tests/test_dataset_voc.py | 3 +- python/paddle/tests/test_dataset_wmt.py | 1 + python/paddle/tests/test_datasets.py | 15 +++-- python/paddle/tests/test_dist_hapi_model.py | 12 ++-- python/paddle/tests/test_dlpack.py | 1 + python/paddle/tests/test_download.py | 4 +- python/paddle/tests/test_hapi_amp.py | 13 ++-- python/paddle/tests/test_hapi_hub.py | 6 +- python/paddle/tests/test_logger.py | 2 +- python/paddle/tests/test_metrics.py | 2 +- python/paddle/tests/test_model.py | 27 ++++---- python/paddle/tests/test_ops_roi_align.py | 3 +- python/paddle/tests/test_ops_roi_pool.py | 3 +- python/paddle/tests/test_pretrained_model.py | 7 +- python/paddle/tests/test_progressbar.py | 5 +- python/paddle/tests/test_read_file.py | 7 +- python/paddle/tests/test_transforms.py | 7 +- python/paddle/tests/test_utils_lazyimport.py | 1 + python/paddle/tests/test_vision_models.py | 3 +- python/paddle/text/datasets/conll05.py | 3 +- python/paddle/text/datasets/imdb.py | 5 +- python/paddle/text/datasets/imikolov.py | 5 +- python/paddle/text/datasets/movielens.py | 7 +- python/paddle/text/datasets/uci_housing.py | 2 +- python/paddle/text/datasets/wmt14.py | 3 +- python/paddle/text/datasets/wmt16.py | 5 +- python/paddle/text/viterbi_decode.py | 7 +- .../utils/cpp_extension/cpp_extension.py | 2 + .../utils/cpp_extension/extension_utils.py | 20 +++--- python/paddle/utils/deprecated.py | 5 +- python/paddle/utils/dlpack.py | 3 +- python/paddle/utils/download.py | 9 +-- python/paddle/utils/image_util.py | 3 +- python/paddle/utils/install_check.py | 1 + python/paddle/utils/profiler.py | 4 +- python/paddle/utils/unique_name.py | 2 +- python/paddle/vision/datasets/cifar.py | 5 +- python/paddle/vision/datasets/flowers.py | 3 +- python/paddle/vision/datasets/mnist.py | 3 +- python/paddle/vision/datasets/voc2012.py | 3 +- python/paddle/vision/image.py | 1 + python/paddle/vision/models/alexnet.py | 7 +- python/paddle/vision/models/densenet.py | 13 +++- python/paddle/vision/models/googlenet.py | 13 ++-- python/paddle/vision/models/inceptionv3.py | 8 +-- python/paddle/vision/models/mobilenetv1.py | 2 +- python/paddle/vision/models/mobilenetv2.py | 2 +- python/paddle/vision/models/mobilenetv3.py | 5 +- python/paddle/vision/models/resnet.py | 1 - python/paddle/vision/models/squeezenet.py | 4 +- python/paddle/vision/models/vgg.py | 1 - python/paddle/vision/ops.py | 14 ++-- python/paddle/vision/transforms/functional.py | 3 +- .../vision/transforms/functional_cv2.py | 3 +- .../vision/transforms/functional_pil.py | 6 +- python/paddle/vision/transforms/transforms.py | 8 +-- r/example/mobilenet.py | 4 +- tools/CrossStackProfiler/CspFileReader.py | 6 +- tools/CrossStackProfiler/CspReporter.py | 18 ++++-- tools/CrossStackProfiler/DCGMFileReader.py | 18 +++--- tools/CrossStackProfiler/NetFileReader.py | 11 ++-- tools/CrossStackProfiler/ProfileFileReader.py | 13 ++-- tools/analysisPyXml.py | 7 +- tools/check_api_compatible.py | 4 +- tools/check_api_source_without_core_ops.py | 1 + tools/check_ctest_hung.py | 2 +- tools/check_op_benchmark_result.py | 4 +- tools/check_op_desc.py | 3 +- tools/check_op_kernel_same_dtypes.py | 5 +- tools/check_op_register_type.py | 7 +- tools/check_pr_approval.py | 2 +- tools/check_ut.py | 1 + tools/codestyle/docstring_checker.py | 7 +- tools/codestyle/test_docstring_checker.py | 2 +- tools/count_api_without_core_ops.py | 4 +- tools/diff_use_default_grad_op_maker.py | 3 +- .../build_scripts/python-tag-abi-tag.py | 2 +- tools/externalError/spider.py | 9 +-- tools/final_ut_parallel_rule.py | 2 +- 
tools/gen_ut_cmakelists.py | 4 +- tools/get_op_list.py | 6 +- tools/get_pr_ut.py | 11 ++-- tools/get_quick_disable_lt.py | 4 +- tools/get_single_test_cov.py | 2 +- tools/get_ut_file_map.py | 2 +- tools/get_ut_mem_map.py | 2 +- tools/handle_h_cu_file.py | 4 +- tools/infrt/generate_phi_kernel_dialect.py | 3 +- tools/infrt/get_phi_kernel_info.py | 3 +- tools/jetson_infer_op.py | 6 +- tools/print_op_desc.py | 3 +- tools/print_signatures.py | 9 +-- tools/prune_for_jetson.py | 2 +- tools/pyCov_multithreading.py | 6 +- tools/remove_grad_op_and_kernel.py | 2 +- tools/sampcd_processor.py | 14 ++-- tools/summary_env.py | 5 +- tools/test_check_api_compatible.py | 10 +-- tools/test_check_pr_approval.py | 2 +- tools/test_print_signatures.py | 8 +-- tools/test_runner.py | 7 +- tools/test_sampcd_processor.py | 27 ++++---- 515 files changed, 2317 insertions(+), 1961 deletions(-) diff --git a/cmake/copyfile.py b/cmake/copyfile.py index 552e8ea1a2b14e..277ed2c2458694 100644 --- a/cmake/copyfile.py +++ b/cmake/copyfile.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import glob import os -import sys import shutil -import glob +import sys def main(): diff --git a/paddle/fluid/eager/auto_code_generator/generate_file_structures.py b/paddle/fluid/eager/auto_code_generator/generate_file_structures.py index bb2a236c28e592..d2beeff5ac22c4 100644 --- a/paddle/fluid/eager/auto_code_generator/generate_file_structures.py +++ b/paddle/fluid/eager/auto_code_generator/generate_file_structures.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import os +import sys def GenerateFileStructureForFinalDygraph(eager_dir): diff --git a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py index ac0b01dd4de98e..748c9d1ad22f22 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py +++ b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import yaml import re +import yaml + #################### # Global Variables # #################### diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py index 1b73b3c67d1023..e726ec8bd96706 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py @@ -12,30 +12,36 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import re import argparse import os +import re + from codegen_utils import ( - core_ops_returns_info, - core_ops_args_info, - core_ops_args_type_info, -) -from codegen_utils import ReadBwdFile -from codegen_utils import FindForwardName, GetGradNodeName, GetSavedName -from codegen_utils import IsPlainTensorType, IsVectorTensorType -from codegen_utils import GetConstReference, RemoveConstAndReference -from codegen_utils import ( + AssertMessage, + FindForwardName, + FunctionGeneratorBase, + GeneratorBase, + GetAutoGradMetaName, + GetAutoGradMetaVectorName, + GetConstReference, GetDygraphForwardFunctionName, + GetGradNodeName, + GetIndent, + GetInplacedFunctionName, GetIntermediateAPIFunctionName, + GetSavedName, + IsPlainTensorType, + IsVectorTensorType, + ParseYamlBackward, + ParseYamlForwardFromBackward, + ParseYamlInplaceInfo, + ReadBwdFile, + RemoveConstAndReference, + core_ops_args_info, + core_ops_args_type_info, + core_ops_returns_info, + ops_to_fill_zero_for_empty_grads, ) -from codegen_utils import GetAutoGradMetaName, GetAutoGradMetaVectorName -from codegen_utils import GetInplacedFunctionName -from codegen_utils import ParseYamlForwardFromBackward -from codegen_utils import ParseYamlBackward -from codegen_utils import ParseYamlInplaceInfo -from codegen_utils import FunctionGeneratorBase, GeneratorBase -from codegen_utils import ops_to_fill_zero_for_empty_grads -from codegen_utils import AssertMessage, GetIndent # Note: assign is a inplace api when parameter(output) isn't none, # so we should check parameter(output) with rule of inplace. diff --git a/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py b/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py index aacde58fa7bc2e..99cc75e3f0b861 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py @@ -12,11 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import argparse -from codegen_utils import FunctionGeneratorBase, GeneratorBase -from codegen_utils import GetForwardFunctionName, IsVectorTensorType -from codegen_utils import GetInplacedFunctionName +import os + +from codegen_utils import ( + FunctionGeneratorBase, + GeneratorBase, + GetForwardFunctionName, + GetInplacedFunctionName, + IsVectorTensorType, +) ######################### # Global Configurations # diff --git a/paddle/fluid/inference/api/demo_ci/untar_model.py b/paddle/fluid/inference/api/demo_ci/untar_model.py index 50c12aa28756d9..7016b02432de9b 100644 --- a/paddle/fluid/inference/api/demo_ci/untar_model.py +++ b/paddle/fluid/inference/api/demo_ci/untar_model.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import tarfile import sys +import tarfile def untar(fname, dirs): diff --git a/paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py b/paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py index a5ff3717199b01..59f222a2a5eaed 100644 --- a/paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py +++ b/paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py @@ -10,15 +10,17 @@ # without warranties or conditions of any kind, either express or implied. # see the license for the specific language governing permissions and # limitations under the license. 
-import os +import argparse import io -import numpy as np +import os +import shutil import sys +import tarfile + +import numpy as np from PIL import Image + from paddle.dataset.common import download -import tarfile -import argparse -import shutil np.random.seed(0) diff --git a/paddle/fluid/inference/tests/api/full_pascalvoc_test_preprocess.py b/paddle/fluid/inference/tests/api/full_pascalvoc_test_preprocess.py index 6fc2e072e00cc4..afe9eedc9e2584 100644 --- a/paddle/fluid/inference/tests/api/full_pascalvoc_test_preprocess.py +++ b/paddle/fluid/inference/tests/api/full_pascalvoc_test_preprocess.py @@ -12,17 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -import xml.etree.ElementTree -from PIL import Image -import numpy as np +import argparse +import hashlib import os import sys -from paddle.dataset.common import download import tarfile +import xml.etree.ElementTree from io import StringIO -import hashlib -import tarfile -import argparse + +import numpy as np +from PIL import Image + +from paddle.dataset.common import download DATA_URL = ( "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar" diff --git a/paddle/fluid/inference/tests/api/test_detection_dataset_preprocess.py b/paddle/fluid/inference/tests/api/test_detection_dataset_preprocess.py index 4ae32b0f77a84b..7b879d16648253 100644 --- a/paddle/fluid/inference/tests/api/test_detection_dataset_preprocess.py +++ b/paddle/fluid/inference/tests/api/test_detection_dataset_preprocess.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import os +import unittest class Test_Preprocess(unittest.TestCase): diff --git a/paddle/fluid/operators/generator/filters.py b/paddle/fluid/operators/generator/filters.py index e361e0af921dce..e39a3122d538eb 100644 --- a/paddle/fluid/operators/generator/filters.py +++ b/paddle/fluid/operators/generator/filters.py @@ -16,18 +16,16 @@ import re from type_mapping import ( - input_types_map, - optional_input_types_map, attr_types_map, - opmaker_attr_types_map, - output_type_map, -) -from type_mapping import ( dense_input_types_map, dense_optional_input_types_map, dense_output_types_map, - sr_output_types_map, + input_types_map, + opmaker_attr_types_map, + optional_input_types_map, + output_type_map, phi_attr_types_map, + sr_output_types_map, ) diff --git a/paddle/fluid/operators/generator/generate_op.py b/paddle/fluid/operators/generator/generate_op.py index 36db64643122da..8746b83a325bd9 100644 --- a/paddle/fluid/operators/generator/generate_op.py +++ b/paddle/fluid/operators/generator/generate_op.py @@ -17,24 +17,24 @@ from pathlib import Path import yaml -from jinja2 import Environment, FileSystemLoader, StrictUndefined - from filters import ( + cartesian_prod_mapping, + to_input_name, to_op_attr_type, to_opmaker_name, to_opmaker_name_cstr, to_pascal_case, ) +from jinja2 import Environment, FileSystemLoader, StrictUndefined +from parse_utils import to_named_dict from tests import ( is_base_op, - is_vec, - is_scalar, is_initializer_list, + is_scalar, + is_vec, supports_inplace, supports_no_need_buffer, ) -from filters import to_input_name, cartesian_prod_mapping -from parse_utils import to_named_dict file_loader = FileSystemLoader(Path(__file__).parent / "templates") env = Environment( diff --git a/paddle/fluid/operators/generator/generate_sparse_op.py b/paddle/fluid/operators/generator/generate_sparse_op.py index 
10ee034ff3b473..0f04e6130840c7 100644 --- a/paddle/fluid/operators/generator/generate_sparse_op.py +++ b/paddle/fluid/operators/generator/generate_sparse_op.py @@ -17,25 +17,25 @@ from pathlib import Path import yaml -from jinja2 import Environment, FileSystemLoader, StrictUndefined - from filters import ( + cartesian_prod_mapping, + to_input_name, to_op_attr_type, to_opmaker_name, to_opmaker_name_cstr, to_pascal_case, ) +from generate_op import process_invoke_op +from jinja2 import Environment, FileSystemLoader, StrictUndefined +from parse_utils import to_named_dict from tests import ( is_base_op, - is_vec, - is_scalar, is_initializer_list, + is_scalar, + is_vec, supports_inplace, supports_no_need_buffer, ) -from filters import to_input_name, cartesian_prod_mapping -from parse_utils import to_named_dict -from generate_op import process_invoke_op file_loader = FileSystemLoader(Path(__file__).parent / "templates") env = Environment( diff --git a/paddle/fluid/operators/generator/parse_op.py b/paddle/fluid/operators/generator/parse_op.py index 58b7b73436663a..79d31113e33b7e 100644 --- a/paddle/fluid/operators/generator/parse_op.py +++ b/paddle/fluid/operators/generator/parse_op.py @@ -15,7 +15,6 @@ import argparse import yaml - from parse_utils import parse_op_entry diff --git a/paddle/fluid/operators/generator/parse_utils.py b/paddle/fluid/operators/generator/parse_utils.py index 099b0f7b6fad4e..fb7940ddfe608f 100644 --- a/paddle/fluid/operators/generator/parse_utils.py +++ b/paddle/fluid/operators/generator/parse_utils.py @@ -14,7 +14,8 @@ import re from copy import copy -from typing import Dict, Any, List, Tuple +from typing import Any, Dict, List, Tuple + from tests import is_attr, is_input, is_output, is_vec diff --git a/paddle/fluid/operators/generator/tests.py b/paddle/fluid/operators/generator/tests.py index b9f0645e699134..0c6c5cf0f3f8a1 100644 --- a/paddle/fluid/operators/generator/tests.py +++ b/paddle/fluid/operators/generator/tests.py @@ -13,7 +13,8 @@ # limitations under the License. import re -from type_mapping import input_types_map, attr_types_map, output_type_map + +from type_mapping import attr_types_map, input_types_map, output_type_map # tests for typename diff --git a/paddle/fluid/pybind/generate_file_structures.py b/paddle/fluid/pybind/generate_file_structures.py index 4904f97dc2fdd9..54bd1ee699a740 100644 --- a/paddle/fluid/pybind/generate_file_structures.py +++ b/paddle/fluid/pybind/generate_file_structures.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import os +import sys if __name__ == "__main__": assert len(sys.argv) == 3 diff --git a/paddle/infrt/tests/models/abs_model.py b/paddle/infrt/tests/models/abs_model.py index 6e04c8ec9bc2e6..83b572149c7227 100644 --- a/paddle/infrt/tests/models/abs_model.py +++ b/paddle/infrt/tests/models/abs_model.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys + import paddle -from paddle.static import InputSpec from paddle.jit import to_static -import sys +from paddle.static import InputSpec class AbsNet(paddle.nn.Layer): diff --git a/paddle/infrt/tests/models/efficientnet-b4/model.py b/paddle/infrt/tests/models/efficientnet-b4/model.py index 1b493897e654a2..6c3237f566689b 100644 --- a/paddle/infrt/tests/models/efficientnet-b4/model.py +++ b/paddle/infrt/tests/models/efficientnet-b4/model.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys + # url: https://aistudio.baidu.com/aistudio/projectdetail/3756986?forkThirdPart=1 from net import EfficientNet + +import paddle from paddle.jit import to_static from paddle.static import InputSpec -import paddle -import sys model = EfficientNet.from_name('efficientnet-b4') net = to_static( diff --git a/paddle/infrt/tests/models/efficientnet-b4/net/efficientnet.py b/paddle/infrt/tests/models/efficientnet-b4/net/efficientnet.py index c14be25a81590f..a70db67a507a4d 100644 --- a/paddle/infrt/tests/models/efficientnet-b4/net/efficientnet.py +++ b/paddle/infrt/tests/models/efficientnet-b4/net/efficientnet.py @@ -17,13 +17,13 @@ import paddle.nn.functional as F from .utils import ( - round_filters, - round_repeats, drop_connect, - get_same_padding_conv2d, - get_model_params, efficientnet_params, + get_model_params, + get_same_padding_conv2d, load_pretrained_weights, + round_filters, + round_repeats, ) diff --git a/paddle/infrt/tests/models/efficientnet-b4/net/utils.py b/paddle/infrt/tests/models/efficientnet-b4/net/utils.py index 0617870bf01909..76c29ee8eb32ff 100644 --- a/paddle/infrt/tests/models/efficientnet-b4/net/utils.py +++ b/paddle/infrt/tests/models/efficientnet-b4/net/utils.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import re +import collections import math +import re from functools import partial -import collections import paddle import paddle.nn as nn diff --git a/paddle/infrt/tests/models/linear.py b/paddle/infrt/tests/models/linear.py index 1444e5ac21c6c3..afc21162d48d09 100644 --- a/paddle/infrt/tests/models/linear.py +++ b/paddle/infrt/tests/models/linear.py @@ -14,6 +14,7 @@ # example 1: save layer import numpy as np + import paddle import paddle.nn as nn import paddle.optimizer as opt diff --git a/paddle/infrt/tests/models/resnet50_model.py b/paddle/infrt/tests/models/resnet50_model.py index ff84c3988abcba..5932fbc5cd5487 100644 --- a/paddle/infrt/tests/models/resnet50_model.py +++ b/paddle/infrt/tests/models/resnet50_model.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys + import paddle -from paddle.vision.models import resnet50 from paddle.jit import to_static from paddle.static import InputSpec -import sys +from paddle.vision.models import resnet50 model = resnet50(True) net = to_static( diff --git a/paddle/phi/api/yaml/generator/api_base.py b/paddle/phi/api/yaml/generator/api_base.py index 45305479302e8f..9b78f528f1c5e3 100644 --- a/paddle/phi/api/yaml/generator/api_base.py +++ b/paddle/phi/api/yaml/generator/api_base.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import re import collections +import re PREFIX_TENSOR_NAME = 'input_' PREFIX_META_TENSOR_NAME = 'meta_' diff --git a/paddle/phi/api/yaml/generator/api_gen.py b/paddle/phi/api/yaml/generator/api_gen.py index 9e83fa1dc7aae7..c9ebfd7fea387d 100644 --- a/paddle/phi/api/yaml/generator/api_gen.py +++ b/paddle/phi/api/yaml/generator/api_gen.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import yaml import argparse import re -from api_base import BaseAPI, PREFIX_TENSOR_NAME +import yaml +from api_base import PREFIX_TENSOR_NAME, BaseAPI inplace_out_type_map = { "Tensor": "Tensor&", diff --git a/paddle/phi/api/yaml/generator/backward_api_gen.py b/paddle/phi/api/yaml/generator/backward_api_gen.py index 0899da7142b31e..75914ac4004975 100644 --- a/paddle/phi/api/yaml/generator/backward_api_gen.py +++ b/paddle/phi/api/yaml/generator/backward_api_gen.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import yaml import argparse import re +import yaml from api_base import BaseAPI diff --git a/paddle/phi/api/yaml/generator/intermediate_api_gen.py b/paddle/phi/api/yaml/generator/intermediate_api_gen.py index 1f1a55b9d1a377..0c53a578b3201b 100644 --- a/paddle/phi/api/yaml/generator/intermediate_api_gen.py +++ b/paddle/phi/api/yaml/generator/intermediate_api_gen.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import yaml import argparse +import yaml from api_gen import ForwardAPI from sparse_api_gen import SparseAPI diff --git a/paddle/phi/api/yaml/generator/ops_extra_info_gen.py b/paddle/phi/api/yaml/generator/ops_extra_info_gen.py index 77596b9f2d46b9..34b1f763381102 100644 --- a/paddle/phi/api/yaml/generator/ops_extra_info_gen.py +++ b/paddle/phi/api/yaml/generator/ops_extra_info_gen.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import yaml -import re import argparse +import re + +import yaml def map_code_template(attrs_str, attrs_checker_str): diff --git a/paddle/phi/api/yaml/generator/sparse_api_gen.py b/paddle/phi/api/yaml/generator/sparse_api_gen.py index 8b747a7cb63962..3dbd424be2c6bd 100644 --- a/paddle/phi/api/yaml/generator/sparse_api_gen.py +++ b/paddle/phi/api/yaml/generator/sparse_api_gen.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import yaml import argparse -from api_gen import ForwardAPI +import yaml from api_base import PREFIX_TENSOR_NAME +from api_gen import ForwardAPI class SparseAPI(ForwardAPI): diff --git a/paddle/phi/api/yaml/generator/sparse_bw_api_gen.py b/paddle/phi/api/yaml/generator/sparse_bw_api_gen.py index e17a4f34de0955..b77733273621d9 100644 --- a/paddle/phi/api/yaml/generator/sparse_bw_api_gen.py +++ b/paddle/phi/api/yaml/generator/sparse_bw_api_gen.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import yaml import argparse -from sparse_api_gen import SparseAPI +import yaml from backward_api_gen import BackwardAPI +from sparse_api_gen import SparseAPI class SparseBackwardAPI(SparseAPI, BackwardAPI): diff --git a/paddle/phi/api/yaml/generator/strings_api_gen.py b/paddle/phi/api/yaml/generator/strings_api_gen.py index 11fa96b43e190a..029fa9d8a20a8b 100644 --- a/paddle/phi/api/yaml/generator/strings_api_gen.py +++ b/paddle/phi/api/yaml/generator/strings_api_gen.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import yaml import argparse +import yaml from api_gen import ForwardAPI PREFIX_TENSOR_NAME = 'input_' diff --git a/paddle/phi/api/yaml/generator/wrapped_infermeta_gen.py b/paddle/phi/api/yaml/generator/wrapped_infermeta_gen.py index c14259349ce608..fdbb9caa3301cc 100644 --- a/paddle/phi/api/yaml/generator/wrapped_infermeta_gen.py +++ b/paddle/phi/api/yaml/generator/wrapped_infermeta_gen.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import yaml import argparse +import yaml from api_gen import ForwardAPI kernel_func_set = set() diff --git a/paddle/scripts/conda_build.py b/paddle/scripts/conda_build.py index ed3f1709884c1d..71c29b1a135c3a 100644 --- a/paddle/scripts/conda_build.py +++ b/paddle/scripts/conda_build.py @@ -14,11 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -# -import platform import argparse import os +# +import platform + def parse_args(): parser = argparse.ArgumentParser("conda build for paddlepaddle version") diff --git a/paddle/scripts/installation_validate.py b/paddle/scripts/installation_validate.py index b765291a3b80fb..430b32ec4754b6 100644 --- a/paddle/scripts/installation_validate.py +++ b/paddle/scripts/installation_validate.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import paddle as pd +import paddle.fluid as fluid fluid.install_check.run_check() print(pd.__version__) diff --git a/python/paddle/_legacy_C_ops.py b/python/paddle/_legacy_C_ops.py index ace90e62edfb80..971726bfdf1638 100644 --- a/python/paddle/_legacy_C_ops.py +++ b/python/paddle/_legacy_C_ops.py @@ -13,6 +13,7 @@ # limitations under the License. from paddle.fluid import core + from .fluid import framework __all__ = [] diff --git a/python/paddle/amp/auto_cast.py b/python/paddle/amp/auto_cast.py index 930e97f62f4d0e..bfa7bc8e66e22e 100644 --- a/python/paddle/amp/auto_cast.py +++ b/python/paddle/amp/auto_cast.py @@ -12,8 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.dygraph.amp import amp_guard -from paddle.fluid.dygraph.amp import amp_decorate +from paddle.fluid.dygraph.amp import amp_decorate, amp_guard __all__ = [] diff --git a/python/paddle/amp/grad_scaler.py b/python/paddle/amp/grad_scaler.py index 223be65f920b87..d847da5455d7bf 100644 --- a/python/paddle/amp/grad_scaler.py +++ b/python/paddle/amp/grad_scaler.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from paddle.fluid.dygraph.amp import AmpScaler -from paddle.fluid.dygraph.amp import OptimizerState from collections import defaultdict +from paddle.fluid.dygraph.amp import AmpScaler, OptimizerState + __all__ = [] diff --git a/python/paddle/audio/backends/backend.py b/python/paddle/audio/backends/backend.py index d092968a68ecfe..80c29608ee1e52 100644 --- a/python/paddle/audio/backends/backend.py +++ b/python/paddle/audio/backends/backend.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License -import paddle - from pathlib import Path from typing import Optional, Tuple, Union +import paddle + class AudioInfo: """Audio info, return type of backend info function""" diff --git a/python/paddle/audio/backends/init_backend.py b/python/paddle/audio/backends/init_backend.py index 6bf972d435f886..66420cb01fd3f5 100644 --- a/python/paddle/audio/backends/init_backend.py +++ b/python/paddle/audio/backends/init_backend.py @@ -14,12 +14,12 @@ import sys import warnings -from . import wave_backend -from . import backend from typing import List import paddle +from . import backend, wave_backend + def _check_version(version: str) -> bool: # require paddleaudio >= 1.0.2 diff --git a/python/paddle/audio/backends/wave_backend.py b/python/paddle/audio/backends/wave_backend.py index 4be5592099d974..f0cb8236dcf879 100644 --- a/python/paddle/audio/backends/wave_backend.py +++ b/python/paddle/audio/backends/wave_backend.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle - import wave -import numpy as np from pathlib import Path - from typing import Optional, Tuple, Union + +import numpy as np + +import paddle + from .backend import AudioInfo diff --git a/python/paddle/audio/datasets/dataset.py b/python/paddle/audio/datasets/dataset.py index 3e6d2abd2b40fe..95a3419840ae3a 100644 --- a/python/paddle/audio/datasets/dataset.py +++ b/python/paddle/audio/datasets/dataset.py @@ -15,10 +15,7 @@ import paddle -from ..features import MelSpectrogram -from ..features import Spectrogram -from ..features import MFCC -from ..features import LogMelSpectrogram +from ..features import MFCC, LogMelSpectrogram, MelSpectrogram, Spectrogram feat_funcs = { 'raw': None, diff --git a/python/paddle/audio/datasets/esc50.py b/python/paddle/audio/datasets/esc50.py index 110f1cd5e415fd..412c3916bf1e9f 100644 --- a/python/paddle/audio/datasets/esc50.py +++ b/python/paddle/audio/datasets/esc50.py @@ -13,11 +13,11 @@ # limitations under the License. import collections import os -from typing import List -from typing import Tuple +from typing import List, Tuple -from paddle.utils import download from paddle.dataset.common import DATA_HOME +from paddle.utils import download + from .dataset import AudioClassificationDataset __all__ = [] diff --git a/python/paddle/audio/datasets/tess.py b/python/paddle/audio/datasets/tess.py index 46ee1425ec9fb3..6ded358d054a44 100644 --- a/python/paddle/audio/datasets/tess.py +++ b/python/paddle/audio/datasets/tess.py @@ -13,11 +13,11 @@ # limitations under the License. 
import collections import os -from typing import List -from typing import Tuple +from typing import List, Tuple -from paddle.utils import download from paddle.dataset.common import DATA_HOME +from paddle.utils import download + from .dataset import AudioClassificationDataset __all__ = [] diff --git a/python/paddle/audio/features/layers.py b/python/paddle/audio/features/layers.py index af97c8b6791617..a3b1d0d81c49f4 100644 --- a/python/paddle/audio/features/layers.py +++ b/python/paddle/audio/features/layers.py @@ -12,16 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. from functools import partial -from typing import Optional -from typing import Union +from typing import Optional, Union import paddle import paddle.nn as nn from paddle import Tensor -from ..functional import compute_fbank_matrix -from ..functional import create_dct -from ..functional import power_to_db +from ..functional import compute_fbank_matrix, create_dct, power_to_db from ..functional.window import get_window diff --git a/python/paddle/audio/functional/functional.py b/python/paddle/audio/functional/functional.py index d8fb2436153077..fada4e177542d7 100644 --- a/python/paddle/audio/functional/functional.py +++ b/python/paddle/audio/functional/functional.py @@ -13,8 +13,7 @@ # limitations under the License. # Modified from librosa(https://github.com/librosa/librosa) import math -from typing import Optional -from typing import Union +from typing import Optional, Union import paddle from paddle import Tensor diff --git a/python/paddle/audio/functional/window.py b/python/paddle/audio/functional/window.py index 52ca3e477324c3..f9be027374f94e 100644 --- a/python/paddle/audio/functional/window.py +++ b/python/paddle/audio/functional/window.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and import math -from typing import List -from typing import Tuple -from typing import Union +from typing import List, Tuple, Union import paddle from paddle import Tensor diff --git a/python/paddle/autograd/backward_mode.py b/python/paddle/autograd/backward_mode.py index 9f673e62e21c5b..64c2423d4395e8 100644 --- a/python/paddle/autograd/backward_mode.py +++ b/python/paddle/autograd/backward_mode.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid import core -from paddle.fluid import framework -from paddle.fluid.backward import gradients_with_optimizer # noqa: F401 import paddle +from paddle.fluid import core, framework +from paddle.fluid.backward import gradients_with_optimizer # noqa: F401 __all__ = [] diff --git a/python/paddle/autograd/py_layer.py b/python/paddle/autograd/py_layer.py index 252cfd5d91decc..d90e437438c86f 100644 --- a/python/paddle/autograd/py_layer.py +++ b/python/paddle/autograd/py_layer.py @@ -13,10 +13,10 @@ # limitations under the License. 
import paddle -from paddle.fluid.framework import dygraph_only -from paddle.fluid.dygraph.amp.auto_cast import amp_state from paddle.amp.auto_cast import auto_cast from paddle.fluid import core +from paddle.fluid.dygraph.amp.auto_cast import amp_state +from paddle.fluid.framework import dygraph_only __all__ = [] diff --git a/python/paddle/callbacks.py b/python/paddle/callbacks.py index 28f97f44babbdd..960399c6b97967 100644 --- a/python/paddle/callbacks.py +++ b/python/paddle/callbacks.py @@ -13,12 +13,12 @@ # limitations under the License. from .hapi.callbacks import Callback # noqa: F401 -from .hapi.callbacks import ProgBarLogger # noqa: F401 -from .hapi.callbacks import ModelCheckpoint # noqa: F401 -from .hapi.callbacks import VisualDL # noqa: F401 -from .hapi.callbacks import LRScheduler # noqa: F401 from .hapi.callbacks import EarlyStopping # noqa: F401 +from .hapi.callbacks import LRScheduler # noqa: F401 +from .hapi.callbacks import ModelCheckpoint # noqa: F401 +from .hapi.callbacks import ProgBarLogger # noqa: F401 from .hapi.callbacks import ReduceLROnPlateau # noqa: F401 +from .hapi.callbacks import VisualDL # noqa: F401 from .hapi.callbacks import WandbCallback # noqa: F401 __all__ = [ # noqa diff --git a/python/paddle/cost_model/cost_model.py b/python/paddle/cost_model/cost_model.py index b3178d2e05a66b..12e73540fe7aca 100644 --- a/python/paddle/cost_model/cost_model.py +++ b/python/paddle/cost_model/cost_model.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.static as static -import numpy as np import json import os + +import numpy as np + +import paddle +import paddle.static as static from paddle.fluid import core diff --git a/python/paddle/dataset/cifar.py b/python/paddle/dataset/cifar.py index 80fd43eb52f71b..dbdc5cb6882357 100644 --- a/python/paddle/dataset/cifar.py +++ b/python/paddle/dataset/cifar.py @@ -27,11 +27,13 @@ """ +import pickle +import tarfile + import numpy + import paddle.dataset.common import paddle.utils.deprecated as deprecated -import tarfile -import pickle __all__ = [] diff --git a/python/paddle/dataset/common.py b/python/paddle/dataset/common.py index 157cdeb91fec72..eab4d37676190b 100644 --- a/python/paddle/dataset/common.py +++ b/python/paddle/dataset/common.py @@ -12,18 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -import requests +import errno +import glob import hashlib +import importlib import os -import errno +import pickle import shutil import sys -import importlib -import paddle.dataset -import pickle import tempfile -import glob + +import requests + import paddle +import paddle.dataset __all__ = [] diff --git a/python/paddle/dataset/conll05.py b/python/paddle/dataset/conll05.py index 08a383badae5de..12929475eccbe2 100644 --- a/python/paddle/dataset/conll05.py +++ b/python/paddle/dataset/conll05.py @@ -20,8 +20,9 @@ to initialize SRL model. 
""" -import tarfile import gzip +import tarfile + import paddle.dataset.common import paddle.utils.deprecated as deprecated diff --git a/python/paddle/dataset/flowers.py b/python/paddle/dataset/flowers.py index 41ffc488aa1ac3..6d6de32096ec1e 100644 --- a/python/paddle/dataset/flowers.py +++ b/python/paddle/dataset/flowers.py @@ -30,17 +30,16 @@ """ import functools -from .common import download import tarfile +from multiprocessing import cpu_count -from paddle.dataset.image import load_image_bytes -from paddle.dataset.image import simple_transform - -from paddle.reader import map_readers, xmap_readers import paddle.utils.deprecated as deprecated -from multiprocessing import cpu_count +from paddle.dataset.image import load_image_bytes, simple_transform +from paddle.reader import map_readers, xmap_readers from paddle.utils import try_import +from .common import download + __all__ = [] DATA_URL = 'http://paddlemodels.bj.bcebos.com/flowers/102flowers.tgz' diff --git a/python/paddle/dataset/image.py b/python/paddle/dataset/image.py index c8d5124f0feace..008712f1a738cb 100644 --- a/python/paddle/dataset/image.py +++ b/python/paddle/dataset/image.py @@ -30,13 +30,14 @@ be keep consistent between the training and inference period. """ -import numpy as np +import os # FIXME(minqiyang): this is an ugly fix for the numpy bug reported here # https://github.com/numpy/numpy/issues/12497 import subprocess import sys -import os + +import numpy as np interpreter = sys.executable # Note(zhouwei): if use Python/C 'PyRun_SimpleString', 'sys.executable' @@ -59,8 +60,8 @@ cv2 = None import os -import tarfile import pickle +import tarfile __all__ = [] diff --git a/python/paddle/dataset/imdb.py b/python/paddle/dataset/imdb.py index 622d33aa1873cb..e1aeb7b04df634 100644 --- a/python/paddle/dataset/imdb.py +++ b/python/paddle/dataset/imdb.py @@ -20,12 +20,13 @@ Besides, this module also provides API for building dictionary. """ -import paddle.dataset.common -import paddle.utils.deprecated as deprecated import collections -import tarfile import re import string +import tarfile + +import paddle.dataset.common +import paddle.utils.deprecated as deprecated __all__ = [] diff --git a/python/paddle/dataset/imikolov.py b/python/paddle/dataset/imikolov.py index f618bbc2c4867c..a4c0e10d9c111e 100644 --- a/python/paddle/dataset/imikolov.py +++ b/python/paddle/dataset/imikolov.py @@ -19,11 +19,12 @@ into paddle reader creators. """ -import paddle.dataset.common -import paddle.utils.deprecated as deprecated import collections import tarfile +import paddle.dataset.common +import paddle.utils.deprecated as deprecated + __all__ = [] # URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz' diff --git a/python/paddle/dataset/mnist.py b/python/paddle/dataset/mnist.py index ec1cb9855cfe42..22594ff14313e2 100644 --- a/python/paddle/dataset/mnist.py +++ b/python/paddle/dataset/mnist.py @@ -18,12 +18,14 @@ parse training set and test set into paddle reader creators. 
""" -import paddle.dataset.common -import paddle.utils.deprecated as deprecated import gzip -import numpy import struct +import numpy + +import paddle.dataset.common +import paddle.utils.deprecated as deprecated + __all__ = [] URL_PREFIX = 'https://dataset.bj.bcebos.com/mnist/' diff --git a/python/paddle/dataset/movielens.py b/python/paddle/dataset/movielens.py index 7a47293c3c7e04..255ac99517fe23 100644 --- a/python/paddle/dataset/movielens.py +++ b/python/paddle/dataset/movielens.py @@ -22,12 +22,14 @@ """ -import numpy as np +import functools +import re import zipfile + +import numpy as np + import paddle.dataset.common import paddle.utils.deprecated as deprecated -import re -import functools __all__ = [] diff --git a/python/paddle/dataset/tests/cifar_test.py b/python/paddle/dataset/tests/cifar_test.py index 6f25975dd8e0f1..a32a0265bc1bbc 100644 --- a/python/paddle/dataset/tests/cifar_test.py +++ b/python/paddle/dataset/tests/cifar_test.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.dataset.cifar import unittest +import paddle.dataset.cifar + __all__ = [] diff --git a/python/paddle/dataset/tests/flowers_test.py b/python/paddle/dataset/tests/flowers_test.py index 55a27b0ce8fca6..be51f19a5bc3b9 100644 --- a/python/paddle/dataset/tests/flowers_test.py +++ b/python/paddle/dataset/tests/flowers_test.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.dataset.flowers import unittest +import paddle.dataset.flowers + __all__ = [] diff --git a/python/paddle/dataset/tests/imdb_test.py b/python/paddle/dataset/tests/imdb_test.py index 32dbc75b874e12..d703713b6cead3 100644 --- a/python/paddle/dataset/tests/imdb_test.py +++ b/python/paddle/dataset/tests/imdb_test.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.dataset.imdb -import unittest import re +import unittest + +import paddle.dataset.imdb __all__ = [] diff --git a/python/paddle/dataset/tests/imikolov_test.py b/python/paddle/dataset/tests/imikolov_test.py index f8d8b182e2d77d..ec66cd374421e6 100644 --- a/python/paddle/dataset/tests/imikolov_test.py +++ b/python/paddle/dataset/tests/imikolov_test.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.dataset.imikolov import unittest +import paddle.dataset.imikolov + WORD_DICT = paddle.dataset.imikolov.build_dict() __all__ = [] diff --git a/python/paddle/dataset/tests/mnist_test.py b/python/paddle/dataset/tests/mnist_test.py index 978fa714f72679..4d44e471a4e768 100644 --- a/python/paddle/dataset/tests/mnist_test.py +++ b/python/paddle/dataset/tests/mnist_test.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.dataset.mnist import unittest +import paddle.dataset.mnist + __all__ = [] diff --git a/python/paddle/dataset/tests/test_image.py b/python/paddle/dataset/tests/test_image.py index 953e70c41865d3..8656ef2669bfa8 100644 --- a/python/paddle/dataset/tests/test_image.py +++ b/python/paddle/dataset/tests/test_image.py @@ -20,7 +20,9 @@ import sys import unittest + import numpy as np + from paddle.dataset import image __all__ = [] diff --git a/python/paddle/dataset/tests/voc2012_test.py b/python/paddle/dataset/tests/voc2012_test.py index 7db2f5abd385f1..a4fd3d682a5346 100644 --- a/python/paddle/dataset/tests/voc2012_test.py +++ b/python/paddle/dataset/tests/voc2012_test.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.dataset.voc2012 import unittest +import paddle.dataset.voc2012 + __all__ = [] diff --git a/python/paddle/dataset/tests/wmt16_test.py b/python/paddle/dataset/tests/wmt16_test.py index 58f405b26e306b..91aa4cab5f466b 100644 --- a/python/paddle/dataset/tests/wmt16_test.py +++ b/python/paddle/dataset/tests/wmt16_test.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.dataset.wmt16 import unittest +import paddle.dataset.wmt16 + __all__ = [] diff --git a/python/paddle/dataset/uci_housing.py b/python/paddle/dataset/uci_housing.py index 6c085920e2e053..14d61db0774ec7 100644 --- a/python/paddle/dataset/uci_housing.py +++ b/python/paddle/dataset/uci_housing.py @@ -19,10 +19,12 @@ parse training set and test set into paddle reader creators. """ -import numpy as np -import tempfile -import tarfile import os +import tarfile +import tempfile + +import numpy as np + import paddle.dataset.common import paddle.utils.deprecated as deprecated diff --git a/python/paddle/dataset/voc2012.py b/python/paddle/dataset/voc2012.py index 2a80eab1e84b90..3a7b2f8a0b58b4 100644 --- a/python/paddle/dataset/voc2012.py +++ b/python/paddle/dataset/voc2012.py @@ -19,13 +19,15 @@ with segmentation has been increased from 7,062 to 9,993. """ -import tarfile import io +import tarfile + import numpy as np -from paddle.dataset.common import download -import paddle.utils.deprecated as deprecated from PIL import Image +import paddle.utils.deprecated as deprecated +from paddle.dataset.common import download + __all__ = [] VOC_URL = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/\ diff --git a/python/paddle/device/cuda/graphs.py b/python/paddle/device/cuda/graphs.py index ea9962f22997cf..75c3e61ad2cb87 100644 --- a/python/paddle/device/cuda/graphs.py +++ b/python/paddle/device/cuda/graphs.py @@ -13,15 +13,16 @@ # limitations under the License. import os +import warnings + import paddle from paddle.fluid import core -from paddle.fluid.layers.utils import _hash_with_id from paddle.fluid.core import ( + CUDAPlace, is_compiled_with_cuda, is_compiled_with_rocm, - CUDAPlace, ) -import warnings +from paddle.fluid.layers.utils import _hash_with_id if is_compiled_with_cuda() and not is_compiled_with_rocm(): from paddle.fluid.core import CUDAGraph as CoreCUDAGraph diff --git a/python/paddle/device/cuda/streams.py b/python/paddle/device/cuda/streams.py index 573750a862fafe..6d716fd9016c58 100644 --- a/python/paddle/device/cuda/streams.py +++ b/python/paddle/device/cuda/streams.py @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from paddle.fluid.core import CUDAStream as Stream # noqa: F401 from paddle.fluid.core import CUDAEvent as Event # noqa: F401 +from paddle.fluid.core import CUDAStream as Stream # noqa: F401 diff --git a/python/paddle/distributed/auto_parallel/callbacks.py b/python/paddle/distributed/auto_parallel/callbacks.py index 98e32430b2e179..abf6423bfe8ea9 100644 --- a/python/paddle/distributed/auto_parallel/callbacks.py +++ b/python/paddle/distributed/auto_parallel/callbacks.py @@ -17,12 +17,13 @@ import paddle from paddle.hapi.callbacks import ( - ProgBarLogger, - ModelCheckpoint, - LRScheduler, - CallbackList, Callback, + CallbackList, + LRScheduler, + ModelCheckpoint, + ProgBarLogger, ) + from .interface import CollectionNames, get_collection diff --git a/python/paddle/distributed/auto_parallel/cluster.py b/python/paddle/distributed/auto_parallel/cluster.py index ca66a770e3c4cf..b1cb2b8a741c14 100644 --- a/python/paddle/distributed/auto_parallel/cluster.py +++ b/python/paddle/distributed/auto_parallel/cluster.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import json -from enum import IntEnum -from enum import unique +import os +from enum import IntEnum, unique + import paddle diff --git a/python/paddle/distributed/auto_parallel/cluster_v2.py b/python/paddle/distributed/auto_parallel/cluster_v2.py index 1ec2332ad4003b..951114ff38e6d3 100644 --- a/python/paddle/distributed/auto_parallel/cluster_v2.py +++ b/python/paddle/distributed/auto_parallel/cluster_v2.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from enum import IntEnum, unique + import numpy as np -from enum import IntEnum -from enum import unique from paddle.fluid import core from paddle.fluid.core import Device # noqa: F401 diff --git a/python/paddle/distributed/auto_parallel/completion.py b/python/paddle/distributed/auto_parallel/completion.py index 7f5e0fee775267..643408d49cbe6a 100644 --- a/python/paddle/distributed/auto_parallel/completion.py +++ b/python/paddle/distributed/auto_parallel/completion.py @@ -15,17 +15,23 @@ import copy import logging +from paddle.distributed.fleet.meta_optimizers.common import OpRole from paddle.fluid import core -from .utils import is_naive_data_parallel, get_logger -from .utils import is_gradient_clip_op, __no_shape_var_type__ -from .operators import find_compatible_distributed_operator_impls +from .dist_attribute import ( + OperatorDistributedAttribute, + TensorDistributedAttribute, +) from .dist_context import _node_id -from .dist_attribute import TensorDistributedAttribute -from .dist_attribute import OperatorDistributedAttribute -from .process_mesh import ProcessMesh +from .operators import find_compatible_distributed_operator_impls from .process_group import get_world_process_group -from paddle.distributed.fleet.meta_optimizers.common import OpRole +from .process_mesh import ProcessMesh +from .utils import ( + __no_shape_var_type__, + get_logger, + is_gradient_clip_op, + is_naive_data_parallel, +) def compute_compatible_process_mesh(process_mesh_list): diff --git a/python/paddle/distributed/auto_parallel/converter.py b/python/paddle/distributed/auto_parallel/converter.py index cc0966be4aba4c..890f48ae9d22ca 100644 --- a/python/paddle/distributed/auto_parallel/converter.py +++ b/python/paddle/distributed/auto_parallel/converter.py @@ -12,10 +12,13 @@ # See the License for the specific language governing permissions and # 
limitations under the License. -import paddle -import warnings import logging +import warnings + import numpy as np + +import paddle + from ..utils.log_utils import get_logger diff --git a/python/paddle/distributed/auto_parallel/cost/base_cost.py b/python/paddle/distributed/auto_parallel/cost/base_cost.py index f9dc6b6fc27321..3de1b46453d307 100644 --- a/python/paddle/distributed/auto_parallel/cost/base_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/base_cost.py @@ -17,12 +17,10 @@ import paddle -from ..utils import _get_comm_group -from ..process_group import get_process_group from ..cluster import LinkType from ..dist_tensor import DistributedTensor -from ..utils import _get_idx_in_axis -from ..dist_tensor import DistributedTensor +from ..process_group import get_process_group +from ..utils import _get_comm_group, _get_idx_in_axis COMM_OP_TYPE = [ "send_v2", diff --git a/python/paddle/distributed/auto_parallel/cost/estimate_cost.py b/python/paddle/distributed/auto_parallel/cost/estimate_cost.py index 5808b706fce03d..7d6f8d8474c831 100644 --- a/python/paddle/distributed/auto_parallel/cost/estimate_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/estimate_cost.py @@ -18,9 +18,9 @@ import paddle from paddle.distributed.fleet.meta_optimizers.common import OpRole -from .base_cost import Cost -from ..operators.common import get_distributed_operator_impl_container from ..dist_tensor import DistributedTensor +from ..operators.common import get_distributed_operator_impl_container +from .base_cost import Cost class CostEstimator: @@ -544,9 +544,10 @@ def pretty_print_cost(self): def get_cost_from_engine(engine, mode): - from ..utils import to_list import copy + from ..utils import to_list + # Construct cost estimator by original main program serial_main_prog = ( engine._fwd_main_progs[mode].clone() diff --git a/python/paddle/distributed/auto_parallel/cost/tensor_cost.py b/python/paddle/distributed/auto_parallel/cost/tensor_cost.py index 9d0794e23757da..03140ae3dc3405 100644 --- a/python/paddle/distributed/auto_parallel/cost/tensor_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/tensor_cost.py @@ -15,8 +15,8 @@ from functools import reduce import paddle -from paddle.fluid.framework import Variable from paddle.distributed.auto_parallel.dist_tensor import DistributedTensor +from paddle.fluid.framework import Variable from .base_cost import Cost diff --git a/python/paddle/distributed/auto_parallel/cost_model.py b/python/paddle/distributed/auto_parallel/cost_model.py index 73e899614d4332..5eeb1e41cd4730 100644 --- a/python/paddle/distributed/auto_parallel/cost_model.py +++ b/python/paddle/distributed/auto_parallel/cost_model.py @@ -12,15 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import queue import copy +import queue from enum import Enum import numpy as np import paddle -from paddle.fluid import core from paddle.distributed.fleet.meta_optimizers.common import OpRole +from paddle.fluid import core SUCC = 0 # successor PRED = 1 # predecessor diff --git a/python/paddle/distributed/auto_parallel/dist_attribute.py b/python/paddle/distributed/auto_parallel/dist_attribute.py index 19aaa07f24df66..8635818bb3436b 100644 --- a/python/paddle/distributed/auto_parallel/dist_attribute.py +++ b/python/paddle/distributed/auto_parallel/dist_attribute.py @@ -13,7 +13,9 @@ # limitations under the License import copy + from paddle.fluid.framework import Variable + from .process_mesh import ProcessMesh _g_tensor_dist_attr_field_keys = [ diff --git a/python/paddle/distributed/auto_parallel/dist_context.py b/python/paddle/distributed/auto_parallel/dist_context.py index 199f27934d728e..7ac260af1501fb 100644 --- a/python/paddle/distributed/auto_parallel/dist_context.py +++ b/python/paddle/distributed/auto_parallel/dist_context.py @@ -14,16 +14,19 @@ import copy from collections import defaultdict -from paddle.fluid import framework -from paddle.fluid.framework import set_flags -from paddle.fluid import core + from paddle.distributed.passes import PassContext -from .dist_tensor import DistributedTensor +from paddle.fluid import core, framework +from paddle.fluid.framework import set_flags + from .dist_op import DistributedOperator +from .dist_tensor import DistributedTensor from .process_mesh import ProcessMesh -from .utils import _copy_dist_attr_to_cpp -from .utils import is_loss_grad_op, __no_shape_var_type__ - +from .utils import ( + __no_shape_var_type__, + _copy_dist_attr_to_cpp, + is_loss_grad_op, +) # There always exists a default context for user. And user can set it to another one. 
_g_default_distributed_context = None diff --git a/python/paddle/distributed/auto_parallel/dist_loader.py b/python/paddle/distributed/auto_parallel/dist_loader.py index f0e0b8aa5a0d7a..7da0dc7dacd704 100644 --- a/python/paddle/distributed/auto_parallel/dist_loader.py +++ b/python/paddle/distributed/auto_parallel/dist_loader.py @@ -13,19 +13,20 @@ # limitations under the License import abc + import numpy as np import paddle -from paddle.io import BatchSampler, IterableDataset from paddle.fluid.dataloader.batch_sampler import ( - _InfiniteIterableSampler, DistributedBatchSampler, + _InfiniteIterableSampler, ) from paddle.fluid.dataloader.dataloader_iter import ( _DatasetKind, default_collate_fn, default_convert_fn, ) +from paddle.io import BatchSampler, IterableDataset class DistributedDataLoaderBase(metaclass=abc.ABCMeta): diff --git a/python/paddle/distributed/auto_parallel/dist_op.py b/python/paddle/distributed/auto_parallel/dist_op.py index 80141730bc1a16..484bf45111dc99 100644 --- a/python/paddle/distributed/auto_parallel/dist_op.py +++ b/python/paddle/distributed/auto_parallel/dist_op.py @@ -13,15 +13,19 @@ # limitations under the License import copy + import paddle from paddle.fluid.framework import Variable -from .dist_attribute import OperatorDistributedAttribute -from .dist_attribute import append_op_input_suffix -from .dist_attribute import append_op_output_suffix + +from .dist_attribute import ( + OperatorDistributedAttribute, + append_op_input_suffix, + append_op_output_suffix, +) from .utils import ( + __no_shape_var_type__, convert_to_shard_spec, verify_shard_spec, - __no_shape_var_type__, ) diff --git a/python/paddle/distributed/auto_parallel/dist_saver.py b/python/paddle/distributed/auto_parallel/dist_saver.py index f2766bff602e38..7205a268d09222 100644 --- a/python/paddle/distributed/auto_parallel/dist_saver.py +++ b/python/paddle/distributed/auto_parallel/dist_saver.py @@ -12,19 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License -import re -import os import errno -import pickle import logging +import os +import pickle +import re + import numpy as np -import paddle +import paddle from paddle import fluid from paddle.fluid import core -from .utils import get_dist_attr -from .process_group import _g_process_group_map + from ..utils.log_utils import get_logger +from .process_group import _g_process_group_map +from .utils import get_dist_attr def check_filename(re_exp, filename): diff --git a/python/paddle/distributed/auto_parallel/dist_tensor.py b/python/paddle/distributed/auto_parallel/dist_tensor.py index 8ba0e8570e1d54..9c55998857da9b 100644 --- a/python/paddle/distributed/auto_parallel/dist_tensor.py +++ b/python/paddle/distributed/auto_parallel/dist_tensor.py @@ -16,9 +16,10 @@ import inspect import paddle -from paddle.fluid.framework import Parameter, Block, Variable +from paddle.fluid.framework import Block, Parameter, Variable + from .dist_attribute import TensorDistributedAttribute -from .utils import _linear_idx2coordinate, __no_shape_var_type__ +from .utils import __no_shape_var_type__, _linear_idx2coordinate class DistributedTensor: diff --git a/python/paddle/distributed/auto_parallel/engine.py b/python/paddle/distributed/auto_parallel/engine.py index 8e27b9aac6c703..092212a87168b5 100644 --- a/python/paddle/distributed/auto_parallel/engine.py +++ b/python/paddle/distributed/auto_parallel/engine.py @@ -12,49 +12,48 @@ # See the License for the specific language governing permissions and # limitations 
under the License. -import os import copy import logging -import random import numbers -import numpy as np +import os +import random from collections import defaultdict +import numpy as np + import paddle -import paddle.utils as utils import paddle.distributed.auto_parallel.utils as auto_utils - +import paddle.utils as utils from paddle import fluid, static +from paddle.distributed import fleet +from paddle.fluid import Variable, core +from paddle.fluid.dygraph.parallel import ParallelEnv +from paddle.fluid.executor import _to_name_str, global_scope +from paddle.fluid.framework import Operator +from paddle.fluid.framework import _current_expected_place as _get_device +from paddle.fluid.framework import _non_static_mode +from paddle.fluid.layers.utils import flatten from paddle.metric import Metric from paddle.static import InputSpec -from paddle.fluid import core -from paddle.fluid import Variable -from paddle.fluid.layers.utils import flatten -from paddle.fluid.executor import global_scope, _to_name_str -from paddle.fluid.framework import Operator, _non_static_mode -from paddle.fluid.framework import _current_expected_place as _get_device -from paddle.fluid.dygraph.parallel import ParallelEnv -from paddle.distributed import fleet +from ..utils.log_utils import get_logger from .callbacks import config_callbacks -from .converter import Converter -from .helper import ProgramHelper from .cluster import Cluster, get_default_cluster -from .planner_v2 import Planner -from .parallelizer_v2 import Parallelizer -from .dist_op import DistributedOperator -from .dist_saver import DistributedSaver +from .converter import Converter +from .cost.estimate_cost import get_cost_from_engine +from .dist_context import DistributedContext, get_default_distributed_context from .dist_loader import ( - DistributedDataLoaderFromGenerator, DistributedDataLoader, + DistributedDataLoaderFromGenerator, ) -from .strategy import Strategy -from .process_group import new_process_group, get_all_process_groups -from .dist_context import DistributedContext, get_default_distributed_context +from .dist_op import DistributedOperator +from .dist_saver import DistributedSaver +from .helper import ProgramHelper from .interface import CollectionNames, get_collection -from .cost.estimate_cost import get_cost_from_engine - -from ..utils.log_utils import get_logger +from .parallelizer_v2 import Parallelizer +from .planner_v2 import Planner +from .process_group import get_all_process_groups, new_process_group +from .strategy import Strategy class Engine: diff --git a/python/paddle/distributed/auto_parallel/helper.py b/python/paddle/distributed/auto_parallel/helper.py index 9cc2a57e56a504..f156eba1b0e47f 100644 --- a/python/paddle/distributed/auto_parallel/helper.py +++ b/python/paddle/distributed/auto_parallel/helper.py @@ -15,18 +15,14 @@ import logging from collections import defaultdict -from paddle.nn import Layer -from paddle.jit import to_static, not_to_static -from paddle.fluid.framework import Parameter -from paddle.fluid.framework import program_guard from paddle.fluid.executor import global_scope -from paddle.jit.dy2static.program_translator import ( - StaticFunction, -) +from paddle.fluid.framework import Parameter, program_guard +from paddle.jit import not_to_static, to_static +from paddle.jit.dy2static.program_translator import StaticFunction +from paddle.nn import Layer -from .utils import to_list -from .utils import get_logger from .converter import Converter +from .utils import get_logger, to_list class ProxyLayer(Layer): 
diff --git a/python/paddle/distributed/auto_parallel/interface.py b/python/paddle/distributed/auto_parallel/interface.py index b85d85011a1fab..882b63b39395b1 100644 --- a/python/paddle/distributed/auto_parallel/interface.py +++ b/python/paddle/distributed/auto_parallel/interface.py @@ -13,15 +13,15 @@ # limitations under the License. import paddle -from .process_mesh import ProcessMesh -from .process_mesh import get_current_process_mesh + from .dist_context import get_default_distributed_context -from .dist_tensor import DistributedTensor from .dist_op import DistributedOperatorHelper +from .dist_tensor import DistributedTensor +from .process_mesh import ProcessMesh, get_current_process_mesh from .utils import ( - verify_shard_spec, - convert_to_dims_mapping, __no_shape_var_type__, + convert_to_dims_mapping, + verify_shard_spec, ) diff --git a/python/paddle/distributed/auto_parallel/mapper.py b/python/paddle/distributed/auto_parallel/mapper.py index d7b599aaa1c441..8b0d17e8fc6bd1 100644 --- a/python/paddle/distributed/auto_parallel/mapper.py +++ b/python/paddle/distributed/auto_parallel/mapper.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License -import os -import operator import functools -import paddle +import operator +import os from collections import deque -from .graph import Graph + +import paddle + from .cluster import DeviceType +from .graph import Graph from .process_group import get_process_group diff --git a/python/paddle/distributed/auto_parallel/operators/common.py b/python/paddle/distributed/auto_parallel/operators/common.py index 72ed66f3e41a0c..7eace81155c4e4 100644 --- a/python/paddle/distributed/auto_parallel/operators/common.py +++ b/python/paddle/distributed/auto_parallel/operators/common.py @@ -13,10 +13,12 @@ # limitations under the License import abc + from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole + from ..dist_attribute import OperatorDistributedAttribute -from ..utils import _get_comm_group, _get_corresponding_rank, is_optimize_op from ..process_group import new_process_group +from ..utils import _get_comm_group, _get_corresponding_rank, is_optimize_op _g_distributed_operator_impl_containers = {} diff --git a/python/paddle/distributed/auto_parallel/operators/dist_assign.py b/python/paddle/distributed/auto_parallel/operators/dist_assign.py index 9bf7e26d3fad75..c4beefd52ddd71 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_assign.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_assign.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl -from .dist_default import DistributedDefaultImpl0 from ..utils import compute_compatible_and_update_dim_mapping +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) +from .dist_default import DistributedDefaultImpl0 class DistributedAssign(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py b/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py index c1834bde1136c7..1c2e4890736f57 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py @@ -12,19 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License -from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl -from paddle.fluid import core -from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole -from ..utils import set_var_dist_attr -from ..utils import set_dist_op_desc_original_id -from ..process_group import new_process_group -from ..dist_attribute import OperatorDistributedAttribute from paddle.distributed.auto_parallel.process_group import ( get_world_process_group, ) +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole +from paddle.fluid import core + +from ..dist_attribute import OperatorDistributedAttribute +from ..process_group import new_process_group +from ..utils import set_dist_op_desc_original_id, set_var_dist_attr +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) world_process_group = get_world_process_group() diff --git a/python/paddle/distributed/auto_parallel/operators/dist_default.py b/python/paddle/distributed/auto_parallel/operators/dist_default.py index 85ffb77d97b525..402c4cd74b4b96 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_default.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_default.py @@ -12,21 +12,31 @@ # See the License for the specific language governing permissions and # limitations under the License -from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import gradient_synchronization -from .common import register_distributed_operator_impl, is_parameter_related -from ..utils import is_prim_op -from ..utils import compute_compatible_dim_mapping -from ..utils import set_dist_op_desc_original_id -from ..dist_attribute import OperatorDistributedAttribute from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole + +from ..cost import ( + _g_op_cost_factory, + build_comp_costs_from_descs, + build_comp_desc_from_dist_op, + build_dp_costs, +) +from ..dist_attribute import OperatorDistributedAttribute from ..process_group import new_process_group -from ..utils import 
_get_comm_group, _get_corresponding_rank -from ..cost import _g_op_cost_factory -from ..cost import build_comp_desc_from_dist_op, build_dp_costs -from ..cost import build_comp_costs_from_descs +from ..utils import ( + _get_comm_group, + _get_corresponding_rank, + compute_compatible_dim_mapping, + is_prim_op, + set_dist_op_desc_original_id, +) +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + gradient_synchronization, + is_parameter_related, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) __op_not_need_param_init__ = ["while", "cond"] diff --git a/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py b/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py index 75dcc98faa130c..e0e1b1213f645f 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py @@ -12,18 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License -from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl, is_parameter_related -from .common import is_elementwise_op -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from paddle.distributed.fleet.meta_optimizers.common import OpRole + +from ..cost import ( + _g_op_cost_factory, + build_comp_costs_from_descs, + build_comp_desc_from_dist_op, + build_dp_costs, +) +from ..utils import ( + compute_compatible_dim_mapping, + compute_compatible_dims_mapping, +) +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + is_elementwise_op, + is_parameter_related, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) from .dist_default import DistributedDefaultImpl0 -from ..cost import _g_op_cost_factory -from ..cost import build_comp_desc_from_dist_op, build_dp_costs -from ..cost import build_comp_costs_from_descs class DistributedElementwise(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_embedding.py b/python/paddle/distributed/auto_parallel/operators/dist_embedding.py index 683236cadd14f8..9619d12681aa99 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_embedding.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_embedding.py @@ -12,40 +12,43 @@ # See the License for the specific language governing permissions and # limitations under the License -from .common import infer_shape -from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import gradient_synchronization -from .common import ( - naive_copy_op_dist_attr_for_program, - register_distributed_operator_impl, - set_comm_op_dist_attr_for_program, +from paddle.distributed.auto_parallel.cost.comm_op_cost import ( + AllreduceSumOpCost, + IdentityOpCost, ) -from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import compute_compatible_and_update_dim_mapping -from ..dist_attribute import OperatorDistributedAttribute -from paddle.fluid import core, unique_name -from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype from 
paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole +from paddle.fluid import core, unique_name +from paddle.fluid.data_feeder import check_dtype, check_variable_and_dtype + +from ..cost import ( + EmbeddingGradOpCost, + EmbeddingOpCost, + build_comm_costs_from_descs, + build_comm_desc_from_dist_op, + build_comp_costs_from_descs, + build_comp_desc_from_dist_op, + build_dp_costs, +) +from ..dist_attribute import OperatorDistributedAttribute from ..process_group import new_process_group from ..utils import ( _get_comm_group, - _get_idx_in_axis, _get_corresponding_rank, + _get_idx_in_axis, + compute_compatible_and_update_dim_mapping, + is_dim_replicate, + is_dim_shard, set_var_dist_attr, ) -from ..cost import build_comp_desc_from_dist_op, build_comm_desc_from_dist_op -from ..cost import ( - build_comm_costs_from_descs, - build_comp_costs_from_descs, - build_dp_costs, -) -from ..cost import EmbeddingOpCost, EmbeddingGradOpCost -from paddle.distributed.auto_parallel.cost.comm_op_cost import ( - AllreduceSumOpCost, - IdentityOpCost, +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + gradient_synchronization, + infer_shape, + naive_copy_op_dist_attr_for_program, + register_distributed_operator_impl, + register_distributed_operator_impl_container, + set_comm_op_dist_attr_for_program, ) diff --git a/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py b/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py index c7cd2df2ce4343..014aa0e98d8837 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py @@ -12,16 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License -from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl -from ..utils import compute_compatible_and_update_dim_mapping from paddle.distributed.fleet.meta_optimizers.common import OpRole + +from ..cost import ( + FillConstantBatchSizeLikeOpCost, + build_comp_costs_from_descs, + build_comp_desc_from_dist_op, +) +from ..utils import compute_compatible_and_update_dim_mapping +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) from .dist_default import DistributedDefaultImpl0 -from ..cost import FillConstantBatchSizeLikeOpCost -from ..cost import build_comp_desc_from_dist_op -from ..cost import build_comp_costs_from_descs class DistributedFillConstantBatchSizeLike(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py b/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py index 7318ee9c73579a..b4cf4da452e601 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py @@ -12,15 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl -from ..utils import is_dim_shard, is_dim_replicate -from ..utils import compute_compatible_and_update_dim_mapping -from .dist_default import DistributedDefaultImpl0 -from ..utils import _get_comm_group, _get_corresponding_rank from ..process_group import new_process_group +from ..utils import ( + _get_comm_group, + _get_corresponding_rank, + compute_compatible_and_update_dim_mapping, + is_dim_replicate, + is_dim_shard, +) +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) +from .dist_default import DistributedDefaultImpl0 class DistributedFusedAttention(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py b/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py index 6f671a535cf615..2b7f38d4754707 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py @@ -12,15 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl -from ..utils import is_dim_shard, is_dim_replicate -from ..utils import compute_compatible_and_update_dim_mapping -from .dist_default import DistributedDefaultImpl0 -from ..utils import _get_comm_group, _get_corresponding_rank from ..process_group import new_process_group +from ..utils import ( + _get_comm_group, + _get_corresponding_rank, + compute_compatible_and_update_dim_mapping, + is_dim_replicate, + is_dim_shard, +) +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) +from .dist_default import DistributedDefaultImpl0 class DistributedFusedFeedForward(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py index fa6557f497bb28..9249837d51361a 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py @@ -14,38 +14,50 @@ import copy -from .common import infer_shape -from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl -from .common import gradient_synchronization -from .common import is_parameter_related, set_comm_op_dist_attr_for_program -from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dims_mapping -from ..utils import compute_compatible_and_update_dim_mapping -from ..utils import set_dist_op_desc_original_id -from ..dist_attribute import OperatorDistributedAttribute -from paddle.fluid import core, unique_name -from paddle.fluid.data_feeder import 
check_variable_and_dtype, check_dtype +from paddle.distributed.auto_parallel.cost.comm_op_cost import ( + AllreduceSumOpCost, + IdentityOpCost, +) from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole -from ..process_group import new_process_group -from ..utils import _get_comm_group, _get_corresponding_rank -from .dist_default import DistributedDefaultImpl0 +from paddle.fluid import core, unique_name +from paddle.fluid.data_feeder import check_dtype, check_variable_and_dtype + from ..cost import ( - build_comp_desc_from_dist_op, + MatmulGradOpCost, + MatmulOpCost, + MatmulV2GradOpCost, + MatmulV2OpCost, + MulGradOpCost, + MulOpCost, + build_comm_costs_from_descs, build_comm_desc_from_dist_op, + build_comp_costs_from_descs, + build_comp_desc_from_dist_op, build_dp_costs, ) -from ..cost import build_comm_costs_from_descs, build_comp_costs_from_descs -from ..cost import MatmulV2OpCost, MatmulOpCost, MulOpCost -from ..cost import MatmulV2GradOpCost, MatmulGradOpCost, MulGradOpCost -from paddle.distributed.auto_parallel.cost.comm_op_cost import ( - AllreduceSumOpCost, - IdentityOpCost, +from ..dist_attribute import OperatorDistributedAttribute +from ..process_group import new_process_group +from ..utils import ( + _get_comm_group, + _get_corresponding_rank, + compute_compatible_and_update_dim_mapping, + compute_compatible_dims_mapping, + is_dim_replicate, + is_dim_shard, + is_valid_list_index, + set_dist_op_desc_original_id, ) +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + gradient_synchronization, + infer_shape, + is_parameter_related, + register_distributed_operator_impl, + register_distributed_operator_impl_container, + set_comm_op_dist_attr_for_program, +) +from .dist_default import DistributedDefaultImpl0 def trans_x_y_dims_mapping(trans_x, trans_y, x_dims_mapping, y_dims_mapping): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py b/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py index 99cc63a7b93dd5..766256aaa50b3f 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py @@ -14,26 +14,30 @@ import copy -from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl +from paddle.fluid import core +from paddle.fluid.data_feeder import check_dtype, check_variable_and_dtype +from paddle.fluid.framework import Operator + +from ..dist_attribute import ( + OperatorDistributedAttribute, + TensorDistributedAttribute, +) from ..process_group import new_process_group -from ..utils import is_dim_shard, is_dim_replicate, _get_corresponding_rank from ..utils import ( + _get_comm_group, + _get_corresponding_rank, compute_compatible_dim_mapping, + is_dim_replicate, + is_dim_shard, set_dist_op_desc_original_id, - _get_comm_group, ) -from ..dist_attribute import ( - TensorDistributedAttribute, - OperatorDistributedAttribute, +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + register_distributed_operator_impl, + register_distributed_operator_impl_container, ) -from paddle.fluid import core -from paddle.fluid.framework import Operator -from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype - class DistributedPNorm(DistributedOperatorImplContainer): def __init__(self, op_type): diff --git 
a/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py b/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py index 75dbb7f9c0dcba..4789c4b54fe44a 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py @@ -12,14 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License -from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl -from ..utils import set_dist_op_desc_original_id -from ..dist_attribute import OperatorDistributedAttribute from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole + +from ..dist_attribute import OperatorDistributedAttribute from ..process_group import new_process_group +from ..utils import set_dist_op_desc_original_id +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) class DistributedReduceSumPrimtive(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_reshape.py b/python/paddle/distributed/auto_parallel/operators/dist_reshape.py index 7d4aa3f517be86..43b6f984fa675c 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_reshape.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_reshape.py @@ -12,20 +12,29 @@ # See the License for the specific language governing permissions and # limitations under the License -from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl, is_parameter_related -from ..utils import is_dim_shard -from ..utils import compute_compatible_and_update_dim_mapping -from ..utils import set_dist_op_desc_original_id -from .dist_default import DistributedDefaultImpl0 -from ..cost import build_comp_desc_from_dist_op, build_comp_costs_from_descs -from ..cost import Reshape2OpCost -from ..cost import Reshape2GradOpCost -from ..cost import build_dp_costs from paddle.distributed.fleet.meta_optimizers.common import OpRole +from ..cost import ( + Reshape2GradOpCost, + Reshape2OpCost, + build_comp_costs_from_descs, + build_comp_desc_from_dist_op, + build_dp_costs, +) +from ..utils import ( + compute_compatible_and_update_dim_mapping, + is_dim_shard, + set_dist_op_desc_original_id, +) +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + is_parameter_related, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) +from .dist_default import DistributedDefaultImpl0 + class DistributedReshape2(DistributedOperatorImplContainer): def __init__(self, op_type): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_scale.py b/python/paddle/distributed/auto_parallel/operators/dist_scale.py index e419dd6c824282..9fc28d05a20775 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_scale.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_scale.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl -from .dist_default import DistributedDefaultImpl0 from ..utils import compute_compatible_and_update_dim_mapping +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) +from .dist_default import DistributedDefaultImpl0 class DistributedScale(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_shape.py b/python/paddle/distributed/auto_parallel/operators/dist_shape.py index 23bd35f04f62eb..920edb54a73c67 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_shape.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_shape.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl -from .dist_default import DistributedDefaultImpl0 from ..utils import is_dim_shard +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) +from .dist_default import DistributedDefaultImpl0 class DistributedShape(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_slice.py b/python/paddle/distributed/auto_parallel/operators/dist_slice.py index 18c643c1d76cbb..2e28b6a7a22fb7 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_slice.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_slice.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl -from ..utils import is_dim_shard -from ..utils import compute_compatible_dim_mapping +from ..utils import compute_compatible_dim_mapping, is_dim_shard +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) from .dist_default import DistributedDefaultImpl0 diff --git a/python/paddle/distributed/auto_parallel/operators/dist_softmax.py b/python/paddle/distributed/auto_parallel/operators/dist_softmax.py index 0059d0e1bb4592..97b21ba920345e 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_softmax.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_softmax.py @@ -12,19 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License -from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl -from .common import is_parameter_related -from ..utils import is_dim_shard -from ..utils import compute_compatible_and_update_dim_mapping -from .dist_default import DistributedDefaultImpl0 -from ..cost import build_comp_desc_from_dist_op, build_dp_costs -from ..cost import build_comp_costs_from_descs -from ..cost import SoftmaxOpCost, SoftmaxGradOpCost from paddle.distributed.fleet.meta_optimizers.common import OpRole +from ..cost import ( + SoftmaxGradOpCost, + SoftmaxOpCost, + build_comp_costs_from_descs, + build_comp_desc_from_dist_op, + build_dp_costs, +) +from ..utils import compute_compatible_and_update_dim_mapping, is_dim_shard +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + is_parameter_related, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) +from .dist_default import DistributedDefaultImpl0 + class DistributedSoftmax(DistributedOperatorImplContainer): def __init__(self, op_type): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_split.py b/python/paddle/distributed/auto_parallel/operators/dist_split.py index de6776c2631f5d..f404b4c37c2c6b 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_split.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_split.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl -from ..utils import is_dim_shard -from ..utils import compute_compatible_and_update_dim_mapping +from ..utils import compute_compatible_and_update_dim_mapping, is_dim_shard +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) from .dist_default import DistributedDefaultImpl0 diff --git a/python/paddle/distributed/auto_parallel/operators/dist_transpose.py b/python/paddle/distributed/auto_parallel/operators/dist_transpose.py index c5ce7628dc7d4e..a4ab19c36cc972 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_transpose.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_transpose.py @@ -12,17 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License -from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl -from .common import is_parameter_related +from paddle.distributed.fleet.meta_optimizers.common import OpRole + +from ..cost import ( + Transpose2GradOpCost, + Transpose2OpCost, + build_comp_costs_from_descs, + build_comp_desc_from_dist_op, + build_dp_costs, +) from ..utils import compute_compatible_and_update_dim_mapping +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + is_parameter_related, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) from .dist_default import DistributedDefaultImpl0 -from ..cost import Transpose2OpCost, Transpose2GradOpCost -from ..cost import build_comp_desc_from_dist_op, build_dp_costs -from ..cost import build_comp_costs_from_descs -from paddle.distributed.fleet.meta_optimizers.common import OpRole class DistributedTranspose2(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_update_loss_scaling.py b/python/paddle/distributed/auto_parallel/operators/dist_update_loss_scaling.py index 048d06791bbfe9..c4f1794b46f5c1 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_update_loss_scaling.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_update_loss_scaling.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License -from .common import DistributedOperatorImplContainer -from .common import DistributedOperatorImpl -from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl from ..utils import set_dist_op_desc_original_id +from .common import ( + DistributedOperatorImpl, + DistributedOperatorImplContainer, + register_distributed_operator_impl, + register_distributed_operator_impl_container, +) class DistributedUpdateLossScaling(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/parallelizer.py b/python/paddle/distributed/auto_parallel/parallelizer.py index aad15642f1b3e1..25fc93995f96eb 100644 --- a/python/paddle/distributed/auto_parallel/parallelizer.py +++ b/python/paddle/distributed/auto_parallel/parallelizer.py @@ -12,39 +12,41 @@ # See the License for the specific 
language governing permissions and # limitations under the License. -import os -import sys -import json -import shlex import copy -import pathlib -import subprocess +import json import logging +import os +import pathlib import pickle +import shlex +import subprocess +import sys import time + import paddle import paddle.fluid.core as core +from paddle.distributed.passes import PassContext, new_pass +from paddle.distributed.utils.log_utils import get_logger from paddle.fluid import program_guard from paddle.fluid.backward import append_backward -from paddle.distributed.utils.log_utils import get_logger -from paddle.distributed.passes import new_pass, PassContext -from .dist_context import DistributedContext -from .dist_context import set_default_distributed_context -from .completion import Completer -from .partitioner import Partitioner -from .process_group import get_all_process_groups -from .process_group import get_process_group -from .process_group import get_world_process_group -from .process_group import _g_process_group_map, ProcessGroup -from .utils import make_data_unshard -from .utils import set_grad_var_shape -from .utils import SerialProgramInfo -from .reshard import Resharder + from .cluster import Cluster -from .mapper import mapping +from .completion import Completer +from .dist_context import DistributedContext, set_default_distributed_context from .dist_op import DistributedOperator from .dist_tensor import DistributedTensor +from .mapper import mapping +from .partitioner import Partitioner from .planner import Planner +from .process_group import ( + ProcessGroup, + _g_process_group_map, + get_all_process_groups, + get_process_group, + get_world_process_group, +) +from .reshard import Resharder +from .utils import SerialProgramInfo, make_data_unshard, set_grad_var_shape _logger = get_logger(logging.INFO) diff --git a/python/paddle/distributed/auto_parallel/parallelizer_v2.py b/python/paddle/distributed/auto_parallel/parallelizer_v2.py index 7a7ee603e9c61c..dba27588c7377e 100644 --- a/python/paddle/distributed/auto_parallel/parallelizer_v2.py +++ b/python/paddle/distributed/auto_parallel/parallelizer_v2.py @@ -13,19 +13,19 @@ # limitations under the License. 
import copy -import time import logging +import time +from paddle.distributed.passes import new_pass from paddle.fluid import program_guard from paddle.fluid.backward import append_backward from paddle.fluid.framework import unique_name -from paddle.distributed.passes import new_pass -from .reshard import Resharder +from ..utils.log_utils import get_logger from .partitioner import Partitioner -from .utils import set_grad_var_shape from .process_group import get_world_process_group -from ..utils.log_utils import get_logger +from .reshard import Resharder +from .utils import set_grad_var_shape class Parallelizer: diff --git a/python/paddle/distributed/auto_parallel/partitioner.py b/python/paddle/distributed/auto_parallel/partitioner.py index cad9fe1d4277ea..1a2571b2ceba04 100644 --- a/python/paddle/distributed/auto_parallel/partitioner.py +++ b/python/paddle/distributed/auto_parallel/partitioner.py @@ -13,22 +13,23 @@ # limitations under the License import copy + import paddle.fluid as fluid -from paddle.fluid import core -from paddle.fluid import core -from paddle.fluid.framework import Parameter, Program +from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed.auto_parallel.operators.common import ( get_distributed_operator_impl_container, ) -from paddle.distributed.auto_parallel.dist_context import DistributedContext +from paddle.fluid import core +from paddle.fluid.framework import Parameter, Program + from .dist_attribute import OperatorDistributedAttribute from .operators.common import BACKWARD_ONLY_DIST_OPS from .utils import ( + __no_shape_var_type__, is_backward_op, is_forward_op, is_loss_op, is_optimize_op, - __no_shape_var_type__, ) __varname_not_in_block__ = ["lod_tensor_blocking_queue"] diff --git a/python/paddle/distributed/auto_parallel/planner.py b/python/paddle/distributed/auto_parallel/planner.py index 15bf8058f73961..7ac776bbc523f5 100755 --- a/python/paddle/distributed/auto_parallel/planner.py +++ b/python/paddle/distributed/auto_parallel/planner.py @@ -13,29 +13,34 @@ # limitations under the License. 
import copy -import time import random +import time +from collections import OrderedDict from functools import reduce from itertools import chain, product -from collections import OrderedDict import numpy as np import paddle from paddle.distributed.fleet import auto + from .cost_model import estimate_cost -from .dist_op import DistributedOperator -from .process_group import get_process_group -from .operators.common import is_elementwise_op -from .operators.common import get_distributed_operator_impl_container -from .utils import update_op_dims_mapping_by_default_dist_impl -from .utils import update_op_dims_mapping_by_elementwise_like_dist_impl -from .utils import get_all_distributed_main_program -from .dist_context import DistributedContext, DistributedOperatorContext from .dist_attribute import ( OperatorDistributedAttribute, TensorDistributedAttribute, ) +from .dist_context import DistributedContext, DistributedOperatorContext +from .dist_op import DistributedOperator +from .operators.common import ( + get_distributed_operator_impl_container, + is_elementwise_op, +) +from .process_group import get_process_group +from .utils import ( + get_all_distributed_main_program, + update_op_dims_mapping_by_default_dist_impl, + update_op_dims_mapping_by_elementwise_like_dist_impl, +) paddle.seed(123) random.seed(123) diff --git a/python/paddle/distributed/auto_parallel/process_group.py b/python/paddle/distributed/auto_parallel/process_group.py index ebe478f1dff022..63e5e6ff4ce3ff 100644 --- a/python/paddle/distributed/auto_parallel/process_group.py +++ b/python/paddle/distributed/auto_parallel/process_group.py @@ -16,12 +16,11 @@ import paddle import paddle.fluid.core as core +from paddle import _legacy_C_ops -from ..collective import _get_global_env -from ..collective import _new_ring_id from ...fluid.framework import _non_static_mode from ...fluid.layers.tensor import fill_constant -from paddle import _legacy_C_ops +from ..collective import _get_global_env, _new_ring_id def get_all_process_groups(): diff --git a/python/paddle/distributed/auto_parallel/process_mesh.py b/python/paddle/distributed/auto_parallel/process_mesh.py index 1630289dde8aab..2ccd188dde4cd6 100644 --- a/python/paddle/distributed/auto_parallel/process_mesh.py +++ b/python/paddle/distributed/auto_parallel/process_mesh.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import copy + +import numpy as np + import paddle # Use to store the previous and current process mesh @@ -188,8 +190,8 @@ def __enter__(self): self._old_op_size = len(cur_block.ops) def __exit__(self, exc_type, exc_value, exc_traceback): - from .dist_tensor import DistributedTensor from .dist_op import DistributedOperator + from .dist_tensor import DistributedTensor default_prog = paddle.fluid.default_main_program() cur_block = default_prog.current_block() diff --git a/python/paddle/distributed/auto_parallel/process_mesh_v2.py b/python/paddle/distributed/auto_parallel/process_mesh_v2.py index 0b427c9d161873..46f03b2cc605cf 100644 --- a/python/paddle/distributed/auto_parallel/process_mesh_v2.py +++ b/python/paddle/distributed/auto_parallel/process_mesh_v2.py @@ -13,6 +13,7 @@ # limitations under the License. 
import numpy as np + from paddle.fluid import core diff --git a/python/paddle/distributed/auto_parallel/reshard.py b/python/paddle/distributed/auto_parallel/reshard.py index 2dd85df800a47a..06231d9b59a267 100644 --- a/python/paddle/distributed/auto_parallel/reshard.py +++ b/python/paddle/distributed/auto_parallel/reshard.py @@ -16,17 +16,24 @@ import paddle import paddle.fluid.core as core -from paddle.utils import unique_name -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import Program, OpProtoHolder -from paddle.distributed.fleet.meta_optimizers.common import OpRole import paddle.fluid.layers.utils as utils -from .dist_context import DistributedContext +from paddle.distributed.fleet.meta_optimizers.common import OpRole +from paddle.fluid.framework import OpProtoHolder, Program +from paddle.fluid.layer_helper import LayerHelper +from paddle.utils import unique_name + +from .cost import ( + AllgatherOpCost, + CommContext, + ConcatOpCost, + SendOpCost, + SliceOpCost, + SplitOpCost, + build_comm_desc, +) from .dist_attribute import TensorDistributedAttribute +from .dist_context import DistributedContext from .process_group import new_process_group -from .cost import build_comm_desc, CommContext -from .cost import AllgatherOpCost, SendOpCost -from .cost import SliceOpCost, SplitOpCost, ConcatOpCost from .utils import is_gradient_clip_op # NOTE: If op in _g_special_ops or _g_gradient_clip_ops, it will not be resharded. diff --git a/python/paddle/distributed/auto_parallel/strategy.py b/python/paddle/distributed/auto_parallel/strategy.py index dcfd453f63a33a..4d626bb6ae4950 100644 --- a/python/paddle/distributed/auto_parallel/strategy.py +++ b/python/paddle/distributed/auto_parallel/strategy.py @@ -13,6 +13,7 @@ # limitations under the License import copy + from . import constants diff --git a/python/paddle/distributed/auto_parallel/tuner/algorithms.py b/python/paddle/distributed/auto_parallel/tuner/algorithms.py index efc3358ebe41aa..8ce570d03c2881 100644 --- a/python/paddle/distributed/auto_parallel/tuner/algorithms.py +++ b/python/paddle/distributed/auto_parallel/tuner/algorithms.py @@ -13,12 +13,12 @@ # limitations under the License. import copy -from abc import ABC, abstractmethod import logging +from abc import ABC, abstractmethod from ..utils import get_logger -from .trial import TrialStatus from .trial import OptimizationTunerTrial as Trial +from .trial import TrialStatus class AlgorithmBase(ABC): diff --git a/python/paddle/distributed/auto_parallel/tuner/config.py b/python/paddle/distributed/auto_parallel/tuner/config.py index 7bb9d4f18bcef0..f47ec1ae2d0416 100644 --- a/python/paddle/distributed/auto_parallel/tuner/config.py +++ b/python/paddle/distributed/auto_parallel/tuner/config.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import copy +import os from ..strategy import Strategy diff --git a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py b/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py index 3cd58f2c004026..8a2867a315d3e0 100644 --- a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py +++ b/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py @@ -12,38 +12,40 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import copy +import json +import logging + # import yaml import os -import sys -import copy -import shlex import pathlib -import time -import shutil import pickle -import json -import logging +import shlex +import shutil import subprocess +import sys +import time import paddle -from paddle.fluid import program_guard -from paddle.fluid.backward import append_backward -from paddle.distributed.passes import new_pass, PassContext - -from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed.auto_parallel.completion import Completer -from paddle.distributed.auto_parallel.reshard import Resharder +from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed.auto_parallel.partitioner import Partitioner from paddle.distributed.auto_parallel.process_group import ( clear_all_process_groups, get_all_process_groups, ) -from paddle.distributed.auto_parallel.utils import debug_program -from paddle.distributed.auto_parallel.utils import set_grad_var_shape +from paddle.distributed.auto_parallel.reshard import Resharder +from paddle.distributed.auto_parallel.utils import ( + debug_program, + set_grad_var_shape, +) +from paddle.distributed.passes import PassContext, new_pass +from paddle.fluid import program_guard +from paddle.fluid.backward import append_backward from ..utils import get_logger -from .config import TuningConfig from .algorithms import new_algorithm +from .config import TuningConfig from .trial import TrialStatus diff --git a/python/paddle/distributed/auto_parallel/tuner/parallel_tuner.py b/python/paddle/distributed/auto_parallel/tuner/parallel_tuner.py index 9f31766f19f2f3..e1d8217a99a9fc 100644 --- a/python/paddle/distributed/auto_parallel/tuner/parallel_tuner.py +++ b/python/paddle/distributed/auto_parallel/tuner/parallel_tuner.py @@ -12,24 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -import time -import math import copy import hashlib import itertools +import math +import time from collections import defaultdict + import numpy as np -from ..process_mesh import ProcessMesh + from ..completion import Completer -from ..parallelizer_v2 import Parallelizer +from ..cost import CostEstimator from ..dist_context import _node_id from ..dist_op import DistributedOperator from ..operators.common import find_compatible_distributed_operator_impls +from ..parallelizer_v2 import Parallelizer +from ..process_mesh import ProcessMesh from .trial import Trial, TrialStatus from .tunable_space import TunableSpace from .tunable_variable import Boolean, IntRange -from ..cost import CostEstimator -from .tunable_variable import Boolean, IntRange class ParallelTuner: diff --git a/python/paddle/distributed/auto_parallel/tuner/profiler.py b/python/paddle/distributed/auto_parallel/tuner/profiler.py index 1aeafbea76410a..4a4dfea7631575 100644 --- a/python/paddle/distributed/auto_parallel/tuner/profiler.py +++ b/python/paddle/distributed/auto_parallel/tuner/profiler.py @@ -12,24 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os import argparse -import traceback -import pickle import json +import os +import pickle import time +import traceback import paddle -from paddle.fluid.framework import Program, _current_expected_place -from paddle.fluid.framework import Operator +from paddle.distributed.auto_parallel.dist_loader import ( + DistributedDataLoaderFromGenerator, +) from paddle.distributed.auto_parallel.process_group import ( get_all_process_groups, new_process_group, ) -from paddle.distributed.auto_parallel.dist_loader import ( - DistributedDataLoaderFromGenerator, -) from paddle.distributed.collective import _get_global_env +from paddle.fluid.framework import Operator, Program, _current_expected_place paddle.enable_static() diff --git a/python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py b/python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py index cf057b4a62e7d0..f6e855f71ffb04 100644 --- a/python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py +++ b/python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py @@ -16,7 +16,6 @@ from ..graph import Graph - _PATTERNS = {} diff --git a/python/paddle/distributed/auto_parallel/tuner/trial.py b/python/paddle/distributed/auto_parallel/tuner/trial.py index 8ba631cecd3e3a..2a52ae1e1a8723 100644 --- a/python/paddle/distributed/auto_parallel/tuner/trial.py +++ b/python/paddle/distributed/auto_parallel/tuner/trial.py @@ -19,8 +19,8 @@ import random import time -from .storable import Storable from .recorder import MetricsRecorder +from .storable import Storable from .tunable_space import TunableSpace diff --git a/python/paddle/distributed/auto_parallel/tuner/tunable_space.py b/python/paddle/distributed/auto_parallel/tuner/tunable_space.py index a4383f5385dee3..d7a5ffa52229eb 100644 --- a/python/paddle/distributed/auto_parallel/tuner/tunable_space.py +++ b/python/paddle/distributed/auto_parallel/tuner/tunable_space.py @@ -15,11 +15,7 @@ # Notice that the following codes are modified from KerasTuner to implement our own tuner. # Please refer to https://github.com/keras-team/keras-tuner/blob/master/keras_tuner/engine/hyperparameters.py. 
-from .tunable_variable import Boolean -from .tunable_variable import Fixed -from .tunable_variable import Choice -from .tunable_variable import IntRange -from .tunable_variable import FloatRange +from .tunable_variable import Boolean, Choice, Fixed, FloatRange, IntRange class TunableSpace: diff --git a/python/paddle/distributed/auto_parallel/utils.py b/python/paddle/distributed/auto_parallel/utils.py index 280868773cdc3c..0196ca9e7aefeb 100644 --- a/python/paddle/distributed/auto_parallel/utils.py +++ b/python/paddle/distributed/auto_parallel/utils.py @@ -12,26 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License -import os import copy -import paddle +import logging +import os import threading -import numpy as np import warnings -import logging from functools import reduce +import numpy as np + +import paddle import paddle.fluid.core as core -from paddle.fluid.framework import Variable -from paddle.distributed.fleet.meta_optimizers.common import OpRole -from paddle.distributed.auto_parallel.process_group import ( - get_all_process_groups, -) -from paddle.fluid.io import is_parameter, is_belong_to_optimizer from paddle.distributed.auto_parallel.dist_attribute import ( - TensorDistributedAttribute, OperatorDistributedAttribute, + TensorDistributedAttribute, +) +from paddle.distributed.auto_parallel.process_group import ( + get_all_process_groups, ) +from paddle.distributed.fleet.meta_optimizers.common import OpRole +from paddle.fluid.framework import Variable +from paddle.fluid.io import is_belong_to_optimizer, is_parameter OP_ROLE_KEY = core.op_proto_and_checker_maker.kOpRoleAttrName() OpRole = core.op_proto_and_checker_maker.OpRole @@ -255,8 +256,10 @@ def print_program_with_dist_attr(program, dist_context=None): """ lock = threading.Lock() lock.acquire() - from .dist_context import get_default_distributed_context - from .dist_context import set_default_distributed_context + from .dist_context import ( + get_default_distributed_context, + set_default_distributed_context, + ) if dist_context is None: dist_context = get_default_distributed_context() @@ -1847,6 +1850,7 @@ def get_lr(optimizer): def initialize_pg_in_full_mode(all_process_groups, cur_rank): import socket + from ..collective import _get_global_env has_recv_by_socket = [] @@ -1983,8 +1987,8 @@ def validate_opt(optimizer): def set_data_parallel(x): + from .interface import ProcessMesh, shard_tensor from .process_group import get_world_process_group - from .interface import shard_tensor, ProcessMesh world_ranks = get_world_process_group().ranks process_mesh = ProcessMesh(world_ranks, ['dp']) diff --git a/python/paddle/distributed/cloud_utils.py b/python/paddle/distributed/cloud_utils.py index 2e1d85205a538c..1b8952e0db2ceb 100644 --- a/python/paddle/distributed/cloud_utils.py +++ b/python/paddle/distributed/cloud_utils.py @@ -13,12 +13,13 @@ # limitations under the License. import os + from paddle.distributed.utils.launch_utils import ( get_cluster, - get_gpus, get_cluster_from_args, + get_gpus, + logger, ) -from paddle.distributed.utils.launch_utils import logger __all__ = [] diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index eeb45959ee630b..6d8cd60c6b48e3 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -13,23 +13,24 @@ # limitations under the License. 
import datetime -from ..fluid.framework import in_dygraph_mode -from ..fluid.framework import _non_static_mode + import paddle import paddle.fluid.core as core -from .fleet.layers.mpu.mp_ops import split # noqa: F401 -from .fleet.layers.mpu.mp_ops import _c_identity # noqa: F401 + +from ..fluid.framework import _non_static_mode, in_dygraph_mode +from .communication.group import Group, _add_new_group, is_initialized from .fleet.layers.mpu.mp_ops import _c_concat # noqa: F401 -from .fleet.layers.mpu.mp_ops import _c_split # noqa: F401 -from .fleet.layers.mpu.mp_ops import _mp_allreduce # noqa: F401 +from .fleet.layers.mpu.mp_ops import _c_identity # noqa: F401 from .fleet.layers.mpu.mp_ops import _c_lookup_table # noqa: F401 -from .fleet.layers.mpu.mp_ops import _Linear # noqa: F401 -from .fleet.layers.mpu.mp_ops import _set_var_distributed # noqa: F401 from .fleet.layers.mpu.mp_ops import _c_softmax_with_cross_entropy # noqa: F401 +from .fleet.layers.mpu.mp_ops import _c_split # noqa: F401 +from .fleet.layers.mpu.mp_ops import _Linear # noqa: F401 from .fleet.layers.mpu.mp_ops import _linear # noqa: F401 -from .fleet.layers.mpu.mp_ops import _parallel_linear # noqa: F401 +from .fleet.layers.mpu.mp_ops import _mp_allreduce # noqa: F401 from .fleet.layers.mpu.mp_ops import _parallel_embedding # noqa: F401 -from .communication.group import Group, _add_new_group, is_initialized +from .fleet.layers.mpu.mp_ops import _parallel_linear # noqa: F401 +from .fleet.layers.mpu.mp_ops import _set_var_distributed # noqa: F401 +from .fleet.layers.mpu.mp_ops import split # noqa: F401 __all__ = [] diff --git a/python/paddle/distributed/communication/all_gather.py b/python/paddle/distributed/communication/all_gather.py index 2a14a05a0b128a..e6540a327426b9 100644 --- a/python/paddle/distributed/communication/all_gather.py +++ b/python/paddle/distributed/communication/all_gather.py @@ -16,10 +16,11 @@ import pickle import numpy as np + import paddle import paddle.distributed as dist -import paddle.fluid.framework as framework import paddle.distributed.communication.stream as stream +import paddle.fluid.framework as framework def all_gather(tensor_list, tensor, group=None, sync_op=True): diff --git a/python/paddle/distributed/communication/all_reduce.py b/python/paddle/distributed/communication/all_reduce.py index 50d8fac160f209..0eea15db70ab99 100644 --- a/python/paddle/distributed/communication/all_reduce.py +++ b/python/paddle/distributed/communication/all_reduce.py @@ -13,8 +13,8 @@ # limitations under the License. import paddle -import paddle.fluid.framework as framework import paddle.distributed.communication.stream as stream +import paddle.fluid.framework as framework from paddle.distributed.communication.reduce import ReduceOp diff --git a/python/paddle/distributed/communication/all_to_all.py b/python/paddle/distributed/communication/all_to_all.py index 4fbd3d11fcb4c6..d8465341aa6277 100644 --- a/python/paddle/distributed/communication/all_to_all.py +++ b/python/paddle/distributed/communication/all_to_all.py @@ -13,8 +13,8 @@ # limitations under the License. 
import paddle -import paddle.fluid.framework as framework import paddle.distributed.communication.stream as stream +import paddle.fluid.framework as framework def alltoall(in_tensor_list, out_tensor_list, group=None, sync_op=True): diff --git a/python/paddle/distributed/communication/batch_isend_irecv.py b/python/paddle/distributed/communication/batch_isend_irecv.py index d3f0372b685332..a85fdcbacbf94a 100644 --- a/python/paddle/distributed/communication/batch_isend_irecv.py +++ b/python/paddle/distributed/communication/batch_isend_irecv.py @@ -13,6 +13,7 @@ # limitations under the License. import contextlib + import paddle.distributed as dist import paddle.fluid.core as core import paddle.fluid.framework as framework diff --git a/python/paddle/distributed/communication/broadcast.py b/python/paddle/distributed/communication/broadcast.py index cf8e605ecaeed5..7a04e839cdb7fa 100644 --- a/python/paddle/distributed/communication/broadcast.py +++ b/python/paddle/distributed/communication/broadcast.py @@ -13,8 +13,8 @@ # limitations under the License. import paddle -import paddle.fluid.framework as framework import paddle.distributed.communication.stream as stream +import paddle.fluid.framework as framework def broadcast(tensor, src, group=None, sync_op=True): diff --git a/python/paddle/distributed/communication/group.py b/python/paddle/distributed/communication/group.py index ea0fd6c3aaa914..85db479674699b 100644 --- a/python/paddle/distributed/communication/group.py +++ b/python/paddle/distributed/communication/group.py @@ -13,6 +13,7 @@ # limitations under the License. import warnings + import paddle import paddle.distributed as dist import paddle.fluid.core as core diff --git a/python/paddle/distributed/communication/recv.py b/python/paddle/distributed/communication/recv.py index 93196ce5f2e626..a340b26aef4fd3 100644 --- a/python/paddle/distributed/communication/recv.py +++ b/python/paddle/distributed/communication/recv.py @@ -13,8 +13,8 @@ # limitations under the License. import paddle -import paddle.fluid.framework as framework import paddle.distributed.communication.stream as stream +import paddle.fluid.framework as framework def recv(tensor, src=0, group=None, sync_op=True): diff --git a/python/paddle/distributed/communication/reduce.py b/python/paddle/distributed/communication/reduce.py index 696daf23e108ac..01002a1f58f8d4 100644 --- a/python/paddle/distributed/communication/reduce.py +++ b/python/paddle/distributed/communication/reduce.py @@ -13,9 +13,9 @@ # limitations under the License. import paddle -import paddle.fluid.framework as framework -import paddle.fluid.core as core import paddle.distributed.communication.stream as stream +import paddle.fluid.core as core +import paddle.fluid.framework as framework class ReduceOp: diff --git a/python/paddle/distributed/communication/reduce_scatter.py b/python/paddle/distributed/communication/reduce_scatter.py index 0b01b05dc44de5..e919fc971c8e14 100644 --- a/python/paddle/distributed/communication/reduce_scatter.py +++ b/python/paddle/distributed/communication/reduce_scatter.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.fluid.framework as framework import paddle.distributed.communication.stream as stream +import paddle.fluid.framework as framework from paddle.distributed.communication.reduce import ReduceOp from paddle.distributed.communication.stream.reduce_scatter import ( _reduce_scatter_base as _reduce_scatter_base_stream, diff --git a/python/paddle/distributed/communication/scatter.py b/python/paddle/distributed/communication/scatter.py index da7809df9c11a2..a0a9aeb0de2960 100644 --- a/python/paddle/distributed/communication/scatter.py +++ b/python/paddle/distributed/communication/scatter.py @@ -13,8 +13,8 @@ # limitations under the License. import paddle -import paddle.fluid.framework as framework import paddle.distributed.communication.stream as stream +import paddle.fluid.framework as framework from paddle.distributed.communication.group import _get_global_group diff --git a/python/paddle/distributed/communication/send.py b/python/paddle/distributed/communication/send.py index fa3eb3dff36f23..0d78fb85480236 100644 --- a/python/paddle/distributed/communication/send.py +++ b/python/paddle/distributed/communication/send.py @@ -13,8 +13,8 @@ # limitations under the License. import paddle -import paddle.fluid.framework as framework import paddle.distributed.communication.stream as stream +import paddle.fluid.framework as framework def send(tensor, dst=0, group=None, sync_op=True): diff --git a/python/paddle/distributed/communication/stream/all_gather.py b/python/paddle/distributed/communication/stream/all_gather.py index 641d2ea3be9a50..8e81a8723aac24 100644 --- a/python/paddle/distributed/communication/stream/all_gather.py +++ b/python/paddle/distributed/communication/stream/all_gather.py @@ -14,8 +14,8 @@ import paddle import paddle.distributed as dist -import paddle.fluid.framework as framework import paddle.fluid.data_feeder as data_feeder +import paddle.fluid.framework as framework import paddle.fluid.layer_helper as layer_helper from paddle.distributed.communication.group import _get_global_group diff --git a/python/paddle/distributed/communication/stream/all_reduce.py b/python/paddle/distributed/communication/stream/all_reduce.py index 1969b4d058ede6..dd04ab0852bf34 100644 --- a/python/paddle/distributed/communication/stream/all_reduce.py +++ b/python/paddle/distributed/communication/stream/all_reduce.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid.framework as framework import paddle.fluid.data_feeder as data_feeder +import paddle.fluid.framework as framework import paddle.fluid.layer_helper as layer_helper -from paddle.distributed.communication.reduce import _get_reduce_op, ReduceOp from paddle.distributed.communication.group import ( _get_global_group, _warn_cur_rank_not_in_group, ) +from paddle.distributed.communication.reduce import ReduceOp, _get_reduce_op def _all_reduce_in_dygraph(tensor, op, group, sync_op, use_calc_stream): diff --git a/python/paddle/distributed/communication/stream/all_to_all.py b/python/paddle/distributed/communication/stream/all_to_all.py index cc3b473d90d34e..a5293aa46e6c3d 100644 --- a/python/paddle/distributed/communication/stream/all_to_all.py +++ b/python/paddle/distributed/communication/stream/all_to_all.py @@ -13,9 +13,9 @@ # limitations under the License. 
import paddle -import paddle.fluid.framework as framework import paddle.distributed as dist import paddle.fluid.data_feeder as data_feeder +import paddle.fluid.framework as framework import paddle.fluid.layer_helper as layer_helper from paddle.distributed.communication.group import ( _get_global_group, diff --git a/python/paddle/distributed/communication/stream/broadcast.py b/python/paddle/distributed/communication/stream/broadcast.py index e4e58963f30975..3c3e7767d0d908 100644 --- a/python/paddle/distributed/communication/stream/broadcast.py +++ b/python/paddle/distributed/communication/stream/broadcast.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid.framework as framework import paddle.fluid.data_feeder as data_feeder +import paddle.fluid.framework as framework import paddle.fluid.layer_helper as layer_helper from paddle.distributed.communication.group import ( _get_global_group, - _warn_cur_rank_not_in_group, _get_or_throw_group_rank, + _warn_cur_rank_not_in_group, ) diff --git a/python/paddle/distributed/communication/stream/recv.py b/python/paddle/distributed/communication/stream/recv.py index 757b0f0c28f0d5..b1b66f959789dd 100644 --- a/python/paddle/distributed/communication/stream/recv.py +++ b/python/paddle/distributed/communication/stream/recv.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid.framework as framework import paddle.fluid.data_feeder as data_feeder +import paddle.fluid.framework as framework import paddle.fluid.layer_helper as layer_helper from paddle.distributed.communication.group import ( _get_global_group, - _warn_cur_rank_not_in_group, _get_or_throw_group_rank, + _warn_cur_rank_not_in_group, ) diff --git a/python/paddle/distributed/communication/stream/reduce.py b/python/paddle/distributed/communication/stream/reduce.py index eb8b3af80639bc..391d797f3c112a 100644 --- a/python/paddle/distributed/communication/stream/reduce.py +++ b/python/paddle/distributed/communication/stream/reduce.py @@ -12,15 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.fluid.framework as framework import paddle.fluid.data_feeder as data_feeder +import paddle.fluid.framework as framework import paddle.fluid.layer_helper as layer_helper from paddle.distributed.communication.group import ( _get_global_group, - _warn_cur_rank_not_in_group, _get_or_throw_group_rank, + _warn_cur_rank_not_in_group, ) -from paddle.distributed.communication.reduce import _get_reduce_op, ReduceOp +from paddle.distributed.communication.reduce import ReduceOp, _get_reduce_op def _reduce_in_dygraph( diff --git a/python/paddle/distributed/communication/stream/reduce_scatter.py b/python/paddle/distributed/communication/stream/reduce_scatter.py index 80e1ae7aa2156b..3442365863002e 100644 --- a/python/paddle/distributed/communication/stream/reduce_scatter.py +++ b/python/paddle/distributed/communication/stream/reduce_scatter.py @@ -18,7 +18,7 @@ _get_global_group, _warn_cur_rank_not_in_group, ) -from paddle.distributed.communication.reduce import _get_reduce_op, ReduceOp +from paddle.distributed.communication.reduce import ReduceOp, _get_reduce_op def _reduce_scatter_tensor_in_dygraph( diff --git a/python/paddle/distributed/communication/stream/scatter.py b/python/paddle/distributed/communication/stream/scatter.py index a1df9c71aee866..a75cc7c2922371 100644 --- a/python/paddle/distributed/communication/stream/scatter.py +++ b/python/paddle/distributed/communication/stream/scatter.py @@ -13,15 +13,16 @@ # limitations under the License. import warnings + import paddle import paddle.distributed as dist -import paddle.fluid.framework as framework import paddle.fluid.data_feeder as data_feeder +import paddle.fluid.framework as framework import paddle.fluid.layer_helper as layer_helper from paddle.distributed.communication.group import ( _get_global_group, - _warn_cur_rank_not_in_group, _get_or_throw_group_rank, + _warn_cur_rank_not_in_group, ) diff --git a/python/paddle/distributed/communication/stream/send.py b/python/paddle/distributed/communication/stream/send.py index f2d135abed3c92..f4325a6c19ab19 100644 --- a/python/paddle/distributed/communication/stream/send.py +++ b/python/paddle/distributed/communication/stream/send.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid.framework as framework import paddle.fluid.data_feeder as data_feeder +import paddle.fluid.framework as framework import paddle.fluid.layer_helper as layer_helper from paddle.distributed.communication.group import ( _get_global_group, - _warn_cur_rank_not_in_group, _get_or_throw_group_rank, + _warn_cur_rank_not_in_group, ) diff --git a/python/paddle/distributed/communicator.py b/python/paddle/distributed/communicator.py index d81ec001708bb1..4626dd9864e473 100755 --- a/python/paddle/distributed/communicator.py +++ b/python/paddle/distributed/communicator.py @@ -31,8 +31,8 @@ It's a wrapper of a cpp class Communicator and should be used inside fleet API. 
""" import paddle -from paddle.framework import core from paddle.distributed.ps.utils.public import DistributedMode +from paddle.framework import core __all__ = ['Communicator', 'FLCommunicator', 'LargeScaleKV'] diff --git a/python/paddle/distributed/fleet/ascend_utils.py b/python/paddle/distributed/fleet/ascend_utils.py index ee10cd78a5ed07..f0549dc9a92d47 100644 --- a/python/paddle/distributed/fleet/ascend_utils.py +++ b/python/paddle/distributed/fleet/ascend_utils.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import json +import os + from paddle.distributed.fleet.launch_utils import ( DeviceMode, get_cluster, diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index 32656c19a38dc0..68d2d7a5b0e753 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -13,13 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import google.protobuf +import google.protobuf.text_format + import paddle -from paddle.distributed.fleet.utils.log_util import logger from paddle.distributed.fleet.proto import distributed_strategy_pb2 +from paddle.distributed.fleet.utils.log_util import logger from paddle.fluid.framework import _global_flags from paddle.fluid.wrapped_decorator import wrap_decorator -import google.protobuf.text_format -import google.protobuf __all__ = [] diff --git a/python/paddle/distributed/fleet/base/orthogonal_strategy.py b/python/paddle/distributed/fleet/base/orthogonal_strategy.py index 36af85d415d556..9da61af0734734 100644 --- a/python/paddle/distributed/fleet/base/orthogonal_strategy.py +++ b/python/paddle/distributed/fleet/base/orthogonal_strategy.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import itertools import collections import functools +import itertools + import paddle.distributed as dist from paddle.distributed.fleet.base.strategy_group import StrategyGroupBase diff --git a/python/paddle/distributed/fleet/base/private_helper_function.py b/python/paddle/distributed/fleet/base/private_helper_function.py index b67146065899a1..20858379c2bc29 100644 --- a/python/paddle/distributed/fleet/base/private_helper_function.py +++ b/python/paddle/distributed/fleet/base/private_helper_function.py @@ -11,9 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import socket import sys import time -import socket from contextlib import closing __all__ = [] diff --git a/python/paddle/distributed/fleet/base/role_maker.py b/python/paddle/distributed/fleet/base/role_maker.py index e29cee04fca03c..49b19586a75a65 100755 --- a/python/paddle/distributed/fleet/base/role_maker.py +++ b/python/paddle/distributed/fleet/base/role_maker.py @@ -14,9 +14,10 @@ """Defination of Role Makers.""" import os import time -import numpy as np import warnings -from multiprocessing import Process, Manager +from multiprocessing import Manager, Process + +import numpy as np import paddle import paddle.fluid.core as core diff --git a/python/paddle/distributed/fleet/base/runtime_factory.py b/python/paddle/distributed/fleet/base/runtime_factory.py index 51758859035d49..1bc6eef1404fa9 100644 --- a/python/paddle/distributed/fleet/base/runtime_factory.py +++ b/python/paddle/distributed/fleet/base/runtime_factory.py @@ -11,8 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from ..runtime.collective_runtime import CollectiveRuntime from ...ps.the_one_ps import TheOnePSRuntime +from ..runtime.collective_runtime import CollectiveRuntime __all__ = [] diff --git a/python/paddle/distributed/fleet/base/topology.py b/python/paddle/distributed/fleet/base/topology.py index c34d64e6117008..20faacf4ffe45d 100644 --- a/python/paddle/distributed/fleet/base/topology.py +++ b/python/paddle/distributed/fleet/base/topology.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import collections -from itertools import product from functools import reduce +from itertools import product + +import paddle + from ..utils.log_util import logger __all__ = ['CommunicateTopology', 'HybridCommunicateGroup'] diff --git a/python/paddle/distributed/fleet/base/util_factory.py b/python/paddle/distributed/fleet/base/util_factory.py index 1f9a0c8d5f30c7..3d2bc98a40bf2e 100755 --- a/python/paddle/distributed/fleet/base/util_factory.py +++ b/python/paddle/distributed/fleet/base/util_factory.py @@ -16,18 +16,20 @@ """basic collective operations in python""" """remote file system""" +import os +import subprocess +from collections import OrderedDict + +import numpy as np +from google.protobuf import text_format + import paddle -from ..utils.fs import FS +import paddle.framework as framework +from paddle.fluid import core, debugger from paddle.fluid.proto import framework_pb2 from paddle.static import Program -from paddle.fluid import debugger -from google.protobuf import text_format -import paddle.framework as framework -from collections import OrderedDict -from paddle.fluid import core -import subprocess -import os -import numpy as np + +from ..utils.fs import FS __all__ = [] diff --git a/python/paddle/distributed/fleet/cloud_utils.py b/python/paddle/distributed/fleet/cloud_utils.py index cf74f8446705f0..9409867d82801a 100644 --- a/python/paddle/distributed/fleet/cloud_utils.py +++ b/python/paddle/distributed/fleet/cloud_utils.py @@ -13,6 +13,7 @@ # limitations under the License. 
import os + from paddle.distributed.fleet.launch_utils import get_cluster, logger __all__ = [] diff --git a/python/paddle/distributed/fleet/dataset/dataset.py b/python/paddle/distributed/fleet/dataset/dataset.py index 870c936e543a67..2511699246fe95 100755 --- a/python/paddle/distributed/fleet/dataset/dataset.py +++ b/python/paddle/distributed/fleet/dataset/dataset.py @@ -13,9 +13,10 @@ # limitations under the License. """This is definition of dataset class, which is high performance IO.""" -from paddle.fluid.proto import data_feed_pb2 from google.protobuf import text_format + import paddle.fluid.core as core +from paddle.fluid.proto import data_feed_pb2 __all__ = [] diff --git a/python/paddle/distributed/fleet/elastic/collective.py b/python/paddle/distributed/fleet/elastic/collective.py index bdb8a6c577094e..499da820672bc1 100644 --- a/python/paddle/distributed/fleet/elastic/collective.py +++ b/python/paddle/distributed/fleet/elastic/collective.py @@ -12,17 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import tempfile import os import shutil -import paddle +import tempfile +import paddle +from paddle.distributed.fleet.elastic.manager import LauncherInterface from paddle.distributed.fleet.launch_utils import ( logger, pull_worker_log, start_local_trainers, ) -from paddle.distributed.fleet.elastic.manager import LauncherInterface class CollectiveLauncher(LauncherInterface): diff --git a/python/paddle/distributed/fleet/elastic/manager.py b/python/paddle/distributed/fleet/elastic/manager.py index 0344c1d43741b8..be99727efd5a19 100644 --- a/python/paddle/distributed/fleet/elastic/manager.py +++ b/python/paddle/distributed/fleet/elastic/manager.py @@ -12,18 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import time -import socket -import os import copy -import signal +import os import random +import signal +import socket +import subprocess import threading +import time import traceback -import subprocess -from paddle.distributed.fleet import cloud_utils -from paddle.distributed.fleet import launch_utils +from paddle.distributed.fleet import cloud_utils, launch_utils from paddle.distributed.utils.log_utils import get_logger logger = get_logger("INFO", "ELASTIC") diff --git a/python/paddle/distributed/fleet/fleet.py b/python/paddle/distributed/fleet/fleet.py index 6b265c4902c7fa..644b9fabf74283 100755 --- a/python/paddle/distributed/fleet/fleet.py +++ b/python/paddle/distributed/fleet/fleet.py @@ -13,19 +13,21 @@ # limitations under the License. 
import copy -import paddle import os -from paddle.framework import _global_flags + +import paddle from paddle.fluid import compiler -from .base.role_maker import PaddleCloudRoleMaker, RoleMakerBase -from .base.strategy_compiler import StrategyCompiler -from .base.distributed_strategy import DistributedStrategy -from .base.meta_optimizer_factory import MetaOptimizerFactory -from .base.runtime_factory import RuntimeFactory -from paddle.fluid.wrapped_decorator import wrap_decorator from paddle.fluid.dygraph import parallel_helper from paddle.fluid.ir import apply_build_strategy +from paddle.fluid.wrapped_decorator import wrap_decorator +from paddle.framework import _global_flags + from .base import topology as tp +from .base.distributed_strategy import DistributedStrategy +from .base.meta_optimizer_factory import MetaOptimizerFactory +from .base.role_maker import PaddleCloudRoleMaker, RoleMakerBase +from .base.runtime_factory import RuntimeFactory +from .base.strategy_compiler import StrategyCompiler from .meta_parallel import model_parallel_random_seed from .utils.log_util import logger, set_log_level diff --git a/python/paddle/distributed/fleet/fleet_executor_utils.py b/python/paddle/distributed/fleet/fleet_executor_utils.py index f92ab945894d96..8f9101c38f75b1 100755 --- a/python/paddle/distributed/fleet/fleet_executor_utils.py +++ b/python/paddle/distributed/fleet/fleet_executor_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole from paddle.framework import core from paddle.static import Program diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index e7837032cebe57..217e3028ea5517 100755 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -54,36 +54,34 @@ your_training_py (arg1 arg2 and all others) """ +import copy +import os +import pathlib import shutil import sys import tempfile -import os import time -import copy -import pathlib -from argparse import ArgumentParser, REMAINDER +from argparse import REMAINDER, ArgumentParser + import paddle.framework as framework -from paddle.distributed.fleet import launch_utils +from paddle.distributed.fleet import ascend_utils, cloud_utils, launch_utils +from paddle.distributed.fleet.elastic import enable_elastic, launch_elastic from paddle.distributed.fleet.launch_utils import ( - get_host_name_ip, - find_free_ports, - logger, - get_cluster, DeviceMode, - start_local_trainers, - direct_start, - watch_local_trainers, - terminate_local_procs, DistributeMode, ParameterServerLauncher, - get_logger, - check_backend, block_windows_and_macos, + check_backend, + direct_start, + find_free_ports, + get_cluster, + get_host_name_ip, + get_logger, + logger, + start_local_trainers, + terminate_local_procs, + watch_local_trainers, ) -from paddle.distributed.fleet import cloud_utils -from paddle.distributed.fleet import ascend_utils - -from paddle.distributed.fleet.elastic import enable_elastic, launch_elastic __all__ = [] diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index 69220924a38f80..73821457d5b199 100755 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -12,23 +12,23 @@ # See the License for the specific 
language governing permissions and # limitations under the License. +import copy +import json import logging -import time +import multiprocessing import os +import shutil import signal -import copy -import sys +import socket +import struct import subprocess +import sys import tempfile -import shutil +import time from contextlib import closing -import multiprocessing -import socket -import struct -import json +from distutils.util import strtobool import paddle.framework as framework -from distutils.util import strtobool import paddle.utils.cpp_extension.extension_utils as utils logger = logging.getLogger("root") diff --git a/python/paddle/distributed/fleet/layers/mpu/mp_layers.py b/python/paddle/distributed/fleet/layers/mpu/mp_layers.py index acbd95f8ff50a3..8c7907b40f2652 100644 --- a/python/paddle/distributed/fleet/layers/mpu/mp_layers.py +++ b/python/paddle/distributed/fleet/layers/mpu/mp_layers.py @@ -13,12 +13,13 @@ # limitations under the License. import paddle -from . import mp_ops from paddle.fluid import core from paddle.nn import Layer -from .random import get_rng_state_tracker from paddle.nn import functional as F + from ...base import topology as tp +from . import mp_ops +from .random import get_rng_state_tracker __all__ = [] diff --git a/python/paddle/distributed/fleet/layers/mpu/mp_ops.py b/python/paddle/distributed/fleet/layers/mpu/mp_ops.py index 8a463e996604e7..67e2d96de6a38a 100644 --- a/python/paddle/distributed/fleet/layers/mpu/mp_ops.py +++ b/python/paddle/distributed/fleet/layers/mpu/mp_ops.py @@ -14,18 +14,20 @@ import paddle from paddle import _legacy_C_ops +from paddle.common_ops_import import dygraph_utils +from paddle.distributed import collective from paddle.fluid import core -from paddle.framework import in_dynamic_mode -from paddle.framework import _in_legacy_dygraph -from paddle.framework import in_dygraph_mode -from paddle.framework import _varbase_creator -from paddle.framework import LayerHelper -from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle.fluid.data_feeder import check_dtype, check_variable_and_dtype +from paddle.framework import ( + LayerHelper, + _in_legacy_dygraph, + _varbase_creator, + in_dygraph_mode, + in_dynamic_mode, +) from paddle.nn import Layer -from paddle.distributed import collective + from ....communication.reduce import ReduceOp, _get_reduce_op -from paddle.fluid.data_feeder import check_dtype -from paddle.common_ops_import import dygraph_utils def _c_identity(tensor, group=None): diff --git a/python/paddle/distributed/fleet/layers/mpu/random.py b/python/paddle/distributed/fleet/layers/mpu/random.py index 5661804a279667..e7674b08be47a7 100644 --- a/python/paddle/distributed/fleet/layers/mpu/random.py +++ b/python/paddle/distributed/fleet/layers/mpu/random.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -import numpy as np import contextlib + +import numpy as np + +import paddle from paddle import _legacy_C_ops from paddle.fluid import core from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle.framework import LayerHelper, in_dynamic_mode from paddle.static import Variable -from paddle.framework import in_dynamic_mode -from paddle.framework import LayerHelper __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py index ed9e4e4d354b71..2c0dbce0556de1 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and import paddle.fluid.contrib.mixed_precision as mixed_precision + from .meta_optimizer_base import MetaOptimizerBase __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py index 64c1881223ed5d..e30a84e12826ab 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.optimizer import Optimizer +from collections import namedtuple + +import hccl.manage.api as hccl + import paddle.framework.core as core -from . import ascend_parser from paddle.distributed import fleet -import hccl.manage.api as hccl -from collections import namedtuple +from paddle.optimizer import Optimizer + +from . import ascend_parser HcomGroupConfig = namedtuple('HcomGroupConfig', ['name', 'nranks', 'rank_ids']) diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py index 79f79a8dea462e..b9840b0333ba53 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py @@ -11,10 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import paddle.framework.core as core -import numpy as np from functools import reduce +import numpy as np + +import paddle.framework.core as core + __all__ = [] registerd_op = { # forwards diff --git a/python/paddle/distributed/fleet/meta_optimizers/asp_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/asp_optimizer.py index a2f494e4a84386..53ababc4824514 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/asp_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/asp_optimizer.py @@ -12,9 +12,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and -from .meta_optimizer_base import MetaOptimizerBase from paddle.fluid.contrib.sparsity.asp import ASPHelper +from .meta_optimizer_base import MetaOptimizerBase + __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/common.py b/python/paddle/distributed/fleet/meta_optimizers/common.py index bbcd1d82159ea8..bc79bea4e2359d 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/common.py +++ b/python/paddle/distributed/fleet/meta_optimizers/common.py @@ -13,9 +13,11 @@ # limitations under the License. import os + import paddle from paddle.framework import core from paddle.utils import unique_name + from ..base.private_helper_function import wait_server_ready __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py index 1c728ed16eddd0..cb4bdbb6ce5832 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py @@ -11,20 +11,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and +import logging from functools import reduce + from .meta_optimizer_base import MetaOptimizerBase -import logging __all__ = [] -from paddle.fluid.layers import tensor import paddle from paddle import framework -from paddle.framework import core from paddle.common_ops_import import LayerHelper from paddle.fluid.clip import GradientClipByNorm, append_gradient_clip_ops -from paddle.fluid.optimizer import Optimizer, Momentum from paddle.fluid.dygraph import base as imperative_base +from paddle.fluid.layers import tensor +from paddle.fluid.optimizer import Momentum, Optimizer +from paddle.framework import core class DGCMomentumOptimizer(Optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py index 63037dc6f61682..78c3a4cfd0ccba 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/dygraph_sharding_optimizer.py @@ -18,6 +18,7 @@ import paddle from paddle import framework + from ...utils.log_util import logger diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py index 6d723a3af77f76..144dc8b6586c3d 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ...base.topology import ParallelMode -import paddle.autograd as imperative_base import paddle +import paddle.autograd as imperative_base from paddle import _legacy_C_ops +from ...base.topology import ParallelMode + __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py index 38c9b7b2bfc209..75f0061b2ca20b 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py @@ -13,17 +13,18 @@ # limitations under the License. import paddle +from paddle import framework +from paddle.autograd import no_grad +from paddle.fluid import layers from paddle.fluid.clip import ClipGradByGlobalNorm +from paddle.framework import core + +from ...base.topology import ParallelMode from ...utils.hybrid_parallel_util import ( fused_allreduce_gradients, sharding_reduce_gradients, ) -from ...base.topology import ParallelMode -from paddle.autograd import no_grad -from paddle import framework from ...utils.log_util import logger -from paddle.framework import core -from paddle.fluid import layers __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py index 129ba772a058bc..32d8aa9b2279eb 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py @@ -23,25 +23,23 @@ # LICENSE file in the root directory of this source tree. import logging -import numpy as np from collections import OrderedDict +import numpy as np + import paddle import paddle.distributed as dist +from paddle.distributed.collective import _get_global_group, new_group +from paddle.fluid.clip import ClipGradByGlobalNorm from paddle.framework import core from paddle.optimizer import Optimizer -from paddle.fluid.clip import ClipGradByGlobalNorm -from paddle.distributed.collective import ( - _get_global_group, - new_group, -) -from ...utils.internal_storage import ParamStorage, GradStorage from ...meta_parallel.sharding.sharding_utils import ( + ShardingClipGrad, Type, device_guard, - ShardingClipGrad, ) +from ...utils.internal_storage import GradStorage, ParamStorage # CUDA alignment 256 bytes, cpu alignment 4096 bytes alignment = {"gpu": 256, "cpu": 4096} diff --git a/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py index 1a29448e0245da..0c08066ea54dbf 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py @@ -11,10 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and +import paddle from paddle.framework import core from paddle.utils import unique_name + from .meta_optimizer_base import MetaOptimizerBase -import paddle __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py index a90bc7f13a8c29..524761a01f0128 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and from paddle.fluid.optimizer import GradientMergeOptimizer as GM + from .meta_optimizer_base import MetaOptimizerBase __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py index ccc4fecbb54866..5e1cc3b1c51ae2 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and import copy +import logging + import paddle from paddle.framework import core -from .meta_optimizer_base import MetaOptimizerBase -from ..base.private_helper_function import wait_server_ready -import logging from paddle.static import BuildStrategy +from ..base.private_helper_function import wait_server_ready +from .meta_optimizer_base import MetaOptimizerBase + __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py index b160c5f6fa789f..9a0ccde597984f 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py @@ -11,10 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and +import logging + +from paddle.fluid.optimizer import AdamOptimizer from paddle.fluid.optimizer import LambOptimizer as LAMB + from .meta_optimizer_base import MetaOptimizerBase -from paddle.fluid.optimizer import AdamOptimizer -import logging __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py index 5c716bd375ac45..b58bdd446c2868 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py @@ -11,11 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and -from paddle.fluid.optimizer import Momentum -from paddle.fluid.optimizer import LarsMomentumOptimizer -from .meta_optimizer_base import MetaOptimizerBase import logging +from paddle.fluid.optimizer import LarsMomentumOptimizer, Momentum + +from .meta_optimizer_base import MetaOptimizerBase + __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py index e73d3c6b4b0f9f..e9e9f353cfd980 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py @@ -13,10 +13,14 @@ # limitations under the License. import paddle -from paddle.static import program_guard, default_main_program -from paddle.static import default_startup_program +from paddle.static import ( + default_main_program, + default_startup_program, + program_guard, +) + +from .common import OP_ROLE_KEY, CollectiveHelper, OpRole from .meta_optimizer_base import MetaOptimizerBase -from .common import CollectiveHelper, OP_ROLE_KEY, OpRole __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py index 74d57fe59bfad7..e6bf8b85c20211 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py @@ -11,9 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -from .parameter_server_optimizer import ParameterServerOptimizer import paddle +from .parameter_server_optimizer import ParameterServerOptimizer + __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py index 362dec4e622573..5bc27bfd9c032f 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py @@ -11,15 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and +import os +import platform +import re +import subprocess + import paddle from paddle import fluid -from .meta_optimizer_base import MetaOptimizerBase from paddle.framework import core -import subprocess -import re -import os -import platform + from ..base.private_helper_function import wait_server_ready +from .meta_optimizer_base import MetaOptimizerBase __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py index 45dde10b1ed7d3..a501b6256884ec 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py @@ -13,15 +13,16 @@ import paddle from paddle.fluid.optimizer import PipelineOptimizer as PO -from .meta_optimizer_base import MetaOptimizerBase + from .common import ( - CollectiveHelper, OP_ROLE_KEY, OP_ROLE_VAR_KEY, + CollectiveHelper, OpRole, is_backward_op, is_loss_grad_op, ) +from .meta_optimizer_base import MetaOptimizerBase __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py index 31fcf3450d42ca..6c7a33192ca1e7 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py @@ -11,13 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -import paddle.distributed.passes -from .meta_optimizer_base import MetaOptimizerBase -from paddle.framework import core -import subprocess -import re import os import platform +import re +import subprocess + +import paddle.distributed.passes +from paddle.distributed.passes import PassContext +from paddle.distributed.ps.utils.ps_factory import PsProgramBuilderFactory from paddle.distributed.ps.utils.public import ( TrainerRuntimeConfig, build_var_distributed, @@ -26,8 +27,9 @@ get_var_mem_size, logger, ) -from paddle.distributed.passes import PassContext -from paddle.distributed.ps.utils.ps_factory import PsProgramBuilderFactory +from paddle.framework import core + +from .meta_optimizer_base import MetaOptimizerBase class ParameterServerOptimizer(MetaOptimizerBase): diff --git a/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py index 6b1425c703f970..5ca078a06b94cc 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py @@ -14,16 +14,17 @@ import paddle.static as static from paddle.fluid import core from paddle.utils import unique_name -from .meta_optimizer_base import MetaOptimizerBase + from .common import ( - OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, - is_loss_grad_op, + OpRole, is_backward_op, + is_loss_grad_op, is_optimizer_op, ) +from .meta_optimizer_base import MetaOptimizerBase class RawProgramOptimizer(MetaOptimizerBase): diff --git a/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py index 62c17292fc2ebe..f43d1779c193d4 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py +++ 
b/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and from paddle.fluid.optimizer import RecomputeOptimizer as RO + from .meta_optimizer_base import MetaOptimizerBase __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py index f1244c30df089b..e2165f33d8d350 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py @@ -13,11 +13,10 @@ # limitations under the License. from paddle.distributed.fleet.meta_optimizers.common import ( - is_optimizer_op, OP_ROLE_KEY, OpRole, + is_optimizer_op, ) - from paddle.framework import core __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py index 058b2adc8e1851..04abc71e0f7a01 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ..common import is_optimizer_op, OP_ROLE_KEY, OpRole, is_update_op from paddle.framework import core from paddle.utils import unique_name +from ..common import OP_ROLE_KEY, OpRole, is_optimizer_op, is_update_op + __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py index 82a7a7494d5e66..60ae50453ae27b 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py @@ -13,11 +13,12 @@ # limitations under the License. import re + from paddle.distributed.fleet.meta_optimizers.common import is_optimizer_op -from paddle.distributed.fleet.meta_optimizers.sharding.utils import get_var_size from paddle.distributed.fleet.meta_optimizers.sharding.fp16_helper import ( FP16Utils, ) +from paddle.distributed.fleet.meta_optimizers.sharding.utils import get_var_size __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py index e5f794e51a5366..e9e909f80bdaf2 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py @@ -11,19 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -from paddle.framework import core -from paddle.utils import unique_name +import os +import re from functools import reduce + +import paddle from paddle.distributed.fleet.meta_optimizers.common import ( - is_loss_grad_op, + OP_ROLE_KEY, + OpRole, is_backward_op, + is_loss_grad_op, is_optimizer_op, ) -from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole - -import re -import os +from paddle.framework import core +from paddle.utils import unique_name def check_broadcast(block): diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py index 05fa6e16ca51a0..19c7147da2d509 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py @@ -13,36 +13,47 @@ # limitations under the License. import os + from paddle.fluid import core -from paddle.utils import unique_name from paddle.fluid.optimizer import PipelineOptimizer -from paddle.static import default_startup_program, device_guard -from paddle.static import create_global_var +from paddle.static import ( + create_global_var, + default_startup_program, + device_guard, +) +from paddle.utils import unique_name -from .common import OpRole, OP_ROLE_VAR_KEY, CollectiveHelper, OP_ROLE_KEY -from .common import is_backward_op, is_optimizer_op, is_update_op +from ..utils.log_util import logger +from .common import ( + OP_ROLE_KEY, + OP_ROLE_VAR_KEY, + CollectiveHelper, + OpRole, + is_backward_op, + is_optimizer_op, + is_update_op, +) from .meta_optimizer_base import MetaOptimizerBase -from .sharding.shard import Shard, ProgramSegment +from .sharding import utils from .sharding.fp16_helper import FP16Utils -from .sharding.weight_decay_helper import WeightDecayHelper from .sharding.gradient_clip_helper import GradientClipHelper from .sharding.offload_helper import OffloadHelper from .sharding.prune import ProgramDeps -from .sharding import utils +from .sharding.shard import ProgramSegment, Shard from .sharding.utils import ( - insert_sync_calc_op, - insert_sync_comm_ops, - insert_fill_constant_ops, - insert_cast_ops, - insert_allreduce_ops, - insert_reduce_ops, - get_grad_device, get_first_optimize_op_idx, - insert_broadcast_ops, + get_grad_device, get_var_size, + insert_allreduce_ops, + insert_broadcast_ops, + insert_cast_ops, + insert_fill_constant_ops, + insert_reduce_ops, insert_scale_loss_grad_ops, + insert_sync_calc_op, + insert_sync_comm_ops, ) -from ..utils.log_util import logger +from .sharding.weight_decay_helper import WeightDecayHelper __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py index 41ef5f6190ebf9..59058bb4b2766f 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py @@ -12,16 +12,17 @@ # See the License for the specific language governing permissions and import paddle.static as static -from .meta_optimizer_base import MetaOptimizerBase + from .common import ( - CollectiveHelper, OP_ROLE_KEY, OP_ROLE_VAR_KEY, + CollectiveHelper, OpRole, is_backward_op, is_loss_grad_op, is_optimizer_op, ) +from .meta_optimizer_base import MetaOptimizerBase __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py 
b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py index 66a1c87756220e..f0d7ca602feb77 100644 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...layers.mpu.mp_layers import VocabParallelEmbedding # noqa: F401 from ...layers.mpu.mp_layers import ColumnParallelLinear # noqa: F401 -from ...layers.mpu.mp_layers import RowParallelLinear # noqa: F401 from ...layers.mpu.mp_layers import ParallelCrossEntropy # noqa: F401 +from ...layers.mpu.mp_layers import RowParallelLinear # noqa: F401 +from ...layers.mpu.mp_layers import VocabParallelEmbedding # noqa: F401 __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py index 7ddbb64883d914..5910e64b8b7384 100755 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py @@ -38,18 +38,19 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE -import math -import re import glob +import math import os +import re from functools import partial import paddle from paddle.fluid.dygraph.layers import Layer -from ...utils.log_util import logger, layer_to_str from paddle.fluid.framework import in_dygraph_mode from paddle.incubate.distributed.fleet import recompute_hybrid +from ...utils.log_util import layer_to_str, logger + __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py index 9deed30db66f5c..7139d40adc7583 100644 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py @@ -13,9 +13,9 @@ # limitations under the License. 
from ...layers.mpu.random import RNGStatesTracker # noqa: F401 -from ...layers.mpu.random import get_rng_state_tracker # noqa: F401 -from ...layers.mpu.random import model_parallel_random_seed # noqa: F401 from ...layers.mpu.random import determinate_seed # noqa: F401 from ...layers.mpu.random import dropout # noqa: F401 +from ...layers.mpu.random import get_rng_state_tracker # noqa: F401 +from ...layers.mpu.random import model_parallel_random_seed # noqa: F401 __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py index 75b541163d836b..276d6b4bced3da 100755 --- a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py @@ -13,17 +13,19 @@ import paddle import paddle.fluid as fluid -from .meta_parallel_base import MetaParallelBase -from .parallel_layers.pp_layers import PipelineLayer +import paddle.fluid.core as core +import paddle.fluid.framework as framework -from ..utils.hybrid_parallel_util import broadcast_mp_parameters -from ..utils.hybrid_parallel_util import broadcast_dp_parameters -from ..utils.hybrid_parallel_util import broadcast_sharding_parameters -from ..utils.log_util import logger from ..meta_optimizers.dygraph_optimizer import HybridParallelOptimizer -import paddle.fluid.framework as framework +from ..utils.hybrid_parallel_util import ( + broadcast_dp_parameters, + broadcast_mp_parameters, + broadcast_sharding_parameters, +) +from ..utils.log_util import logger +from .meta_parallel_base import MetaParallelBase +from .parallel_layers.pp_layers import PipelineLayer from .pp_utils import p2p_communication as p2p -import paddle.fluid.core as core __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py index 7f2e2b8d7a48bd..bbe1e5c6137648 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py +++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -from ...utils.log_util import logger import numpy as np -from paddle import _legacy_C_ops + +import paddle import paddle.fluid.core as core +from paddle import _legacy_C_ops from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode -from .utils import paddle_2_number, paddle_2_number, number_2_dtype + +from ...utils.log_util import logger +from .utils import number_2_dtype, paddle_2_number _hcg = None _use_cache = False diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py index 6a0a0b66cbeb25..230ad8ade034e7 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py @@ -24,26 +24,22 @@ import logging import warnings - from collections import OrderedDict import paddle import paddle.distributed as dist +from paddle.distributed import ParallelMode, fleet from paddle.fluid import core -from paddle.optimizer import Optimizer from paddle.fluid.clip import ClipGradByGlobalNorm -from paddle.distributed import fleet, ParallelMode +from paddle.optimizer import Optimizer HybridParallelClipGrad = ( fleet.meta_optimizers.dygraph_optimizer.hybrid_parallel_optimizer.HybridParallelClipGrad ) -from paddle.distributed.collective import ( - _get_global_group, - new_group, -) +from paddle.distributed.collective import _get_global_group, new_group -from .group_sharded_storage import ParamStorage, GradStorage -from .group_sharded_utils import Type, device_guard, GroupShardedClipGrad +from .group_sharded_storage import GradStorage, ParamStorage +from .group_sharded_utils import GroupShardedClipGrad, Type, device_guard # CUDA alignment 256 bytes, cpu alignment 4096 bytes alignment = {"gpu": 256, "cpu": 4096, "xpu": 256} diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py index f756162727edde..f800b43f9d2820 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py @@ -32,8 +32,8 @@ from paddle.distributed import collective from paddle.distributed.utils.log_utils import get_logger -from .group_sharded_storage import GradStorage from .group_sharded_optimizer_stage2 import GroupShardedOptimizerStage2 +from .group_sharded_storage import GradStorage from .group_sharded_utils import Type, device_guard logger_ = get_logger(logging.WARNING) diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py index 7504955e8dbb1c..ac41b4af4c9b0a 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py @@ -13,22 +13,23 @@ # limitations under the License. 
import logging -import numpy as np -from types import MethodType from collections import OrderedDict +from types import MethodType + +import numpy as np import paddle import paddle.distributed as dist -from paddle import nn -from paddle.autograd import PyLayer import paddle.fluid.core as core import paddle.fluid.framework as framework -from paddle.fluid.framework import EagerParamBase -from paddle.fluid.clip import ClipGradByGlobalNorm +from paddle import nn +from paddle.autograd import PyLayer from paddle.distributed import collective +from paddle.fluid.clip import ClipGradByGlobalNorm +from paddle.fluid.framework import EagerParamBase from .group_sharded_storage import GradStorage -from .group_sharded_utils import Type, GroupShardedClipGrad, device_guard +from .group_sharded_utils import GroupShardedClipGrad, Type, device_guard def _all_gather(tensor, buffer_size, group): diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py index 06b5ed9d8caea3..abab68a1912170 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py @@ -26,7 +26,8 @@ import paddle from paddle.fluid import core -from .group_sharded_utils import Type, device_guard, cvt_to_device + +from .group_sharded_utils import Type, cvt_to_device, device_guard class InternalStorage: diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py index ab32b3528bbefe..39d88fef67d922 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py @@ -14,13 +14,13 @@ import contextlib from enum import Enum -import numpy as np from types import MethodType +import numpy as np + import paddle from paddle import _legacy_C_ops -from paddle.fluid import core -from paddle.fluid import layers +from paddle.fluid import core, layers from paddle.fluid.dygraph import to_variable from paddle.fluid.framework import dygraph_only diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py index e19b08a7d9c33e..5dd3ae96580d97 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py @@ -23,22 +23,23 @@ # LICENSE file in the root directory of this source tree. 
import logging -import numpy as np -from itertools import chain -from functools import reduce from collections import deque +from functools import reduce +from itertools import chain from types import MethodType +import numpy as np + import paddle import paddle.distributed as dist from paddle import nn from paddle.distributed import collective as collective from paddle.distributed.collective import _get_global_group -from ...utils.internal_storage import GradStorage from ...meta_optimizers.dygraph_optimizer.sharding_optimizer_stage2 import ( ShardingOptimizerStage2, ) +from ...utils.internal_storage import GradStorage from .sharding_utils import Taskflow, Type diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py index 7da9762f8cb262..28ab704fb19873 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py @@ -13,23 +13,24 @@ # limitations under the License. import logging -import numpy as np -from types import MethodType from collections import OrderedDict +from types import MethodType + +import numpy as np import paddle import paddle.distributed as dist +import paddle.fluid.core as core from paddle import nn from paddle.autograd import PyLayer -import paddle.fluid.core as core -from paddle.fluid.framework import ParamBase -from paddle.fluid.clip import ClipGradByGlobalNorm from paddle.distributed import collective from paddle.distributed.collective import _get_global_group +from paddle.fluid.clip import ClipGradByGlobalNorm +from paddle.fluid.framework import ParamBase -from .sharding_utils import Type, ShardingClipGrad, device_guard -from ..pp_utils.utils import _all_gather from ...utils.internal_storage import GradStorage +from ..pp_utils.utils import _all_gather +from .sharding_utils import ShardingClipGrad, Type, device_guard # CUDA alignment 256 bytes alignment = { diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py index 391d0cc786cfcc..89978cceff7c05 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py @@ -14,16 +14,16 @@ import contextlib from enum import Enum -import numpy as np from types import MethodType +import numpy as np + import paddle from paddle import _legacy_C_ops -from paddle.fluid import core -from paddle.fluid import layers +from paddle.fluid import core, layers +from paddle.fluid.dygraph import base as imperative_base from paddle.fluid.dygraph import to_variable from paddle.fluid.framework import dygraph_only -from paddle.fluid.dygraph import base as imperative_base class Taskflow: diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py b/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py index 33f0ef35b34348..d50d0b62e1bdf1 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .meta_parallel_base import MetaParallelBase from ..utils.hybrid_parallel_util import broadcast_sharding_parameters from ..utils.log_util import logger +from .meta_parallel_base import MetaParallelBase __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py b/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py index 0d457a4d579140..883533d8e1724d 100755 --- a/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .meta_parallel_base import MetaParallelBase -from ..utils.hybrid_parallel_util import broadcast_dp_parameters -from ..utils.hybrid_parallel_util import broadcast_input_data from ..utils.hybrid_parallel_util import ( + broadcast_dp_parameters, + broadcast_input_data, broadcast_mp_parameters, broadcast_sharding_parameters, ) from ..utils.log_util import logger +from .meta_parallel_base import MetaParallelBase __all__ = [] diff --git a/python/paddle/distributed/fleet/metrics/metric.py b/python/paddle/distributed/fleet/metrics/metric.py index 36c5a56220a3b7..aaf1115af864d5 100644 --- a/python/paddle/distributed/fleet/metrics/metric.py +++ b/python/paddle/distributed/fleet/metrics/metric.py @@ -14,9 +14,11 @@ """Fleet Metrics""" import math + import numpy as np -from paddle.static import Variable + import paddle +from paddle.static import Variable __all__ = [] diff --git a/python/paddle/distributed/fleet/model.py b/python/paddle/distributed/fleet/model.py index 12c8fe295b0c9b..8a480f0b9ea6f2 100755 --- a/python/paddle/distributed/fleet/model.py +++ b/python/paddle/distributed/fleet/model.py @@ -13,16 +13,17 @@ # limitations under the License. import paddle +from paddle.distributed import fleet +from paddle.fluid.dygraph.varbase_patch_methods import _grad_scalar + from .base.topology import ParallelMode -from .meta_parallel import TensorParallel from .meta_parallel import ( + PipelineLayer, PipelineParallel, - ShardingParallel, PipelineParallelWithInterleave, - PipelineLayer, + ShardingParallel, + TensorParallel, ) -from paddle.fluid.dygraph.varbase_patch_methods import _grad_scalar -from paddle.distributed import fleet _grad_scalar = None diff --git a/python/paddle/distributed/fleet/optimizer.py b/python/paddle/distributed/fleet/optimizer.py index 042646ac505704..e6e2696d52e5d2 100755 --- a/python/paddle/distributed/fleet/optimizer.py +++ b/python/paddle/distributed/fleet/optimizer.py @@ -13,9 +13,11 @@ # limitations under the License. import copy + import paddle -from .meta_optimizers import HybridParallelOptimizer, HeterParallelOptimizer from paddle.distributed import fleet + +from .meta_optimizers import HeterParallelOptimizer, HybridParallelOptimizer from .utils.log_util import logger diff --git a/python/paddle/distributed/fleet/recompute/recompute.py b/python/paddle/distributed/fleet/recompute/recompute.py index 783fa19509df70..f429cf306268f9 100755 --- a/python/paddle/distributed/fleet/recompute/recompute.py +++ b/python/paddle/distributed/fleet/recompute/recompute.py @@ -12,13 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import contextlib + import paddle -from paddle.fluid import core from paddle.autograd import PyLayer from paddle.autograd.py_layer import LegacyPyLayer - -from paddle.fluid import framework -import contextlib +from paddle.fluid import core, framework from paddle.fluid.framework import in_dygraph_mode from ..utils.log_util import logger diff --git a/python/paddle/distributed/fleet/recompute/recompute_hybrid.py b/python/paddle/distributed/fleet/recompute/recompute_hybrid.py index 2e53c6ee174e06..1595ffaf9ea511 100644 --- a/python/paddle/distributed/fleet/recompute/recompute_hybrid.py +++ b/python/paddle/distributed/fleet/recompute/recompute_hybrid.py @@ -13,16 +13,16 @@ # limitations under the License. import paddle -from paddle.fluid import core from paddle.autograd import PyLayer -from paddle.fluid import framework +from paddle.fluid import core, framework + from ..meta_parallel.parallel_layers.random import get_rng_state_tracker +from ..meta_parallel.pp_utils import utils from .recompute import ( check_recompute_necessary, detach_variable, swith_rng_state_tracker, ) -from ..meta_parallel.pp_utils import utils __all__ = [] diff --git a/python/paddle/distributed/fleet/runtime/collective_runtime.py b/python/paddle/distributed/fleet/runtime/collective_runtime.py index d90e92fd2748bc..ec5aaa397624a6 100644 --- a/python/paddle/distributed/fleet/runtime/collective_runtime.py +++ b/python/paddle/distributed/fleet/runtime/collective_runtime.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .runtime_base import RuntimeBase import logging +from .runtime_base import RuntimeBase + __all__ = [] diff --git a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py index 572e495fd16f6b..21086b1802516d 100644 --- a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py +++ b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py @@ -17,14 +17,13 @@ import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid.framework import Program from paddle.fluid.compiler import CompiledProgram from paddle.fluid.executor import Executor +from paddle.fluid.framework import Program, Variable from paddle.fluid.parallel_executor import ParallelExecutor -from paddle.fluid.framework import Variable -from .runtime_base import RuntimeBase from ..base.private_helper_function import wait_server_ready +from .runtime_base import RuntimeBase __all__ = [] @@ -233,16 +232,15 @@ def get_sparse_attrs(): kwargs["sparse_attrs"] = get_sparse_attrs() return kwargs + from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import ( + GeoStrategy, + SyncStrategy, + ) from paddle.fluid.incubate.fleet.parameter_server.ir.public import ( _get_lr_ops, _has_global_step, ) - from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import ( - SyncStrategy, - GeoStrategy, - ) - trainer_config = self.async_strategy.get_trainer_runtime_config() print(trainer_config) diff --git a/python/paddle/distributed/fleet/runtime/the_one_ps.py b/python/paddle/distributed/fleet/runtime/the_one_ps.py index 98e06299789135..45782b2a4b106e 100644 --- a/python/paddle/distributed/fleet/runtime/the_one_ps.py +++ b/python/paddle/distributed/fleet/runtime/the_one_ps.py @@ -12,17 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os import warnings -import os import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid.framework import Program from paddle.fluid.compiler import CompiledProgram from paddle.fluid.executor import Executor +from paddle.fluid.framework import Program from paddle.fluid.parallel_executor import ParallelExecutor -from .runtime_base import RuntimeBase + from ..base.private_helper_function import wait_server_ready +from .runtime_base import RuntimeBase __all__ = [] @@ -37,8 +38,6 @@ def conv_indent(indent): def parse_table_class(varname, o_main_program): from paddle.fluid.incubate.fleet.parameter_server.ir.public import ( is_distributed_sparse_op, - ) - from paddle.fluid.incubate.fleet.parameter_server.ir.public import ( is_sparse_op, ) @@ -250,8 +249,6 @@ def define_optimize_map(self): def parse_entry(self, varname, o_main_program): from paddle.fluid.incubate.fleet.parameter_server.ir.public import ( is_distributed_sparse_op, - ) - from paddle.fluid.incubate.fleet.parameter_server.ir.public import ( is_sparse_op, ) diff --git a/python/paddle/distributed/fleet/scaler.py b/python/paddle/distributed/fleet/scaler.py index 0b8299517a79b3..003265a86123fa 100755 --- a/python/paddle/distributed/fleet/scaler.py +++ b/python/paddle/distributed/fleet/scaler.py @@ -12,14 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -from .base.topology import ParallelMode -from paddle.distributed import fleet from types import MethodType -from paddle.framework import core -from paddle.fluid.dygraph import to_variable + import numpy as np + +import paddle from paddle import _legacy_C_ops +from paddle.distributed import fleet +from paddle.fluid.dygraph import to_variable +from paddle.framework import core + +from .base.topology import ParallelMode def distributed_scaler(scaler): diff --git a/python/paddle/distributed/fleet/utils/fs.py b/python/paddle/distributed/fleet/utils/fs.py index fd2338e36cc25a..ea1fbc5c940feb 100644 --- a/python/paddle/distributed/fleet/utils/fs.py +++ b/python/paddle/distributed/fleet/utils/fs.py @@ -12,17 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os +import abc +import functools import multiprocessing - +import os import re +import shutil import time -import abc + from paddle.fluid import core -from .log_util import logger -import functools -import shutil +from .log_util import logger __all__ = [] diff --git a/python/paddle/distributed/fleet/utils/http_server.py b/python/paddle/distributed/fleet/utils/http_server.py index 2828b9e5ddfecf..4caab0efaa536e 100644 --- a/python/paddle/distributed/fleet/utils/http_server.py +++ b/python/paddle/distributed/fleet/utils/http_server.py @@ -13,13 +13,12 @@ # limitations under the License. """Http Server.""" +import http.server as SimpleHTTPServer import logging +import threading # NOTE: HTTPServer has a different name in python2 and python3 from http.server import HTTPServer -import http.server as SimpleHTTPServer - -import threading __all__ = [] diff --git a/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py b/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py index 5ba7c9c29762ad..cd1d05e913cb61 100644 --- a/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py +++ b/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py @@ -13,12 +13,13 @@ # limitations under the License. 
from collections import defaultdict -from paddle.fluid.framework import Block, Program -from paddle.fluid.framework import _non_static_mode -import paddle.fluid.core as core -import paddle.distributed.fleet as fleet + import numpy as np +import paddle.distributed.fleet as fleet +import paddle.fluid.core as core +from paddle.fluid.framework import Block, Program, _non_static_mode + class HybridParallelInferenceHelper: """ diff --git a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py index 74ccd16656724c..5f7709f0fe1215 100644 --- a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py +++ b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle import framework import paddle +from paddle import framework from paddle.fluid import core from paddle.fluid.dygraph.parallel import ( _split_tensors, - sync_params_buffers, build_groups, + sync_params_buffers, ) -from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph +from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode + from .log_util import logger __all__ = [] diff --git a/python/paddle/distributed/fleet/utils/internal_storage.py b/python/paddle/distributed/fleet/utils/internal_storage.py index e0f2ef0a04640d..ce3a4040988a52 100644 --- a/python/paddle/distributed/fleet/utils/internal_storage.py +++ b/python/paddle/distributed/fleet/utils/internal_storage.py @@ -27,6 +27,7 @@ import paddle import paddle.fluid as fluid from paddle.fluid import core + from ..meta_parallel.sharding.sharding_utils import Type, device_guard diff --git a/python/paddle/distributed/fleet/utils/ps_util.py b/python/paddle/distributed/fleet/utils/ps_util.py index d283dbe1fe8b67..ec2724ed2781ef 100644 --- a/python/paddle/distributed/fleet/utils/ps_util.py +++ b/python/paddle/distributed/fleet/utils/ps_util.py @@ -14,9 +14,10 @@ """Parameter Server utils""" import os -import paddle import warnings +import paddle + __all__ = [] diff --git a/python/paddle/distributed/io.py b/python/paddle/distributed/io.py index 47c3368c5f591c..3e7056c6390bed 100644 --- a/python/paddle/distributed/io.py +++ b/python/paddle/distributed/io.py @@ -15,8 +15,8 @@ import os import paddle -from paddle.framework import dygraph_not_support, core from paddle.fluid.framework import Program +from paddle.framework import core, dygraph_not_support def _save_distributed_persistables(executor, dirname, main_program): diff --git a/python/paddle/distributed/launch/context/args_envs.py b/python/paddle/distributed/launch/context/args_envs.py index 2013ba6a3d7baa..4e942e35662f81 100644 --- a/python/paddle/distributed/launch/context/args_envs.py +++ b/python/paddle/distributed/launch/context/args_envs.py @@ -13,7 +13,7 @@ # limitations under the License. import os -from argparse import ArgumentParser, REMAINDER +from argparse import REMAINDER, ArgumentParser env_args_mapping = { 'POD_IP': 'host', diff --git a/python/paddle/distributed/launch/context/device.py b/python/paddle/distributed/launch/context/device.py index 2708755c2eb7a2..f5aaf83d135a19 100644 --- a/python/paddle/distributed/launch/context/device.py +++ b/python/paddle/distributed/launch/context/device.py @@ -13,6 +13,7 @@ # limitations under the License. 
import os + import paddle.fluid as fluid from paddle.device import get_available_custom_device diff --git a/python/paddle/distributed/launch/context/node.py b/python/paddle/distributed/launch/context/node.py index 04748d2935bcc9..86298da280baed 100644 --- a/python/paddle/distributed/launch/context/node.py +++ b/python/paddle/distributed/launch/context/node.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .device import Device - import os import random import socket import struct from contextlib import closing +from .device import Device + class Node: def __init__(self): diff --git a/python/paddle/distributed/launch/controllers/collective.py b/python/paddle/distributed/launch/controllers/collective.py index 0ea9041b5fa123..170f19123e3f12 100644 --- a/python/paddle/distributed/launch/controllers/collective.py +++ b/python/paddle/distributed/launch/controllers/collective.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .controller import Controller, ControleMode -from ..context.device import DeviceType - import json +from ..context.device import DeviceType +from .controller import ControleMode, Controller + class CollectiveController(Controller): @classmethod diff --git a/python/paddle/distributed/launch/controllers/controller.py b/python/paddle/distributed/launch/controllers/controller.py index 73954adaab7413..b6cc7440c48974 100644 --- a/python/paddle/distributed/launch/controllers/controller.py +++ b/python/paddle/distributed/launch/controllers/controller.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import os import signal +import sys +from paddle.distributed.launch.job.container import Container from paddle.distributed.launch.job.job import Job from paddle.distributed.launch.job.pod import Pod -from paddle.distributed.launch.job.container import Container from .master import Master from .watcher import Watcher diff --git a/python/paddle/distributed/launch/controllers/ipu_controller.py b/python/paddle/distributed/launch/controllers/ipu_controller.py index ea342e5ebe03aa..7535177e8929e6 100644 --- a/python/paddle/distributed/launch/controllers/ipu_controller.py +++ b/python/paddle/distributed/launch/controllers/ipu_controller.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse import os import sys -import argparse -from .collective import CollectiveController, ControleMode from paddle.distributed.launch.job.container import Container +from .collective import CollectiveController, ControleMode + class IPUController(CollectiveController): @classmethod diff --git a/python/paddle/distributed/launch/controllers/master.py b/python/paddle/distributed/launch/controllers/master.py index 37c6f7a50872be..efb1e0d40a822a 100644 --- a/python/paddle/distributed/launch/controllers/master.py +++ b/python/paddle/distributed/launch/controllers/master.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from paddle.distributed.launch.utils.kv_client import KVClient -from paddle.distributed.launch.utils.kv_server import KVServer - -import time -import sys -import threading import copy import random +import sys +import threading +import time + +from paddle.distributed.launch.utils.kv_client import KVClient +from paddle.distributed.launch.utils.kv_server import KVServer ETCD_PROTOCAL = 'etcd://' diff --git a/python/paddle/distributed/launch/controllers/ps.py b/python/paddle/distributed/launch/controllers/ps.py index b78ab34d490962..7d70fa93bcc1f8 100644 --- a/python/paddle/distributed/launch/controllers/ps.py +++ b/python/paddle/distributed/launch/controllers/ps.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .controller import Controller, ControleMode - import json -import os, shutil +import os +import shutil + +from .controller import ControleMode, Controller class PSController(Controller): diff --git a/python/paddle/distributed/launch/controllers/rpc.py b/python/paddle/distributed/launch/controllers/rpc.py index 0d3c314ec77880..096aeea0c8d7ea 100644 --- a/python/paddle/distributed/launch/controllers/rpc.py +++ b/python/paddle/distributed/launch/controllers/rpc.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .controller import Controller, ControleMode - import json +from .controller import ControleMode, Controller + class RpcController(Controller): @classmethod diff --git a/python/paddle/distributed/launch/controllers/watcher.py b/python/paddle/distributed/launch/controllers/watcher.py index c76a428d7489a7..1768450f382a6e 100644 --- a/python/paddle/distributed/launch/controllers/watcher.py +++ b/python/paddle/distributed/launch/controllers/watcher.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ..utils.nvsmi import get_gpu_process, get_gpu_util, get_gpu_info -import time import os - +import time from threading import Thread +from ..utils.nvsmi import get_gpu_info, get_gpu_process, get_gpu_util + class Watcher: def __init__(self, ctx): diff --git a/python/paddle/distributed/launch/job/container.py b/python/paddle/distributed/launch/job/container.py index c78c3323a8597e..c21487e1ea8b3a 100644 --- a/python/paddle/distributed/launch/job/container.py +++ b/python/paddle/distributed/launch/job/container.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os +import sys + from paddle.distributed.launch.utils.process_context import ProcessContext from .status import Status -import os -import sys - class Container: ''' diff --git a/python/paddle/distributed/launch/job/pod.py b/python/paddle/distributed/launch/job/pod.py index ef72263dd8758d..5c8dad2721cfb4 100644 --- a/python/paddle/distributed/launch/job/pod.py +++ b/python/paddle/distributed/launch/job/pod.py @@ -14,13 +14,12 @@ from __future__ import annotations -from .container import Container - -from .status import Status - import random import time +from .container import Container +from .status import Status + class PodSepc: def __init__(self): diff --git a/python/paddle/distributed/launch/plugins/test.py b/python/paddle/distributed/launch/plugins/test.py index 3dd7ab886e61bf..29f378ea50e8cc 100644 --- a/python/paddle/distributed/launch/plugins/test.py +++ b/python/paddle/distributed/launch/plugins/test.py @@ -13,11 +13,12 @@ # limitations under the License. import numpy as np + import paddle from paddle.distributed import fleet +from paddle.io import DataLoader, Dataset from paddle.vision.models import ResNet from paddle.vision.models.resnet import BottleneckBlock -from paddle.io import DataLoader, Dataset base_lr = 0.1 momentum_rate = 0.9 diff --git a/python/paddle/distributed/launch/utils/kv_client.py b/python/paddle/distributed/launch/utils/kv_client.py index e0482203357c72..b3839ef65ac660 100644 --- a/python/paddle/distributed/launch/utils/kv_client.py +++ b/python/paddle/distributed/launch/utils/kv_client.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import requests import time +import requests + class KVClient: def __init__(self, endpoint='localhost:2379'): diff --git a/python/paddle/distributed/launch/utils/kv_server.py b/python/paddle/distributed/launch/utils/kv_server.py index 90dcbcf9375230..19358d9ac03d35 100644 --- a/python/paddle/distributed/launch/utils/kv_server.py +++ b/python/paddle/distributed/launch/utils/kv_server.py @@ -12,13 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from http.server import HTTPServer import http.server as SimpleHTTPServer - -from multiprocessing import Process - -import threading import json +import threading +from http.server import HTTPServer +from multiprocessing import Process class KVHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): diff --git a/python/paddle/distributed/launch/utils/nvsmi.py b/python/paddle/distributed/launch/utils/nvsmi.py index 762870baa15a12..da446006154587 100644 --- a/python/paddle/distributed/launch/utils/nvsmi.py +++ b/python/paddle/distributed/launch/utils/nvsmi.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import subprocess -import os import json +import os import shutil +import subprocess class Info: diff --git a/python/paddle/distributed/launch/utils/process_context.py b/python/paddle/distributed/launch/utils/process_context.py index 3a8c52851778c5..6543d7bd9ebae2 100644 --- a/python/paddle/distributed/launch/utils/process_context.py +++ b/python/paddle/distributed/launch/utils/process_context.py @@ -12,8 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os +import signal import subprocess -import os, sys, signal, time +import sys +import time class ProcessContext: diff --git a/python/paddle/distributed/metric/metrics.py b/python/paddle/distributed/metric/metrics.py index 64f62d85251abb..306590b92dae1e 100644 --- a/python/paddle/distributed/metric/metrics.py +++ b/python/paddle/distributed/metric/metrics.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import sys + import yaml -import logging + from paddle.distributed.utils.log_utils import get_logger __all__ = [] diff --git a/python/paddle/distributed/models/moe/utils.py b/python/paddle/distributed/models/moe/utils.py index 2011e558701e76..a021c741c63836 100644 --- a/python/paddle/distributed/models/moe/utils.py +++ b/python/paddle/distributed/models/moe/utils.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +from paddle import _legacy_C_ops from paddle.fluid import core -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode from paddle.fluid.data_feeder import check_variable_and_dtype -from paddle import _legacy_C_ops +from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode +from paddle.fluid.layer_helper import LayerHelper def _number_count(numbers, upper_range): diff --git a/python/paddle/distributed/parallel.py b/python/paddle/distributed/parallel.py index 18339cd5af37cb..bd449acabf4db7 100644 --- a/python/paddle/distributed/parallel.py +++ b/python/paddle/distributed/parallel.py @@ -13,33 +13,35 @@ # limitations under the License. import os +import time import warnings -from multiprocessing import Process # noqa: F401 from multiprocessing import Manager # noqa: F401 -import time +from multiprocessing import Process # noqa: F401 + import paddle +from paddle.distributed.collective import ( + Group, + _default_group_name, + _get_group_map_by_name, + _new_process_group_impl, + _set_default_backend, + _set_default_store, + _set_group_map, + _set_group_map_backend, + _set_group_map_by_name, + _valid_backend_list, +) +from paddle.distributed.communication.group import _add_new_group +from paddle.distributed.fleet.base.private_helper_function import ( # noqa: F401 + wait_server_ready, +) +from paddle.distributed.fleet.launch_utils import check_backend # deprecated module import from paddle.fluid import core -from paddle.fluid.framework import in_dygraph_mode -from paddle.fluid.framework import _set_expected_place from paddle.fluid.dygraph import parallel_helper -from paddle.distributed.fleet.launch_utils import check_backend from paddle.fluid.dygraph.parallel import ParallelEnv -from paddle.distributed.fleet.base.private_helper_function import ( - wait_server_ready, -) # noqa: F401 -from paddle.distributed.collective import _set_group_map -from paddle.distributed.collective import _set_group_map_by_name -from paddle.distributed.collective import _get_group_map_by_name -from paddle.distributed.collective import _default_group_name -from paddle.distributed.collective import _valid_backend_list -from paddle.distributed.collective import _set_default_backend -from paddle.distributed.collective import _set_default_store -from paddle.distributed.collective import _new_process_group_impl -from paddle.distributed.collective import Group -from paddle.distributed.collective import _set_group_map_backend -from 
paddle.distributed.communication.group import _add_new_group +from paddle.fluid.framework import _set_expected_place, in_dygraph_mode __all__ = [] diff --git a/python/paddle/distributed/parallel_with_gloo.py b/python/paddle/distributed/parallel_with_gloo.py index eeb91332070b45..d0c1b3eac90ae7 100755 --- a/python/paddle/distributed/parallel_with_gloo.py +++ b/python/paddle/distributed/parallel_with_gloo.py @@ -13,14 +13,15 @@ # limitations under the License. import time -from multiprocessing import Process, Manager +from multiprocessing import Manager, Process -# deprecated module import -from paddle.fluid import core from paddle.distributed.fleet.base.private_helper_function import ( wait_server_ready, ) +# deprecated module import +from paddle.fluid import core + __all__ = [] _global_gloo_ctx = None diff --git a/python/paddle/distributed/passes/auto_parallel_amp.py b/python/paddle/distributed/passes/auto_parallel_amp.py index d15d38abee9133..e96cd4ec77d8f1 100644 --- a/python/paddle/distributed/passes/auto_parallel_amp.py +++ b/python/paddle/distributed/passes/auto_parallel_amp.py @@ -13,43 +13,36 @@ # limitations under the License. import paddle -from paddle.framework import core -from paddle.fluid import unique_name -from .pass_base import PassBase, register_pass -from paddle.distributed.fleet.meta_optimizers.common import OpRole -from paddle.fluid.data_feeder import check_variable_and_dtype, check_type -from paddle.distributed.auto_parallel.utils import ( - get_loss_op, - set_var_dist_attr, -) -from paddle.distributed.auto_parallel.utils import ( - naive_set_dist_op_attr_for_program_by_mesh_and_mapping, +from paddle.distributed.auto_parallel.dist_attribute import ( + OperatorDistributedAttribute, ) from paddle.distributed.auto_parallel.process_group import ( get_world_process_group, ) -from paddle.fluid.contrib.mixed_precision.fp16_utils import ( - AutoMixedPrecisionLists, +from paddle.distributed.auto_parallel.utils import ( + get_loss_op, + naive_set_dist_op_attr_for_program_by_mesh_and_mapping, + set_var_dist_attr, ) +from paddle.distributed.fleet.meta_optimizers.common import OpRole +from paddle.fluid import unique_name from paddle.fluid.contrib.mixed_precision.fp16_utils import ( + AutoMixedPrecisionLists, + _dtype_to_str, + _is_in_black_varnames, _keep_fp32_input, _keep_fp32_output, - find_op_index, -) -from paddle.fluid.contrib.mixed_precision.fp16_utils import ( + _rename_arg, _valid_types, + find_op_index, find_true_post_op, find_true_prev_op, ) -from paddle.fluid.contrib.mixed_precision.fp16_utils import ( - _is_in_black_varnames, - _dtype_to_str, - _rename_arg, -) -from paddle.distributed.auto_parallel.dist_attribute import ( - OperatorDistributedAttribute, -) -from ..auto_parallel.utils import is_forward_op, is_backward_op, is_loss_op +from paddle.fluid.data_feeder import check_type, check_variable_and_dtype +from paddle.framework import core + +from ..auto_parallel.utils import is_backward_op, is_forward_op, is_loss_op +from .pass_base import PassBase, register_pass world_process_group = get_world_process_group() diff --git a/python/paddle/distributed/passes/auto_parallel_fp16.py b/python/paddle/distributed/passes/auto_parallel_fp16.py index eb45b41c551d12..7aed31b01ec2b2 100644 --- a/python/paddle/distributed/passes/auto_parallel_fp16.py +++ b/python/paddle/distributed/passes/auto_parallel_fp16.py @@ -15,37 +15,34 @@ from collections import defaultdict import paddle -from paddle.framework import core -from paddle.fluid.framework import default_main_program, 
default_startup_program -from paddle.fluid import unique_name -from .pass_base import register_pass -from paddle.fluid.data_feeder import check_variable_and_dtype, check_type -from paddle.distributed.auto_parallel.utils import ( - set_var_dist_attr, - naive_set_dist_op_attr_for_program_by_mesh_and_mapping, +from paddle.distributed.auto_parallel.dist_attribute import ( + OperatorDistributedAttribute, ) from paddle.distributed.auto_parallel.process_group import ( get_world_process_group, ) -from paddle.fluid.contrib.mixed_precision.fp16_utils import ( - AutoMixedPrecisionLists, +from paddle.distributed.auto_parallel.utils import ( + OP_ROLE_KEY, + OpRole, + is_backward_op, + is_forward_op, + naive_set_dist_op_attr_for_program_by_mesh_and_mapping, + set_var_dist_attr, ) +from paddle.fluid import unique_name from paddle.fluid.contrib.mixed_precision.fp16_utils import ( + AutoMixedPrecisionLists, + _dtype_to_str, _keep_layer_norm_scale_bias_to_fp32, _need_keep_fp32, _valid_types, - _dtype_to_str, -) -from paddle.distributed.auto_parallel.dist_attribute import ( - OperatorDistributedAttribute, -) -from paddle.distributed.auto_parallel.utils import ( - is_forward_op, - is_backward_op, - OP_ROLE_KEY, - OpRole, ) +from paddle.fluid.data_feeder import check_type, check_variable_and_dtype +from paddle.fluid.framework import default_main_program, default_startup_program +from paddle.framework import core + from .auto_parallel_amp import AMPPass +from .pass_base import register_pass world_process_group = get_world_process_group() # if user use python "+, -, * /" for network, there might be cast in vanilla program diff --git a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py index 443d28e3660ff2..8ac3492c2b14d3 100644 --- a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py +++ b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py @@ -12,25 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import List, Tuple, Dict, Any +from typing import Any, Dict, List, Tuple import paddle -from paddle.framework import core -from paddle.fluid import layers -from paddle.fluid.framework import device_guard -from .pass_base import PassBase, PassType, register_pass -from paddle.distributed.auto_parallel.utils import ( - set_var_dist_attr, - is_optimize_op, - OpRole, - OP_ROLE_KEY, +from paddle.distributed.auto_parallel.process_group import ( + get_world_process_group, ) from paddle.distributed.auto_parallel.utils import ( + OP_ROLE_KEY, + OpRole, + is_optimize_op, naive_set_dist_op_attr_for_program_by_mesh_and_mapping, + set_var_dist_attr, ) -from paddle.distributed.auto_parallel.process_group import ( - get_world_process_group, -) +from paddle.fluid import layers +from paddle.fluid.framework import device_guard +from paddle.framework import core + +from .pass_base import PassBase, PassType, register_pass world_process_group = get_world_process_group() diff --git a/python/paddle/distributed/passes/auto_parallel_quantization.py b/python/paddle/distributed/passes/auto_parallel_quantization.py index fbe6dcb86fb4b0..d63456ab45c2b1 100644 --- a/python/paddle/distributed/passes/auto_parallel_quantization.py +++ b/python/paddle/distributed/passes/auto_parallel_quantization.py @@ -13,17 +13,18 @@ # limitations under the License. 
import paddle - -from paddle.fluid import core, framework -from paddle.fluid.dygraph.parallel import ParallelEnv -from paddle.fluid.contrib.slim.quantization import utils -from paddle.fluid.contrib.slim.quantization import QuantizationTransformPassV2 -from paddle.fluid.contrib.slim.quantization import AddQuantDequantPassV2 -from paddle.fluid.contrib.slim.quantization import OutScaleForTrainingPass from paddle.distributed.auto_parallel.dist_attribute import ( OperatorDistributedAttribute, TensorDistributedAttribute, ) +from paddle.fluid import core, framework +from paddle.fluid.contrib.slim.quantization import ( + AddQuantDequantPassV2, + OutScaleForTrainingPass, + QuantizationTransformPassV2, + utils, +) +from paddle.fluid.dygraph.parallel import ParallelEnv from .pass_base import PassBase, register_pass diff --git a/python/paddle/distributed/passes/auto_parallel_recompute.py b/python/paddle/distributed/passes/auto_parallel_recompute.py index 23fb73f10eff71..aa213e24322323 100644 --- a/python/paddle/distributed/passes/auto_parallel_recompute.py +++ b/python/paddle/distributed/passes/auto_parallel_recompute.py @@ -14,24 +14,29 @@ import logging -from .pass_base import PassBase, register_pass -from paddle.fluid import core, unique_name -from paddle.fluid import framework as framework -from paddle.fluid.backward import _append_grad_suffix_, _get_no_grad_set_name -from paddle.fluid.backward import ProgramStats, _rename_arg_, _find_op_path_ from paddle.distributed.auto_parallel.dist_attribute import ( OperatorDistributedAttribute, ) from paddle.distributed.auto_parallel.utils import ( get_loss_op, - set_var_dist_attr, + insert_dependencies_for_two_ops, + naive_set_dist_op_attr_for_program_by_mesh_and_mapping, set_dist_op_desc_original_id, + set_var_dist_attr, ) -from paddle.distributed.auto_parallel.utils import ( - naive_set_dist_op_attr_for_program_by_mesh_and_mapping, - insert_dependencies_for_two_ops, +from paddle.fluid import core +from paddle.fluid import framework as framework +from paddle.fluid import unique_name +from paddle.fluid.backward import ( + ProgramStats, + _append_grad_suffix_, + _find_op_path_, + _get_no_grad_set_name, + _rename_arg_, ) +from .pass_base import PassBase, register_pass + def _to_be_recomputed(op): return op.has_attr('op_namescope') and "/auto_parallel/rc_" in op.attr( diff --git a/python/paddle/distributed/passes/auto_parallel_sharding.py b/python/paddle/distributed/passes/auto_parallel_sharding.py index 49583e3ae66930..c001a93d78991d 100644 --- a/python/paddle/distributed/passes/auto_parallel_sharding.py +++ b/python/paddle/distributed/passes/auto_parallel_sharding.py @@ -12,32 +12,32 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from functools import reduce import logging +from functools import reduce import paddle - -from paddle.framework import core -from paddle.fluid.framework import default_main_program, default_startup_program -from paddle.fluid import unique_name -from .pass_base import PassBase, register_pass -from paddle.distributed.auto_parallel.process_group import new_process_group -from paddle.distributed.fleet.meta_optimizers.sharding.utils import get_var_size -from paddle.distributed.fleet.meta_optimizers.common import ( - is_backward_op, - is_optimizer_op, -) from paddle.distributed.auto_parallel.operators.common import ( - is_parameter_related, is_data_parallel_reduce_op, + is_parameter_related, ) +from paddle.distributed.auto_parallel.process_group import new_process_group from paddle.distributed.auto_parallel.utils import ( _get_comm_group, + get_logger, + get_var_numel, naive_set_dist_op_attr_for_program_by_mesh_and_mapping, set_var_dist_attr, - get_var_numel, - get_logger, ) +from paddle.distributed.fleet.meta_optimizers.common import ( + is_backward_op, + is_optimizer_op, +) +from paddle.distributed.fleet.meta_optimizers.sharding.utils import get_var_size +from paddle.fluid import unique_name +from paddle.fluid.framework import default_main_program, default_startup_program +from paddle.framework import core + +from .pass_base import PassBase, register_pass OpRole = core.op_proto_and_checker_maker.OpRole OP_ROLE_KEY = core.op_proto_and_checker_maker.kOpRoleAttrName() diff --git a/python/paddle/distributed/passes/cpp_pass.py b/python/paddle/distributed/passes/cpp_pass.py index a14d28053f2cc3..07b5661950ab39 100755 --- a/python/paddle/distributed/passes/cpp_pass.py +++ b/python/paddle/distributed/passes/cpp_pass.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +from paddle.framework import _apply_pass as _apply_cpp_pass +from paddle.framework import core from paddle.static import Executor -from .pass_base import PassType, CPPPassWrapper, register_pass -from paddle.framework import core, _apply_pass as _apply_cpp_pass + +from .pass_base import CPPPassWrapper, PassType, register_pass @register_pass("fuse_elewise_add_act") diff --git a/python/paddle/distributed/passes/fuse_all_reduce.py b/python/paddle/distributed/passes/fuse_all_reduce.py index 3e5ca75d62fbea..5bc936b23c10ce 100755 --- a/python/paddle/distributed/passes/fuse_all_reduce.py +++ b/python/paddle/distributed/passes/fuse_all_reduce.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np + from paddle.framework import core from paddle.utils import unique_name + from .pass_base import PassBase, PassType, register_pass -import numpy as np def find_adjacent_match_sequences( diff --git a/python/paddle/distributed/passes/pass_base.py b/python/paddle/distributed/passes/pass_base.py index 00f7bcc156d065..be717ff0c940d1 100755 --- a/python/paddle/distributed/passes/pass_base.py +++ b/python/paddle/distributed/passes/pass_base.py @@ -13,6 +13,7 @@ # limitations under the License. 
from abc import ABC, abstractmethod + from paddle.framework import _apply_pass as _apply_cpp_pass diff --git a/python/paddle/distributed/passes/ps_server_pass.py b/python/paddle/distributed/passes/ps_server_pass.py index 9548b0222165c3..c243c0602ff39e 100755 --- a/python/paddle/distributed/passes/ps_server_pass.py +++ b/python/paddle/distributed/passes/ps_server_pass.py @@ -15,26 +15,27 @@ import logging import paddle -from ..ps.utils.public import ( - get_optimize_ops, - get_ps_endpoint, - get_role_id, - get_trainers, +from paddle.fluid.layers.learning_rate_scheduler import ( + exponential_decay, + inverse_time_decay, + natural_exp_decay, + noam_decay, ) -from .pass_base import PassBase, register_pass -from paddle.optimizer.lr import LRScheduler from paddle.optimizer.lr import ( ExponentialDecay, InverseTimeDecay, + LRScheduler, NaturalExpDecay, NoamDecay, ) -from paddle.fluid.layers.learning_rate_scheduler import ( - exponential_decay, - inverse_time_decay, - natural_exp_decay, - noam_decay, + +from ..ps.utils.public import ( + get_optimize_ops, + get_ps_endpoint, + get_role_id, + get_trainers, ) +from .pass_base import PassBase, register_pass @register_pass("add_lr_decay_table_pass") diff --git a/python/paddle/distributed/passes/ps_trainer_pass.py b/python/paddle/distributed/passes/ps_trainer_pass.py index c465425aa5b4d5..588578514393ce 100755 --- a/python/paddle/distributed/passes/ps_trainer_pass.py +++ b/python/paddle/distributed/passes/ps_trainer_pass.py @@ -13,14 +13,17 @@ # limitations under the License. import os + +from _collections import defaultdict + import paddle -from ..ps.utils.public import * # noqa: F403 -from paddle.framework import core from paddle.distributed.passes.pass_base import PassBase, register_pass -from ..ps.utils.collective_transpiler import SingleProcessMultiThread -from _collections import defaultdict -from paddle.static import Program from paddle.fluid.framework import Parameter +from paddle.framework import core +from paddle.static import Program + +from ..ps.utils.collective_transpiler import SingleProcessMultiThread +from ..ps.utils.public import * # noqa: F403 @register_pass("append_send_ops_pass") diff --git a/python/paddle/distributed/ps/coordinator.py b/python/paddle/distributed/ps/coordinator.py index a5316bd0890d72..f54aa3602b484b 100755 --- a/python/paddle/distributed/ps/coordinator.py +++ b/python/paddle/distributed/ps/coordinator.py @@ -12,16 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import abc +import logging +import os +import time + +from google.protobuf import text_format + import paddle +from paddle.distributed import fleet from paddle.distributed.communicator import FLCommunicator from paddle.distributed.fleet.proto import the_one_ps_pb2 -from google.protobuf import text_format from paddle.distributed.ps.utils.public import is_distributed_env -from paddle.distributed import fleet -import time -import abc -import os -import logging logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) diff --git a/python/paddle/distributed/ps/the_one_ps.py b/python/paddle/distributed/ps/the_one_ps.py index 68a6960338c9ad..ce725e6f3717a5 100755 --- a/python/paddle/distributed/ps/the_one_ps.py +++ b/python/paddle/distributed/ps/the_one_ps.py @@ -12,22 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os import warnings -import os +from google.protobuf import text_format + import paddle from paddle.distributed import fleet -from paddle.framework import core -from paddle.distributed.ps.utils.public import * # noqa: F403 -from paddle.static import Program, CompiledProgram, Executor, ParallelExecutor -from paddle.distributed.fleet.runtime.runtime_base import RuntimeBase +from paddle.distributed.communicator import Communicator, HeterClient from paddle.distributed.fleet.base.private_helper_function import ( wait_server_ready, ) from paddle.distributed.fleet.proto import the_one_ps_pb2 -from paddle.distributed.communicator import Communicator, HeterClient -from google.protobuf import text_format +from paddle.distributed.fleet.runtime.runtime_base import RuntimeBase from paddle.distributed.ps.coordinator import Coordinator +from paddle.distributed.ps.utils.public import * # noqa: F403 +from paddle.framework import core +from paddle.static import CompiledProgram, Executor, ParallelExecutor, Program __all__ = [ 'Table', diff --git a/python/paddle/distributed/ps/utils/collective_transpiler.py b/python/paddle/distributed/ps/utils/collective_transpiler.py index 8701df5c29775c..183666ebe87002 100644 --- a/python/paddle/distributed/ps/utils/collective_transpiler.py +++ b/python/paddle/distributed/ps/utils/collective_transpiler.py @@ -14,13 +14,12 @@ import os - -from paddle.framework import core -from paddle.fluid import unique_name -from paddle.static import default_main_program, default_startup_program from paddle.distributed.fleet.base.private_helper_function import ( wait_server_ready, ) +from paddle.fluid import unique_name +from paddle.framework import core +from paddle.static import default_main_program, default_startup_program __all__ = ['GradAllReduce', 'LocalSGD', 'MultiThread'] diff --git a/python/paddle/distributed/ps/utils/ps_program_builder.py b/python/paddle/distributed/ps/utils/ps_program_builder.py index c7f80420f0c8fd..02712e31d949da 100755 --- a/python/paddle/distributed/ps/utils/ps_program_builder.py +++ b/python/paddle/distributed/ps/utils/ps_program_builder.py @@ -14,12 +14,13 @@ import paddle import paddle.fluid as fluid -from .public import * # noqa: F403 from paddle.distributed.fleet.base.private_helper_function import ( wait_server_ready, ) from paddle.distributed.passes import new_pass +from .public import * # noqa: F403 + class PsProgramBuilder: def __init__(self, pass_ctx): diff --git a/python/paddle/distributed/ps/utils/public.py b/python/paddle/distributed/ps/utils/public.py index 96092aa20f163e..d797591f96921d 100755 --- a/python/paddle/distributed/ps/utils/public.py +++ b/python/paddle/distributed/ps/utils/public.py @@ -12,15 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from functools import reduce - import collections +import logging import os import warnings -import logging +from functools import reduce + import paddle.fluid as fluid -from paddle.framework import core import paddle.fluid.framework as framework +from paddle.framework import core # logging.basicConfig( # format='%(levelname)s - %(asctime)s - %(pathname)s: %(lineno)s - %(message)s', level=logging.INFO) diff --git a/python/paddle/distributed/rpc/internal.py b/python/paddle/distributed/rpc/internal.py index 5cae48404a9482..059f3121b80d54 100644 --- a/python/paddle/distributed/rpc/internal.py +++ b/python/paddle/distributed/rpc/internal.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from collections import namedtuple import pickle +from collections import namedtuple PythonFunc = namedtuple("PythonFunc", ["func", "args", "kwargs"]) """Some Python code interfaces called in C++""" diff --git a/python/paddle/distributed/rpc/rpc.py b/python/paddle/distributed/rpc/rpc.py index e01446a53744a9..c2bcef7afb4f73 100644 --- a/python/paddle/distributed/rpc/rpc.py +++ b/python/paddle/distributed/rpc/rpc.py @@ -12,16 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime import os -from collections import namedtuple import pickle import time -import datetime +from collections import namedtuple import paddle.fluid.core as core -from paddle.distributed.utils.launch_utils import logger -from paddle.distributed.rpc.internal import _serialize, PythonFunc from paddle.distributed.launch.context import Node +from paddle.distributed.rpc.internal import PythonFunc, _serialize +from paddle.distributed.utils.launch_utils import logger WorkerInfo = namedtuple("WorkerInfo", ["name", "rank", "ip", "port"]) diff --git a/python/paddle/distributed/sharding/group_sharded.py b/python/paddle/distributed/sharding/group_sharded.py index ce5eae88cf9984..a69718261d9092 100644 --- a/python/paddle/distributed/sharding/group_sharded.py +++ b/python/paddle/distributed/sharding/group_sharded.py @@ -12,28 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os import logging +import os import paddle -from paddle.optimizer import Optimizer -from paddle.distributed.utils.log_utils import get_logger -from paddle.fluid.framework import in_dygraph_mode - # Old version from paddle.distributed.fleet.meta_optimizers.dygraph_optimizer.sharding_optimizer_stage2 import ( ShardingOptimizerStage2, ) -from paddle.distributed.fleet.meta_parallel.sharding.sharding_stage2 import ( - ShardingStage2, -) -from paddle.distributed.fleet.meta_parallel.sharding.sharding_stage3 import ( - ShardingStage3, -) -from paddle.distributed.fleet.meta_parallel.sharding.sharding_utils import ( - ShardingScaler, -) # New version from paddle.distributed.fleet.meta_parallel.sharding.group_sharded_optimizer_stage2 import ( @@ -48,6 +35,18 @@ from paddle.distributed.fleet.meta_parallel.sharding.group_sharded_utils import ( GroupShardedScaler, ) +from paddle.distributed.fleet.meta_parallel.sharding.sharding_stage2 import ( + ShardingStage2, +) +from paddle.distributed.fleet.meta_parallel.sharding.sharding_stage3 import ( + ShardingStage3, +) +from paddle.distributed.fleet.meta_parallel.sharding.sharding_utils import ( + ShardingScaler, +) +from paddle.distributed.utils.log_utils import get_logger +from paddle.fluid.framework import in_dygraph_mode +from paddle.optimizer import Optimizer logger_ = get_logger(logging.WARNING) diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py index f3505da0bb7180..21ea1d4bdc682f 100644 --- a/python/paddle/distributed/spawn.py +++ b/python/paddle/distributed/spawn.py @@ -18,23 +18,23 @@ import sys import warnings -from paddle.distributed.utils.launch_utils import ( - _print_arguments, - _prepare_trainer_env, - get_host_name_ip, -) +from paddle.device import get_device from paddle.distributed.cloud_utils import ( - get_cluster_and_pod, _get_trainers_num, + get_cluster_and_pod, ) -from paddle.distributed.fleet.launch import get_cluster_from_args from paddle.distributed.fleet.cloud_utils import use_paddlecloud +from paddle.distributed.fleet.launch import get_cluster_from_args from paddle.distributed.fleet.launch_utils import ( DeviceMode, - check_backend, block_windows_and_macos, + check_backend, +) +from paddle.distributed.utils.launch_utils import ( + _prepare_trainer_env, + _print_arguments, + get_host_name_ip, ) -from paddle.device import get_device # deprecated module import from paddle.fluid import core diff --git a/python/paddle/distributed/utils/launch_utils.py b/python/paddle/distributed/utils/launch_utils.py index 0c1ab76e5506d1..76642fe7616c5b 100644 --- a/python/paddle/distributed/utils/launch_utils.py +++ b/python/paddle/distributed/utils/launch_utils.py @@ -12,17 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import time +import copy import os import signal -import copy -import sys +import socket import subprocess +import sys +import time from contextlib import closing -import socket from distutils.util import strtobool from paddle.distributed.fleet.launch_utils import get_backend_by_compile_flag + from ..utils.log_utils import get_logger logger = get_logger("INFO", "root") diff --git a/python/paddle/distributed/utils/moe_utils.py b/python/paddle/distributed/utils/moe_utils.py index eb7e73c363bf2c..298ae9fc916e83 100644 --- a/python/paddle/distributed/utils/moe_utils.py +++ b/python/paddle/distributed/utils/moe_utils.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.data_feeder import check_variable_and_dtype from paddle import _legacy_C_ops +from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle.fluid.framework import _non_static_mode +from paddle.fluid.layer_helper import LayerHelper def global_scatter( diff --git a/python/paddle/distribution/categorical.py b/python/paddle/distribution/categorical.py index b6aa173c298b46..e9270d37780f42 100644 --- a/python/paddle/distribution/categorical.py +++ b/python/paddle/distribution/categorical.py @@ -13,6 +13,7 @@ # limitations under the License. import numpy as np + import paddle from paddle.distribution import distribution from paddle.fluid.data_feeder import check_type, convert_dtype diff --git a/python/paddle/distribution/dirichlet.py b/python/paddle/distribution/dirichlet.py index ff8ea19daa2e5c..bb89d323c24f2a 100644 --- a/python/paddle/distribution/dirichlet.py +++ b/python/paddle/distribution/dirichlet.py @@ -15,7 +15,7 @@ import paddle from paddle.distribution import exponential_family from paddle.fluid.data_feeder import check_variable_and_dtype -from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph +from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode from paddle.fluid.layer_helper import LayerHelper diff --git a/python/paddle/distribution/distribution.py b/python/paddle/distribution/distribution.py index 27febe07a3810c..15ee5d8e011e4e 100644 --- a/python/paddle/distribution/distribution.py +++ b/python/paddle/distribution/distribution.py @@ -22,13 +22,14 @@ import warnings import numpy as np + import paddle from paddle import _C_ops, _legacy_C_ops from paddle.fluid.data_feeder import check_variable_and_dtype, convert_dtype from paddle.fluid.framework import ( + _in_legacy_dygraph, _non_static_mode, in_dygraph_mode, - _in_legacy_dygraph, ) from paddle.fluid.layers import tensor diff --git a/python/paddle/distribution/gumbel.py b/python/paddle/distribution/gumbel.py index 583acef2c26f3b..067f87ca8375af 100644 --- a/python/paddle/distribution/gumbel.py +++ b/python/paddle/distribution/gumbel.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -import numbers import math +import numbers + import numpy as np +import paddle from paddle.distribution.transformed_distribution import TransformedDistribution from paddle.fluid import framework as framework diff --git a/python/paddle/distribution/kl.py b/python/paddle/distribution/kl.py index cf8857629893be..010f781d041cc4 100644 --- a/python/paddle/distribution/kl.py +++ b/python/paddle/distribution/kl.py @@ -20,10 +20,10 @@ from paddle.distribution.dirichlet import Dirichlet from paddle.distribution.distribution import Distribution from paddle.distribution.exponential_family import ExponentialFamily -from paddle.distribution.normal import Normal +from paddle.distribution.laplace import Laplace from paddle.distribution.lognormal import LogNormal +from paddle.distribution.normal import Normal from paddle.distribution.uniform import Uniform -from paddle.distribution.laplace import Laplace from paddle.fluid.framework import _non_static_mode __all__ = ["register_kl", "kl_divergence"] diff --git a/python/paddle/distribution/laplace.py b/python/paddle/distribution/laplace.py index e6da366069856f..5c047aebfd9b21 100644 --- a/python/paddle/distribution/laplace.py +++ b/python/paddle/distribution/laplace.py @@ -15,6 +15,7 @@ import numbers import numpy as np + import paddle from paddle.distribution import distribution from paddle.fluid import framework as framework diff --git a/python/paddle/distribution/multinomial.py b/python/paddle/distribution/multinomial.py index 5630fd14691890..585cb2152d9480 100644 --- a/python/paddle/distribution/multinomial.py +++ b/python/paddle/distribution/multinomial.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections.abc import Iterable + import paddle from paddle.distribution import categorical, distribution -from collections.abc import Iterable - class Multinomial(distribution.Distribution): r""" diff --git a/python/paddle/distribution/normal.py b/python/paddle/distribution/normal.py index c2b20297d5e2a3..2d4b0bed98090c 100644 --- a/python/paddle/distribution/normal.py +++ b/python/paddle/distribution/normal.py @@ -13,7 +13,10 @@ # limitations under the License. import math +from collections.abc import Iterable + import numpy as np + import paddle from paddle.distribution import distribution from paddle.fluid.data_feeder import check_type, convert_dtype @@ -26,8 +29,6 @@ tensor, ) -from collections.abc import Iterable - class Normal(distribution.Distribution): r"""The Normal distribution with location `loc` and `scale` parameters. diff --git a/python/paddle/distribution/transformed_distribution.py b/python/paddle/distribution/transformed_distribution.py index f0d38b2d336fa0..95f7670c563a32 100644 --- a/python/paddle/distribution/transformed_distribution.py +++ b/python/paddle/distribution/transformed_distribution.py @@ -14,9 +14,7 @@ import typing -from paddle.distribution import distribution -from paddle.distribution import transform -from paddle.distribution import independent +from paddle.distribution import distribution, independent, transform class TransformedDistribution(distribution.Distribution): diff --git a/python/paddle/distribution/uniform.py b/python/paddle/distribution/uniform.py index a5013ab9880dd2..9571cdb08c2591 100644 --- a/python/paddle/distribution/uniform.py +++ b/python/paddle/distribution/uniform.py @@ -13,14 +13,15 @@ # limitations under the License. 
import numpy as np + +import paddle from paddle import _C_ops, _legacy_C_ops from paddle.distribution import distribution -from paddle.tensor import random from paddle.fluid.data_feeder import check_type, convert_dtype from paddle.fluid.framework import ( + _in_legacy_dygraph, _non_static_mode, in_dygraph_mode, - _in_legacy_dygraph, ) from paddle.fluid.layers import ( elementwise_add, @@ -29,7 +30,7 @@ nn, tensor, ) -import paddle +from paddle.tensor import random class Uniform(distribution.Distribution): diff --git a/python/paddle/distribution/variable.py b/python/paddle/distribution/variable.py index 58d4a7521542ea..e9327fdee0b73c 100644 --- a/python/paddle/distribution/variable.py +++ b/python/paddle/distribution/variable.py @@ -12,9 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.distribution import constraint - import paddle +from paddle.distribution import constraint class Variable: diff --git a/python/paddle/geometric/math.py b/python/paddle/geometric/math.py index 03bb398c0e022f..4ed370da0c5ff4 100644 --- a/python/paddle/geometric/math.py +++ b/python/paddle/geometric/math.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.data_feeder import check_variable_and_dtype from paddle import _C_ops, _legacy_C_ops +from paddle.fluid.data_feeder import check_variable_and_dtype from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode +from paddle.fluid.layer_helper import LayerHelper __all__ = [] diff --git a/python/paddle/geometric/message_passing/send_recv.py b/python/paddle/geometric/message_passing/send_recv.py index 5131930d1d9598..76627e9827c99d 100644 --- a/python/paddle/geometric/message_passing/send_recv.py +++ b/python/paddle/geometric/message_passing/send_recv.py @@ -13,15 +13,15 @@ # limitations under the License. import numpy as np -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode -from paddle.fluid.framework import Variable + +from paddle import _C_ops, _legacy_C_ops from paddle.fluid.data_feeder import ( check_dtype, check_type, check_variable_and_dtype, ) -from paddle import _C_ops, _legacy_C_ops +from paddle.fluid.framework import Variable, _in_legacy_dygraph, in_dygraph_mode +from paddle.fluid.layer_helper import LayerHelper from .utils import ( convert_out_size_to_list, diff --git a/python/paddle/geometric/message_passing/utils.py b/python/paddle/geometric/message_passing/utils.py index 12e2e52d6b454d..09a051feb93402 100644 --- a/python/paddle/geometric/message_passing/utils.py +++ b/python/paddle/geometric/message_passing/utils.py @@ -13,9 +13,10 @@ # limitations under the License. import numpy as np + import paddle -from paddle.fluid.framework import Variable from paddle.fluid.data_feeder import check_dtype, convert_dtype +from paddle.fluid.framework import Variable from paddle.fluid.layers.tensor import cast diff --git a/python/paddle/geometric/reindex.py b/python/paddle/geometric/reindex.py index 8b755d191c44e3..75d6f688361cdd 100644 --- a/python/paddle/geometric/reindex.py +++ b/python/paddle/geometric/reindex.py @@ -13,10 +13,10 @@ # limitations under the License. 
import paddle -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _non_static_mode, Variable -from paddle.fluid.data_feeder import check_variable_and_dtype from paddle import _legacy_C_ops +from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle.fluid.framework import Variable, _non_static_mode +from paddle.fluid.layer_helper import LayerHelper __all__ = [] diff --git a/python/paddle/geometric/sampling/neighbors.py b/python/paddle/geometric/sampling/neighbors.py index 092d87f92331cb..093fd39617af3e 100644 --- a/python/paddle/geometric/sampling/neighbors.py +++ b/python/paddle/geometric/sampling/neighbors.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.data_feeder import check_variable_and_dtype from paddle import _legacy_C_ops +from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle.fluid.framework import _non_static_mode +from paddle.fluid.layer_helper import LayerHelper __all__ = [] diff --git a/python/paddle/hapi/callbacks.py b/python/paddle/hapi/callbacks.py index f9d280d76dc54e..2d069a39e5f4d8 100644 --- a/python/paddle/hapi/callbacks.py +++ b/python/paddle/hapi/callbacks.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import numbers import os import time -import numbers import warnings import numpy as np diff --git a/python/paddle/hapi/dynamic_flops.py b/python/paddle/hapi/dynamic_flops.py index 677a7f2b732f6a..3043aa8f493674 100644 --- a/python/paddle/hapi/dynamic_flops.py +++ b/python/paddle/hapi/dynamic_flops.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import warnings -import paddle.nn as nn + import numpy as np -from .static_flops import static_flops, Table -from paddle.jit.dy2static.program_translator import ( - unwrap_decorators, -) + +import paddle +import paddle.nn as nn +from paddle.jit.dy2static.program_translator import unwrap_decorators + +from .static_flops import Table, static_flops __all__ = [] diff --git a/python/paddle/hapi/hub.py b/python/paddle/hapi/hub.py index 3d5cb3027c49d3..2788922482ab79 100644 --- a/python/paddle/hapi/hub.py +++ b/python/paddle/hapi/hub.py @@ -13,9 +13,10 @@ # limitations under the License. import os -import sys import shutil +import sys import zipfile + from paddle.utils.download import get_path_from_url __all__ = [] diff --git a/python/paddle/hapi/logger.py b/python/paddle/hapi/logger.py index 918c73a308b5aa..ac6f29b338d43e 100644 --- a/python/paddle/hapi/logger.py +++ b/python/paddle/hapi/logger.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import os import sys -import logging from paddle.fluid.dygraph.parallel import ParallelEnv diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py index c38497d1a9b0a7..025abd9acc9dcd 100644 --- a/python/paddle/hapi/model.py +++ b/python/paddle/hapi/model.py @@ -12,42 +12,38 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import contextlib import inspect import os import pickle -import numpy as np -import warnings -import time import socket -import contextlib +import time +import warnings + +import numpy as np import paddle +import paddle.distributed as dist +import paddle.distributed.fleet as fleet from paddle import fluid +from paddle.autograd import no_grad +from paddle.distributed.fleet.base import role_maker from paddle.fluid import core -from paddle.fluid.framework import _non_static_mode +from paddle.fluid.dygraph.base import to_variable +from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX +from paddle.fluid.dygraph.parallel import ParallelEnv +from paddle.fluid.executor import global_scope from paddle.fluid.framework import Variable -from paddle.fluid.framework import _get_paddle_place from paddle.fluid.framework import _current_expected_place as _get_device -from paddle.fluid.executor import global_scope +from paddle.fluid.framework import _get_paddle_place, _non_static_mode from paddle.fluid.io import is_belong_to_optimizer -from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.parallel import ParallelEnv -from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX -from paddle.fluid.dygraph.io import INFER_PARAMS_SUFFIX -from paddle.fluid.layers.utils import flatten from paddle.fluid.layers import collective - -from paddle.io import DataLoader -from paddle.io import Dataset -from paddle.io import DistributedBatchSampler +from paddle.fluid.layers.utils import flatten +from paddle.io import DataLoader, Dataset, DistributedBatchSampler from paddle.metric import Metric from paddle.static import InputSpec as Input -import paddle.distributed as dist -import paddle.distributed.fleet as fleet -from paddle.distributed.fleet.base import role_maker -from paddle.autograd import no_grad -from .callbacks import config_callbacks, EarlyStopping +from .callbacks import EarlyStopping, config_callbacks from .model_summary import summary __all__ = [] diff --git a/python/paddle/hapi/model_summary.py b/python/paddle/hapi/model_summary.py index 4b6322bb14f529..259262a106def8 100644 --- a/python/paddle/hapi/model_summary.py +++ b/python/paddle/hapi/model_summary.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import numbers import warnings +from collections import OrderedDict + import numpy as np -import numbers import paddle import paddle.nn as nn -from paddle.static import InputSpec from paddle.autograd import no_grad -from collections import OrderedDict +from paddle.static import InputSpec __all__ = [] diff --git a/python/paddle/hapi/static_flops.py b/python/paddle/hapi/static_flops.py index c72bcc4273c528..c6d099d8b88725 100644 --- a/python/paddle/hapi/static_flops.py +++ b/python/paddle/hapi/static_flops.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np from collections import OrderedDict + +import numpy as np + from paddle.static import Program, Variable __all__ = [] diff --git a/python/paddle/hub.py b/python/paddle/hub.py index e6f38d6ee11f70..1960d98e95b670 100644 --- a/python/paddle/hub.py +++ b/python/paddle/hub.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .hapi.hub import list # noqa: F401 from .hapi.hub import help # noqa: F401 +from .hapi.hub import list # noqa: F401 from .hapi.hub import load # noqa: F401 __all__ = ['list', 'help', 'load'] # noqa diff --git a/python/paddle/incubate/autograd/primrules.py b/python/paddle/incubate/autograd/primrules.py index 43fd9611a7637a..137747e75da15e 100644 --- a/python/paddle/incubate/autograd/primrules.py +++ b/python/paddle/incubate/autograd/primrules.py @@ -21,6 +21,7 @@ from . import primops from .primops import ( add, + bernoulli, broadcast, concat, cos, @@ -39,6 +40,7 @@ neg, reduce_sum, reshape, + rsqrt, scatter_add, select, set_value, @@ -50,8 +52,6 @@ sub, tanh, transpose, - bernoulli, - rsqrt, uniform_random, ) from .primreg import ( diff --git a/python/paddle/incubate/autotune.py b/python/paddle/incubate/autotune.py index daca4aee14e0b8..f7b1d9091eee9c 100644 --- a/python/paddle/incubate/autotune.py +++ b/python/paddle/incubate/autotune.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import json import warnings + +import paddle from paddle.fluid import core __all__ = ['set_config'] diff --git a/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py b/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py index 9587dfb346adda..b024a41a5f18eb 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py +++ b/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py @@ -20,10 +20,12 @@ # Licensed under the Apache License, Version 2.0 (the "License"). import math + import paddle import paddle.nn.functional as F -from .naive_gate import NaiveGate + from ..utils import limit_by_capacity +from .naive_gate import NaiveGate class GShardGate(NaiveGate): diff --git a/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py b/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py index 4781f6bba898c4..e4a518b8db1bc6 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py +++ b/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py @@ -19,11 +19,11 @@ # Copyright 2021, Jiaao He. All rights reserved. # Licensed under the Apache License, Version 2.0 (the "License"). -from .base_gate import BaseGate - import paddle import paddle.nn as nn +from .base_gate import BaseGate + class NaiveGate(BaseGate): def __init__(self, d_model, num_expert, world_size, topk=2): diff --git a/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py b/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py index c9e2f1fca498aa..98474dafd0111a 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py +++ b/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py @@ -20,10 +20,12 @@ # Licensed under the Apache License, Version 2.0 (the "License"). import math + import paddle import paddle.nn.functional as F -from .naive_gate import NaiveGate + from ..utils import limit_by_capacity +from .naive_gate import NaiveGate class SwitchGate(NaiveGate): diff --git a/python/paddle/incubate/distributed/models/moe/grad_clip.py b/python/paddle/incubate/distributed/models/moe/grad_clip.py index e55172a34ba470..c3faa7bd202cc8 100644 --- a/python/paddle/incubate/distributed/models/moe/grad_clip.py +++ b/python/paddle/incubate/distributed/models/moe/grad_clip.py @@ -13,11 +13,10 @@ # limitations under the License. 
import paddle - import paddle.distributed as dist +from paddle.fluid import core, layers from paddle.fluid.clip import ClipGradBase, _squared_l2_norm from paddle.fluid.dygraph import base as imperative_base -from paddle.fluid import core, layers class ClipGradForMOEByGlobalNorm(ClipGradBase): diff --git a/python/paddle/incubate/distributed/models/moe/moe_layer.py b/python/paddle/incubate/distributed/models/moe/moe_layer.py index 31740e92c72252..e5456cf3785052 100644 --- a/python/paddle/incubate/distributed/models/moe/moe_layer.py +++ b/python/paddle/incubate/distributed/models/moe/moe_layer.py @@ -20,16 +20,17 @@ # Licensed under the Apache License, Version 2.0 (the "License"). import numpy as np + import paddle import paddle.nn as nn -from paddle.distributed.utils.moe_utils import global_scatter, global_gather - from paddle.autograd import PyLayer -from .gate import NaiveGate, GShardGate, SwitchGate, BaseGate -from .utils import count_by_gate +from paddle.distributed.utils.moe_utils import global_gather, global_scatter from paddle.fluid.framework import in_dygraph_mode from paddle.incubate.distributed.fleet import recompute_hybrid +from .gate import BaseGate, GShardGate, NaiveGate, SwitchGate +from .utils import count_by_gate + def _local_scatter(inp, pos): if pos.shape != [0]: diff --git a/python/paddle/incubate/distributed/models/moe/utils.py b/python/paddle/incubate/distributed/models/moe/utils.py index 65ab86ded6aa92..aa952f878b5e6b 100644 --- a/python/paddle/incubate/distributed/models/moe/utils.py +++ b/python/paddle/incubate/distributed/models/moe/utils.py @@ -19,13 +19,13 @@ # Copyright 2021, Jiaao He. All rights reserved. # Licensed under the Apache License, Version 2.0 (the "License"). +import paddle from paddle.distributed.models.moe.utils import ( - _number_count, + _assign_pos, _limit_by_capacity, + _number_count, _prune_gate_by_capacity, - _assign_pos, ) -import paddle from paddle.fluid.framework import in_dygraph_mode diff --git a/python/paddle/incubate/distributed/utils/io/dist_load.py b/python/paddle/incubate/distributed/utils/io/dist_load.py index 38907489c8d348..5815b0237baefc 100644 --- a/python/paddle/incubate/distributed/utils/io/dist_load.py +++ b/python/paddle/incubate/distributed/utils/io/dist_load.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.framework import dygraph_only +import copy +import re + import paddle import paddle.distributed as dist from paddle.distributed import fleet -import re -import copy +from paddle.fluid.framework import dygraph_only @dygraph_only diff --git a/python/paddle/incubate/distributed/utils/io/dist_save.py b/python/paddle/incubate/distributed/utils/io/dist_save.py index 2244aa974fc79f..94a0df27d5c653 100644 --- a/python/paddle/incubate/distributed/utils/io/dist_save.py +++ b/python/paddle/incubate/distributed/utils/io/dist_save.py @@ -12,16 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.distributed as dist -import paddle.distributed.fleet as fleet +import copy import re +import sys + import paddle +import paddle.distributed as dist +import paddle.distributed.fleet as fleet from paddle.distributed.fleet.utils.log_util import logger from paddle.fluid.framework import dygraph_only -import copy -import sys + from .save_for_auto import save_for_auto_inference -from paddle.distributed.fleet.utils.log_util import logger __all__ = ["save", "save_for_auto_inference"] diff --git a/python/paddle/incubate/distributed/utils/io/save_for_auto.py b/python/paddle/incubate/distributed/utils/io/save_for_auto.py index 30b1ac0c9ba534..3008201d2fd119 100644 --- a/python/paddle/incubate/distributed/utils/io/save_for_auto.py +++ b/python/paddle/incubate/distributed/utils/io/save_for_auto.py @@ -12,20 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.distributed as dist -import paddle.distributed.fleet as fleet -import re -import paddle -from paddle.distributed.fleet.utils.log_util import logger +import copy import os import pickle +import re + +import numpy as np + +import paddle +import paddle.distributed as dist +import paddle.distributed.fleet as fleet from paddle.distributed.fleet.meta_parallel.sharding.group_sharded_stage3 import ( GroupShardedStage3, ) +from paddle.distributed.fleet.utils.log_util import logger from paddle.fluid.framework import dygraph_only -import copy - -import numpy as np __all__ = ["save_for_auto_inference"] diff --git a/python/paddle/incubate/nn/functional/fused_matmul_bias.py b/python/paddle/incubate/nn/functional/fused_matmul_bias.py index e7180abfe9b14d..c80a437b390052 100644 --- a/python/paddle/incubate/nn/functional/fused_matmul_bias.py +++ b/python/paddle/incubate/nn/functional/fused_matmul_bias.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.layer_helper import LayerHelper +from paddle import _legacy_C_ops from paddle.fluid.framework import _non_static_mode +from paddle.fluid.layer_helper import LayerHelper from paddle.tensor.linalg import matmul -from paddle import _legacy_C_ops def fused_matmul_bias( diff --git a/python/paddle/incubate/nn/functional/fused_transformer.py b/python/paddle/incubate/nn/functional/fused_transformer.py index e6c8f33efb2b31..a7e342a8acc36f 100644 --- a/python/paddle/incubate/nn/functional/fused_transformer.py +++ b/python/paddle/incubate/nn/functional/fused_transformer.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _non_static_mode, default_main_program -from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype -from paddle.fluid import core from paddle import _legacy_C_ops +from paddle.fluid import core +from paddle.fluid.data_feeder import check_dtype, check_variable_and_dtype +from paddle.fluid.framework import _non_static_mode, default_main_program +from paddle.fluid.layer_helper import LayerHelper __all__ = [] diff --git a/python/paddle/incubate/nn/layer/fused_linear.py b/python/paddle/incubate/nn/layer/fused_linear.py index 0a8d6ad46cd949..965aedaa391e97 100644 --- a/python/paddle/incubate/nn/layer/fused_linear.py +++ b/python/paddle/incubate/nn/layer/fused_linear.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.nn import Layer from paddle.incubate.nn import functional as F +from paddle.nn import Layer class FusedLinear(Layer): diff --git a/python/paddle/incubate/nn/layer/fused_transformer.py b/python/paddle/incubate/nn/layer/fused_transformer.py index 72b074a68cb154..ad96ab9669e674 100644 --- a/python/paddle/incubate/nn/layer/fused_transformer.py +++ b/python/paddle/incubate/nn/layer/fused_transformer.py @@ -11,19 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np + +import paddle +from paddle.fluid import core +from paddle.fluid.core import VarDesc +from paddle.fluid.dygraph import no_grad +from paddle.fluid.framework import _non_static_mode, convert_np_dtype_to_dtype_ from paddle.incubate.nn import functional as incubate_f from paddle.nn import Layer -import paddle +from paddle.nn.initializer import Constant from paddle.nn.layer.transformer import ( _convert_attention_mask, _convert_param_attr_to_list, ) -from paddle.nn.initializer import Constant -from paddle.fluid.dygraph import no_grad -from paddle.fluid.framework import convert_np_dtype_to_dtype_, _non_static_mode -from paddle.fluid.core import VarDesc -from paddle.fluid import core -import numpy as np # for distributed tensor model parallel diff --git a/python/paddle/incubate/nn/loss.py b/python/paddle/incubate/nn/loss.py index 7175834084499b..9d5d261839143c 100644 --- a/python/paddle/incubate/nn/loss.py +++ b/python/paddle/incubate/nn/loss.py @@ -12,12 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.layer_helper import LayerHelper -from paddle.framework import ( - _non_static_mode, -) -from paddle.fluid.data_feeder import check_variable_and_dtype from paddle import _legacy_C_ops +from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle.fluid.layer_helper import LayerHelper +from paddle.framework import _non_static_mode def identity_loss(x, reduction="none"): diff --git a/python/paddle/incubate/operators/graph_khop_sampler.py b/python/paddle/incubate/operators/graph_khop_sampler.py index bbe8d6a5646d37..7bea5dbe762362 100644 --- a/python/paddle/incubate/operators/graph_khop_sampler.py +++ b/python/paddle/incubate/operators/graph_khop_sampler.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.data_feeder import check_variable_and_dtype from paddle import _legacy_C_ops +from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle.fluid.framework import _non_static_mode +from paddle.fluid.layer_helper import LayerHelper def graph_khop_sampler( diff --git a/python/paddle/incubate/operators/graph_reindex.py b/python/paddle/incubate/operators/graph_reindex.py index 0ac5f0246f26ce..9538fbf299e547 100644 --- a/python/paddle/incubate/operators/graph_reindex.py +++ b/python/paddle/incubate/operators/graph_reindex.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.data_feeder import check_variable_and_dtype -from paddle import _legacy_C_ops import paddle.utils.deprecated as deprecated +from paddle import _legacy_C_ops +from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle.fluid.framework import _non_static_mode +from paddle.fluid.layer_helper import LayerHelper @deprecated( diff --git a/python/paddle/incubate/operators/graph_sample_neighbors.py b/python/paddle/incubate/operators/graph_sample_neighbors.py index 14af243784f710..75d365b16d11d7 100644 --- a/python/paddle/incubate/operators/graph_sample_neighbors.py +++ b/python/paddle/incubate/operators/graph_sample_neighbors.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.data_feeder import check_variable_and_dtype -from paddle import _legacy_C_ops import paddle.utils.deprecated as deprecated +from paddle import _legacy_C_ops +from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle.fluid.framework import _non_static_mode +from paddle.fluid.layer_helper import LayerHelper @deprecated( diff --git a/python/paddle/incubate/operators/graph_send_recv.py b/python/paddle/incubate/operators/graph_send_recv.py index 73edc03cf4414d..ed3307756535c2 100644 --- a/python/paddle/incubate/operators/graph_send_recv.py +++ b/python/paddle/incubate/operators/graph_send_recv.py @@ -13,18 +13,18 @@ # limitations under the License. import numpy as np -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode -from paddle.fluid.framework import Variable + +import paddle.utils.deprecated as deprecated +from paddle import _C_ops, _legacy_C_ops from paddle.fluid.data_feeder import ( - check_variable_and_dtype, - check_type, check_dtype, + check_type, + check_variable_and_dtype, convert_dtype, ) +from paddle.fluid.framework import Variable, _in_legacy_dygraph, in_dygraph_mode +from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layers.tensor import cast -from paddle import _C_ops, _legacy_C_ops -import paddle.utils.deprecated as deprecated @deprecated( diff --git a/python/paddle/incubate/operators/resnet_unit.py b/python/paddle/incubate/operators/resnet_unit.py index fee1eb2d3478d2..2d5d6305ab6325 100644 --- a/python/paddle/incubate/operators/resnet_unit.py +++ b/python/paddle/incubate/operators/resnet_unit.py @@ -13,12 +13,13 @@ # limitations under the License. 
import numpy as np + import paddle.fluid as fluid -from paddle.nn import initializer as I -from paddle.nn import Layer -from paddle.fluid.layers import utils from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.layers import utils from paddle.fluid.param_attr import ParamAttr +from paddle.nn import Layer +from paddle.nn import initializer as I def resnet_unit( diff --git a/python/paddle/incubate/operators/softmax_mask_fuse.py b/python/paddle/incubate/operators/softmax_mask_fuse.py index b8081af826da30..672f4ad545e43f 100644 --- a/python/paddle/incubate/operators/softmax_mask_fuse.py +++ b/python/paddle/incubate/operators/softmax_mask_fuse.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _non_static_mode from paddle import _legacy_C_ops +from paddle.fluid.framework import _non_static_mode +from paddle.fluid.layer_helper import LayerHelper def softmax_mask_fuse(x, mask, name=None): diff --git a/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py b/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py index aab40986befd0c..936b1971513a04 100644 --- a/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py +++ b/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _non_static_mode from paddle import _legacy_C_ops +from paddle.fluid.framework import _non_static_mode +from paddle.fluid.layer_helper import LayerHelper def softmax_mask_fuse_upper_triangle(x): diff --git a/python/paddle/incubate/optimizer/distributed_fused_lamb.py b/python/paddle/incubate/optimizer/distributed_fused_lamb.py index 921ae72262c8f8..229d4b792c1fa4 100644 --- a/python/paddle/incubate/optimizer/distributed_fused_lamb.py +++ b/python/paddle/incubate/optimizer/distributed_fused_lamb.py @@ -13,15 +13,14 @@ # limitations under the License. 
import os + import paddle -from paddle.fluid import framework, core, layers, unique_name -from paddle.fluid.framework import Variable +from paddle.fluid import core, framework, layers, unique_name from paddle.fluid.clip import ClipGradByGlobalNorm +from paddle.fluid.executor import global_scope +from paddle.fluid.framework import Variable, name_scope from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.optimizer import Optimizer -from paddle.fluid.executor import global_scope -from paddle.fluid.framework import name_scope -from paddle.fluid import core, unique_name def init_communicator(block, rank, ranks, ring_id): diff --git a/python/paddle/incubate/optimizer/functional/bfgs.py b/python/paddle/incubate/optimizer/functional/bfgs.py index 6da30309c74a62..a62f70cccf38e3 100644 --- a/python/paddle/incubate/optimizer/functional/bfgs.py +++ b/python/paddle/incubate/optimizer/functional/bfgs.py @@ -14,15 +14,15 @@ import numpy as np +import paddle + from .line_search import strong_wolfe from .utils import ( _value_and_gradient, - check_input_type, check_initial_inverse_hessian_estimate, + check_input_type, ) -import paddle - def minimize_bfgs( objective_func, diff --git a/python/paddle/incubate/optimizer/functional/lbfgs.py b/python/paddle/incubate/optimizer/functional/lbfgs.py index a7bfc9a6c0da9b..9001c2812b7685 100644 --- a/python/paddle/incubate/optimizer/functional/lbfgs.py +++ b/python/paddle/incubate/optimizer/functional/lbfgs.py @@ -14,15 +14,15 @@ import numpy as np +import paddle + from .line_search import strong_wolfe from .utils import ( _value_and_gradient, - check_input_type, check_initial_inverse_hessian_estimate, + check_input_type, ) -import paddle - def minimize_lbfgs( objective_func, diff --git a/python/paddle/incubate/optimizer/functional/line_search.py b/python/paddle/incubate/optimizer/functional/line_search.py index 94400200f2535d..aa40afc7010d28 100644 --- a/python/paddle/incubate/optimizer/functional/line_search.py +++ b/python/paddle/incubate/optimizer/functional/line_search.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .utils import _value_and_gradient import paddle +from .utils import _value_and_gradient + def cubic_interpolation_(x1, f1, g1, x2, f2, g2): r"""Cubic interpolation between (x1, f1, g1) and (x2, f2, g2). diff --git a/python/paddle/incubate/optimizer/functional/utils.py b/python/paddle/incubate/optimizer/functional/utils.py index f9d0dddb117df0..79b6085a74783f 100644 --- a/python/paddle/incubate/optimizer/functional/utils.py +++ b/python/paddle/incubate/optimizer/functional/utils.py @@ -13,8 +13,8 @@ # limitations under the License. import paddle -from paddle.fluid.framework import Variable from paddle.fluid.data_feeder import check_type +from paddle.fluid.framework import Variable def check_input_type(input, name, op_name): diff --git a/python/paddle/incubate/optimizer/lookahead.py b/python/paddle/incubate/optimizer/lookahead.py index ff75652bf46e8a..d3b39bb77074eb 100644 --- a/python/paddle/incubate/optimizer/lookahead.py +++ b/python/paddle/incubate/optimizer/lookahead.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from paddle.optimizer import Optimizer +import paddle from paddle.fluid import framework, layers, unique_name +from paddle.fluid.dygraph import base as imperative_base from paddle.fluid.framework import Variable from paddle.fluid.layer_helper import LayerHelper -import paddle -from paddle.fluid.dygraph import base as imperative_base +from paddle.optimizer import Optimizer __all__ = [] diff --git a/python/paddle/incubate/optimizer/modelaverage.py b/python/paddle/incubate/optimizer/modelaverage.py index 1f9c60cf508b38..21b176573407aa 100644 --- a/python/paddle/incubate/optimizer/modelaverage.py +++ b/python/paddle/incubate/optimizer/modelaverage.py @@ -12,15 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.optimizer import Optimizer -from paddle.fluid import framework, layers -from paddle.fluid.framework import Program -from paddle.fluid.layer_helper import LayerHelper import paddle +from paddle import _C_ops, _legacy_C_ops +from paddle.fluid import framework, layers from paddle.fluid.dygraph import base as imperative_base +from paddle.fluid.framework import Program, in_dygraph_mode +from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.wrapped_decorator import signature_safe_contextmanager -from paddle import _C_ops, _legacy_C_ops -from paddle.fluid.framework import in_dygraph_mode +from paddle.optimizer import Optimizer __all__ = [] diff --git a/python/paddle/incubate/tensor/manipulation.py b/python/paddle/incubate/tensor/manipulation.py index b5f1681b9e2eba..4d65934e2e671b 100644 --- a/python/paddle/incubate/tensor/manipulation.py +++ b/python/paddle/incubate/tensor/manipulation.py @@ -12,11 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from paddle import _C_ops, _legacy_C_ops +from paddle.fluid.data_feeder import check_variable_and_dtype from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.data_feeder import check_variable_and_dtype -from paddle import _C_ops, _legacy_C_ops - __all__ = [] diff --git a/python/paddle/incubate/tensor/math.py b/python/paddle/incubate/tensor/math.py index 36f4944427f372..21e49135441cac 100644 --- a/python/paddle/incubate/tensor/math.py +++ b/python/paddle/incubate/tensor/math.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.layer_helper import LayerHelper, _non_static_mode -from paddle.fluid.data_feeder import check_variable_and_dtype +import paddle.utils.deprecated as deprecated from paddle import _C_ops, _legacy_C_ops +from paddle.fluid.data_feeder import check_variable_and_dtype from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode -import paddle.utils.deprecated as deprecated +from paddle.fluid.layer_helper import LayerHelper, _non_static_mode __all__ = [] diff --git a/python/paddle/incubate/xpu/resnet_block.py b/python/paddle/incubate/xpu/resnet_block.py index a02dcffeff8979..6e39893367c8f5 100644 --- a/python/paddle/incubate/xpu/resnet_block.py +++ b/python/paddle/incubate/xpu/resnet_block.py @@ -13,13 +13,14 @@ # limitations under the License. 
import numpy as np + import paddle.fluid as fluid -from paddle.nn import initializer as I -from paddle.nn import Layer -from paddle.fluid.layers import utils +from paddle import _legacy_C_ops from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.layers import utils from paddle.fluid.param_attr import ParamAttr -from paddle import _legacy_C_ops +from paddle.nn import Layer +from paddle.nn import initializer as I __all__ = ['resnet_basic_block', 'ResNetBasicBlock'] diff --git a/python/paddle/linalg.py b/python/paddle/linalg.py index 9ea43479569b40..bd899037fd63d8 100644 --- a/python/paddle/linalg.py +++ b/python/paddle/linalg.py @@ -12,28 +12,28 @@ # See the License for the specific language governing permissions and # limitations under the License. +from .tensor import inverse as inv # noqa: F401 from .tensor.linalg import cholesky # noqa: F401 -from .tensor.linalg import norm # noqa: F401 -from .tensor.linalg import eig # noqa: F401 -from .tensor.linalg import cov # noqa: F401 -from .tensor.linalg import corrcoef # noqa: F401 -from .tensor.linalg import cond # noqa: F401 -from .tensor.linalg import matrix_power # noqa: F401 -from .tensor.linalg import solve # noqa: F401 from .tensor.linalg import cholesky_solve # noqa: F401 -from .tensor import inverse as inv # noqa: F401 +from .tensor.linalg import cond # noqa: F401 +from .tensor.linalg import corrcoef # noqa: F401 +from .tensor.linalg import cov # noqa: F401 +from .tensor.linalg import det # noqa: F401 +from .tensor.linalg import eig # noqa: F401 +from .tensor.linalg import eigh # noqa: F401 from .tensor.linalg import eigvals # noqa: F401 -from .tensor.linalg import multi_dot # noqa: F401 -from .tensor.linalg import matrix_rank # noqa: F401 -from .tensor.linalg import svd # noqa: F401 from .tensor.linalg import eigvalsh # noqa: F401 -from .tensor.linalg import qr # noqa: F401 from .tensor.linalg import lu # noqa: F401 from .tensor.linalg import lu_unpack # noqa: F401 -from .tensor.linalg import eigh # noqa: F401 -from .tensor.linalg import det # noqa: F401 -from .tensor.linalg import slogdet # noqa: F401 +from .tensor.linalg import matrix_power # noqa: F401 +from .tensor.linalg import matrix_rank # noqa: F401 +from .tensor.linalg import multi_dot # noqa: F401 +from .tensor.linalg import norm # noqa: F401 from .tensor.linalg import pinv # noqa: F401 +from .tensor.linalg import qr # noqa: F401 +from .tensor.linalg import slogdet # noqa: F401 +from .tensor.linalg import solve # noqa: F401 +from .tensor.linalg import svd # noqa: F401 from .tensor.linalg import triangular_solve # noqa: F401 from .tensor.linalg import lstsq diff --git a/python/paddle/metric/metrics.py b/python/paddle/metric/metrics.py index cdac604835862a..0fa5c84f07f7b9 100644 --- a/python/paddle/metric/metrics.py +++ b/python/paddle/metric/metrics.py @@ -13,14 +13,16 @@ # limitations under the License. 
import abc + import numpy as np -from ..fluid.data_feeder import check_variable_and_dtype -from ..fluid.layer_helper import LayerHelper -from ..fluid.framework import _non_static_mode, _varbase_creator import paddle from paddle import _legacy_C_ops +from ..fluid.data_feeder import check_variable_and_dtype +from ..fluid.framework import _non_static_mode, _varbase_creator +from ..fluid.layer_helper import LayerHelper + __all__ = [] diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py index af5fa1336f1f0b..69e54a067df25f 100644 --- a/python/paddle/nn/functional/activation.py +++ b/python/paddle/nn/functional/activation.py @@ -12,21 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...tensor.ops import sigmoid # noqa: F401 -from ...tensor.math import tanh # noqa: F401 -from ...tensor.math import tanh_ # noqa: F401 - -from ...fluid.dygraph.inplace_utils import inplace_apis_in_dygraph_only -from ...tensor.manipulation import chunk - -from ...fluid.layer_helper import LayerHelper -from ...fluid.framework import convert_np_dtype_to_dtype_ -from ...fluid.framework import _in_legacy_dygraph, in_dygraph_mode -from ...fluid.data_feeder import check_variable_and_dtype, check_dtype import paddle from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode -from paddle.framework import core from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode +from paddle.framework import core + +from ...fluid.data_feeder import check_dtype, check_variable_and_dtype +from ...fluid.dygraph.inplace_utils import inplace_apis_in_dygraph_only +from ...fluid.framework import ( + _in_legacy_dygraph, + convert_np_dtype_to_dtype_, + in_dygraph_mode, +) +from ...fluid.layer_helper import LayerHelper +from ...tensor.manipulation import chunk +from ...tensor.math import tanh # noqa: F401 +from ...tensor.math import tanh_ # noqa: F401 +from ...tensor.ops import sigmoid # noqa: F401 __all__ = [] diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index b8cef847477910..2e27a4fc8c6df4 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -13,35 +13,29 @@ # limitations under the License. 
import paddle +from paddle import _C_ops, _legacy_C_ops +from paddle.fluid.framework import _in_legacy_dygraph from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layers.tensor import fill_constant -from ...tensor import concat -from ...tensor.creation import zeros -from paddle.static import Variable +from paddle.framework import core, in_dynamic_mode +from paddle.static import Variable, default_main_program +from paddle.tensor.creation import full -# TODO: define the common functions to build a neural network -from ...tensor.manipulation import squeeze -from ...tensor.manipulation import unsqueeze -from ...tensor import clip -from ...tensor import sum -from ...tensor import sqrt from ...fluid.data_feeder import ( - check_variable_and_dtype, check_dtype, check_type, + check_variable_and_dtype, ) from ...fluid.framework import ( _in_legacy_dygraph, _non_static_mode, in_dygraph_mode, ) +from ...tensor import clip, concat, sqrt, sum +from ...tensor.creation import zeros -from paddle import _C_ops, _legacy_C_ops -from paddle.framework import in_dynamic_mode -from paddle.tensor.creation import full -from paddle.framework import core -from paddle.fluid.framework import _in_legacy_dygraph -from paddle.static import default_main_program +# TODO: define the common functions to build a neural network +from ...tensor.manipulation import squeeze, unsqueeze __all__ = [] diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 58f0254f09ac2e..38c5064a1cfc39 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -12,31 +12,32 @@ # See the License for the specific language governing permissions and # limitations under the License. +from paddle import _C_ops, _legacy_C_ops, get_flags, in_dynamic_mode +from paddle.device import ( + get_all_custom_device_type, + is_compiled_with_cuda, + is_compiled_with_npu, + is_compiled_with_rocm, +) +from paddle.fluid.framework import ( + _global_flags, + _in_legacy_dygraph, + in_dygraph_mode, +) + from ...device import get_cudnn_version -from ...static import Variable +from ...fluid.data_feeder import check_dtype, check_variable_and_dtype +from ...fluid.layer_helper import LayerHelper +from ...fluid.layers import nn from ...fluid.layers.utils import ( - convert_to_list, - _is_symmetric_padding, _contain_var, _convert_to_tensor_list, + _is_symmetric_padding, + convert_to_list, ) -from ...fluid.data_feeder import check_variable_and_dtype, check_dtype -from ...fluid.layer_helper import LayerHelper -from ...tensor.manipulation import unsqueeze, squeeze -from ...fluid.layers import nn from ...framework import no_grad -from paddle import _C_ops, _legacy_C_ops -from paddle import get_flags -from paddle import in_dynamic_mode -from paddle.device import is_compiled_with_cuda -from paddle.device import is_compiled_with_npu -from paddle.device import get_all_custom_device_type -from paddle import in_dynamic_mode -from paddle import get_flags -from paddle.device import is_compiled_with_rocm -from paddle.fluid.framework import _global_flags -from paddle.fluid.framework import _in_legacy_dygraph -from paddle.fluid.framework import in_dygraph_mode +from ...static import Variable +from ...tensor.manipulation import squeeze, unsqueeze __all__ = [] diff --git a/python/paddle/nn/functional/distance.py b/python/paddle/nn/functional/distance.py index 4383b0cc7a2f2c..a931d3cb006ada 100644 --- a/python/paddle/nn/functional/distance.py +++ b/python/paddle/nn/functional/distance.py @@ -13,10 +13,11 @@ # 
limitations under the License. import paddle -from ...fluid.data_feeder import check_variable_and_dtype, check_type -from ...fluid.layer_helper import LayerHelper from paddle import _C_ops, _legacy_C_ops -from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph +from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode + +from ...fluid.data_feeder import check_type, check_variable_and_dtype +from ...fluid.layer_helper import LayerHelper __all__ = [] diff --git a/python/paddle/nn/functional/extension.py b/python/paddle/nn/functional/extension.py index 1cc9ad6caf4364..e2327871bcfc35 100644 --- a/python/paddle/nn/functional/extension.py +++ b/python/paddle/nn/functional/extension.py @@ -15,20 +15,24 @@ # TODO: define the extention functions import numpy as np -from ...fluid.data_feeder import check_dtype -from ...fluid.layer_helper import LayerHelper -from ...static import Variable -from ...tensor.creation import assign -from ...tensor.layer_function_generator import templatedoc -from paddle import in_dynamic_mode -from paddle import _C_ops, _legacy_C_ops + +from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode + +from ...fluid.data_feeder import ( + check_dtype, + check_type, + check_variable_and_dtype, +) from ...fluid.framework import ( - _non_static_mode, _in_legacy_dygraph, + _non_static_mode, in_dygraph_mode, ) -from ...fluid.data_feeder import check_variable_and_dtype, check_type -from ...framework import core, convert_np_dtype_to_dtype_ +from ...fluid.layer_helper import LayerHelper +from ...framework import convert_np_dtype_to_dtype_, core +from ...static import Variable +from ...tensor.creation import assign +from ...tensor.layer_function_generator import templatedoc __all__ = [] diff --git a/python/paddle/nn/functional/input.py b/python/paddle/nn/functional/input.py index 674483208204cf..78c8b036d8a7fa 100644 --- a/python/paddle/nn/functional/input.py +++ b/python/paddle/nn/functional/input.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...static import Variable -from ...fluid.layer_helper import LayerHelper -from ...fluid.data_feeder import check_variable_and_dtype from paddle import _C_ops, _legacy_C_ops + +from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.framework import _in_legacy_dygraph, in_dygraph_mode +from ...fluid.layer_helper import LayerHelper +from ...static import Variable __all__ = [] diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index 85167651091969..9a99a6ac9804df 100755 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -13,27 +13,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -from ...fluid.data_feeder import check_variable_and_dtype - # TODO: define loss functions of neural network import paddle import paddle.fluid as fluid -from ...fluid.layers.nn import _elementwise_op_in_dygraph -from ...tensor.manipulation import reshape -from ...fluid.layer_helper import LayerHelper -from ...fluid.framework import _varbase_creator -from ...static import Variable +from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode +from paddle.framework import _non_static_mode, core from paddle.utils import deprecated -from paddle import _C_ops, _legacy_C_ops -from paddle import in_dynamic_mode -from paddle.framework import core, _non_static_mode + +from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.framework import ( + _current_expected_place, _in_legacy_dygraph, - in_dygraph_mode, _non_static_mode, - _current_expected_place, + _varbase_creator, + in_dygraph_mode, ) +from ...fluid.layer_helper import LayerHelper +from ...fluid.layers.nn import _elementwise_op_in_dygraph +from ...static import Variable +from ...tensor.manipulation import reshape __all__ = [] diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py index c356b8c9940750..f2546b62442d05 100644 --- a/python/paddle/nn/functional/norm.py +++ b/python/paddle/nn/functional/norm.py @@ -12,17 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import numbers + # TODO: define normalization api import paddle import paddle.fluid as fluid -from ...fluid.data_feeder import check_variable_and_dtype, check_type -from ...fluid.layer_helper import LayerHelper -from ...fluid import dygraph_utils -import numbers -from paddle import _C_ops, _legacy_C_ops -from paddle import in_dynamic_mode +from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode +from ...fluid import dygraph_utils +from ...fluid.data_feeder import check_type, check_variable_and_dtype +from ...fluid.layer_helper import LayerHelper + __all__ = [] diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index f30be705207cd1..3b8660a677cba9 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -12,14 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# TODO: define pooling functions -from ...fluid.layers import utils, LayerHelper -from ...tensor.manipulation import unsqueeze, squeeze +from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode +from paddle.fluid.framework import ( + Variable, + _in_legacy_dygraph, + _non_static_mode, + in_dygraph_mode, +) + from ...fluid.data_feeder import check_type, check_variable_and_dtype -from paddle import _C_ops, _legacy_C_ops -from paddle import in_dynamic_mode -from paddle.fluid.framework import _in_legacy_dygraph, Variable -from paddle.fluid.framework import in_dygraph_mode, _non_static_mode + +# TODO: define pooling functions +from ...fluid.layers import LayerHelper, utils +from ...tensor.manipulation import squeeze, unsqueeze __all__ = [] diff --git a/python/paddle/nn/functional/sparse_attention.py b/python/paddle/nn/functional/sparse_attention.py index 1437ae8269c7e4..df95efb1705439 100644 --- a/python/paddle/nn/functional/sparse_attention.py +++ b/python/paddle/nn/functional/sparse_attention.py @@ -12,9 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from paddle import _legacy_C_ops, in_dynamic_mode from paddle.fluid.layer_helper import LayerHelper -from paddle import _legacy_C_ops -from paddle import in_dynamic_mode def sparse_attention( diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py index 6d061ff6294b80..54ed9903744e1b 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -12,16 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...device import get_cudnn_version -from ...static import Variable -from ...fluid.layer_helper import LayerHelper -from ...fluid.data_feeder import check_variable_and_dtype -from paddle import _C_ops, _legacy_C_ops -from ...device import is_compiled_with_rocm -from paddle import in_dynamic_mode -from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph +from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode +from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode from paddle.framework import _non_static_mode +from ...device import get_cudnn_version, is_compiled_with_rocm +from ...fluid.data_feeder import check_variable_and_dtype +from ...fluid.layer_helper import LayerHelper +from ...static import Variable + __all__ = [] diff --git a/python/paddle/nn/initializer/assign.py b/python/paddle/nn/initializer/assign.py index 63138b7b482106..052da37af244e0 100644 --- a/python/paddle/nn/initializer/assign.py +++ b/python/paddle/nn/initializer/assign.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import paddle + from ...fluid.data_feeder import check_type from ...fluid.initializer import NumpyArrayInitializer diff --git a/python/paddle/nn/initializer/dirac.py b/python/paddle/nn/initializer/dirac.py index 489033291a7fc3..0917859415d365 100644 --- a/python/paddle/nn/initializer/dirac.py +++ b/python/paddle/nn/initializer/dirac.py @@ -12,15 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ...fluid.initializer import Initializer -from ...fluid.data_feeder import check_variable_and_dtype -from ...fluid.core import VarDesc -from ...fluid import framework -from ...fluid.framework import _current_expected_place -from paddle import in_dynamic_mode +from paddle import _C_ops, in_dynamic_mode from paddle.utils import unique_name -from paddle import _C_ops + from ... import fluid +from ...fluid import framework +from ...fluid.core import VarDesc +from ...fluid.data_feeder import check_variable_and_dtype +from ...fluid.framework import _current_expected_place +from ...fluid.initializer import Initializer __all__ = [] diff --git a/python/paddle/nn/initializer/normal.py b/python/paddle/nn/initializer/normal.py index 7d52339d3865da..5ead30f4f1e3e0 100644 --- a/python/paddle/nn/initializer/normal.py +++ b/python/paddle/nn/initializer/normal.py @@ -12,8 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...fluid.initializer import NormalInitializer -from ...fluid.initializer import TruncatedNormalInitializer +from ...fluid.initializer import NormalInitializer, TruncatedNormalInitializer __all__ = [] diff --git a/python/paddle/nn/initializer/orthogonal.py b/python/paddle/nn/initializer/orthogonal.py index f0e46b48d211f1..764cd6ece4677d 100644 --- a/python/paddle/nn/initializer/orthogonal.py +++ b/python/paddle/nn/initializer/orthogonal.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...fluid.initializer import Initializer -from ...fluid.data_feeder import check_variable_and_dtype -from ...fluid import framework +from paddle import _C_ops from paddle.utils import unique_name + +from ...fluid import framework +from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.dygraph import no_grad -from paddle import _C_ops +from ...fluid.initializer import Initializer __all__ = [] diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py index 7b60c52ea5497b..1433b587001092 100644 --- a/python/paddle/nn/layer/activation.py +++ b/python/paddle/nn/layer/activation.py @@ -14,11 +14,12 @@ # TODO: define activation functions of neural network -from ..initializer import Constant from paddle.framework import get_default_dtype -from .. import functional as F from paddle.nn import Layer +from .. import functional as F +from ..initializer import Constant + __all__ = [] diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index 413e96d1ca924c..ea4dfb41525c6c 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -14,10 +14,11 @@ # TODO: define the common classes to build a neural network import paddle +from paddle import in_dynamic_mode +from paddle.nn import Layer + from ...fluid.dygraph import Flatten # noqa: F401 from .. import functional as F -from paddle.nn import Layer -from paddle import in_dynamic_mode __all__ = [] diff --git a/python/paddle/nn/layer/container.py b/python/paddle/nn/layer/container.py index 6a54a20787c18e..542c1266e9b0b8 100644 --- a/python/paddle/nn/layer/container.py +++ b/python/paddle/nn/layer/container.py @@ -13,10 +13,11 @@ # limitations under the License. from collections import OrderedDict -from .. import Layer from collections.abc import Iterable, Mapping -from ...fluid.framework import Parameter + from ...fluid.dygraph.base import param_guard +from ...fluid.framework import Parameter +from .. 
import Layer __all__ = [] diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py index 6769e2bc5fae87..81eef1091c1d60 100644 --- a/python/paddle/nn/layer/conv.py +++ b/python/paddle/nn/layer/conv.py @@ -17,14 +17,17 @@ import numpy as np from paddle import get_flags -from ...device import get_cudnn_version + +from ...device import ( + get_cudnn_version, + is_compiled_with_cuda, + is_compiled_with_rocm, +) +from ...fluid.layers import utils from .. import Layer -from ..initializer import Normal from .. import functional as F -from ...fluid.layers import utils from ..functional.conv import _update_padding_nd -from ...device import is_compiled_with_cuda -from ...device import is_compiled_with_rocm +from ..initializer import Normal __all__ = [] diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py index 0a448ad7fe8e80..95db0d9acd7fcf 100644 --- a/python/paddle/nn/layer/loss.py +++ b/python/paddle/nn/layer/loss.py @@ -13,13 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import paddle + # TODO: define loss functions of neural network import paddle.fluid as fluid -import paddle -from .. import functional as F +from paddle import in_dynamic_mode from paddle.fluid.framework import in_dygraph_mode + from .. import Layer -from paddle import in_dynamic_mode +from .. import functional as F __all__ = [] diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 2395f2ed546699..64f9f8913313de 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -27,27 +27,23 @@ # TODO: define normalization api -from ...fluid.dygraph import BatchNorm # noqa: F401 -from ...fluid.dygraph import SpectralNorm # noqa: F401 - -from ...framework import get_default_dtype +import numbers +import warnings -from ..initializer import Constant -from ...framework import ParamAttr -from ...fluid.data_feeder import check_variable_and_dtype +import numpy as np -from ..functional import batch_norm, layer_norm, instance_norm +from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode +from paddle.device import get_all_custom_device_type +from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode -import numpy as np -import numbers -import warnings -from ...framework import no_grad -from .. import functional as F -from paddle import _C_ops, _legacy_C_ops +from ...fluid.data_feeder import check_variable_and_dtype +from ...fluid.dygraph import BatchNorm # noqa: F401 +from ...fluid.dygraph import SpectralNorm # noqa: F401 +from ...framework import ParamAttr, get_default_dtype, no_grad from .. import Layer -from paddle import in_dynamic_mode -from paddle.device import get_all_custom_device_type -from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph +from .. import functional as F +from ..functional import batch_norm, instance_norm, layer_norm +from ..initializer import Constant __all__ = [] diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py index 3c37fae3fe1749..09714e18b494eb 100755 --- a/python/paddle/nn/layer/pooling.py +++ b/python/paddle/nn/layer/pooling.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .. import functional as F from .. import Layer +from .. 
import functional as F __all__ = [] diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index c93dcae4ac051c..8a3ea8a209b0ee 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -13,25 +13,23 @@ # limitations under the License. import math +from collections.abc import Sequence from functools import reduce import numpy as np + import paddle -from paddle import framework -from paddle.nn import functional as F -from paddle.nn import initializer as I -from paddle.nn import Layer -from .container import LayerList +from paddle import _C_ops, _legacy_C_ops, framework, in_dynamic_mode +from paddle.fluid.framework import in_dygraph_mode from paddle.fluid.layers import utils from paddle.fluid.layers.utils import flatten, map_structure -from paddle import _C_ops, _legacy_C_ops -from paddle import in_dynamic_mode -from paddle.fluid.framework import in_dygraph_mode from paddle.framework import core -from paddle.static import default_startup_program -from paddle.static import program_guard +from paddle.nn import Layer +from paddle.nn import functional as F +from paddle.nn import initializer as I +from paddle.static import default_startup_program, program_guard -from collections.abc import Sequence +from .container import LayerList __all__ = [] diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py index 52cffc4998a1d2..5b788604e41c34 100644 --- a/python/paddle/nn/layer/transformer.py +++ b/python/paddle/nn/layer/transformer.py @@ -14,20 +14,22 @@ # TODO: define the classes of Transformer neural network -import copy import collections +import copy + import numpy as np import paddle -from .common import Linear, Dropout -from .norm import LayerNorm -from .. import functional as F +from paddle.fluid.data_feeder import convert_dtype + from ... import tensor from ...fluid import layers +from ...framework import ParamAttr from .. import Layer +from .. import functional as F +from .common import Dropout, Linear from .container import LayerList -from ...framework import ParamAttr -from paddle.fluid.data_feeder import convert_dtype +from .norm import LayerNorm __all__ = [] diff --git a/python/paddle/nn/layer/vision.py b/python/paddle/nn/layer/vision.py index 5bd45e6607ded4..bcc6ea77bb55ca 100644 --- a/python/paddle/nn/layer/vision.py +++ b/python/paddle/nn/layer/vision.py @@ -14,8 +14,7 @@ # TODO: define specitial functions used in computer vision task -from .. import Layer -from .. import functional +from .. import Layer, functional __all__ = [] diff --git a/python/paddle/nn/quant/functional_layers.py b/python/paddle/nn/quant/functional_layers.py index aa1dcf179b95ad..2986e3e0500f96 100644 --- a/python/paddle/nn/quant/functional_layers.py +++ b/python/paddle/nn/quant/functional_layers.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...tensor import math, manipulation +from ...tensor import manipulation, math from .. import Layer __all__ = [] diff --git a/python/paddle/nn/quant/lsq.py b/python/paddle/nn/quant/lsq.py index 92b27df7f23a39..ae73ec6f7024ea 100644 --- a/python/paddle/nn/quant/lsq.py +++ b/python/paddle/nn/quant/lsq.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import math + import paddle -from paddle.utils import unique_name +from paddle.autograd import PyLayer from paddle.framework import ParamAttr -from paddle.nn.initializer import Constant from paddle.nn import Layer -from paddle.autograd import PyLayer -import math +from paddle.nn.initializer import Constant +from paddle.utils import unique_name def round(x): diff --git a/python/paddle/nn/quant/quant_layers.py b/python/paddle/nn/quant/quant_layers.py index 4c12e9658d3111..8d81a61dd7921c 100644 --- a/python/paddle/nn/quant/quant_layers.py +++ b/python/paddle/nn/quant/quant_layers.py @@ -12,20 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging + import paddle -from paddle.framework import core -from paddle.utils import unique_name -from paddle.framework import ParamAttr -from paddle.fluid.framework import _varbase_creator -from paddle.nn.initializer import Constant +from paddle import _legacy_C_ops, in_dynamic_mode from paddle.fluid.data_feeder import check_variable_and_dtype -from paddle.nn import functional as F -import logging +from paddle.fluid.framework import _varbase_creator from paddle.fluid.log_helper import get_logger -from paddle import _legacy_C_ops -from paddle import in_dynamic_mode +from paddle.framework import ParamAttr, core from paddle.nn import Layer +from paddle.nn import functional as F +from paddle.nn.initializer import Constant from paddle.nn.quant.lsq import FakeQuantActLSQPlus, FakeQuantWeightLSQPlus +from paddle.utils import unique_name __all__ = [ 'FakeQuantAbsMax', diff --git a/python/paddle/nn/utils/spectral_norm_hook.py b/python/paddle/nn/utils/spectral_norm_hook.py index f035d0b4431160..2d65f1375ecadd 100644 --- a/python/paddle/nn/utils/spectral_norm_hook.py +++ b/python/paddle/nn/utils/spectral_norm_hook.py @@ -13,9 +13,10 @@ # limitations under the License. import paddle -from ..layer.conv import Conv1DTranspose, Conv2DTranspose, Conv3DTranspose -from ..layer.common import Linear + from .. import functional as F +from ..layer.common import Linear +from ..layer.conv import Conv1DTranspose, Conv2DTranspose, Conv3DTranspose __all__ = [] diff --git a/python/paddle/nn/utils/transform_parameters.py b/python/paddle/nn/utils/transform_parameters.py index 4076b808dda744..3696ad96090598 100644 --- a/python/paddle/nn/utils/transform_parameters.py +++ b/python/paddle/nn/utils/transform_parameters.py @@ -15,13 +15,13 @@ from functools import reduce import paddle +from paddle import _C_ops from paddle.fluid.framework import ( - dygraph_only, _dygraph_tracer, _varbase_creator, + dygraph_only, in_dygraph_mode, ) -from paddle import _C_ops # input==output, inplace strategy of reshape has no cost almostly diff --git a/python/paddle/nn/utils/weight_norm_hook.py b/python/paddle/nn/utils/weight_norm_hook.py index 2a4d07647929b4..7cab14ea4eec01 100755 --- a/python/paddle/nn/utils/weight_norm_hook.py +++ b/python/paddle/nn/utils/weight_norm_hook.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import paddle +from paddle import _C_ops + from ...fluid import layers as F -from ...fluid.layer_helper import LayerHelper from ...fluid.data_feeder import check_variable_and_dtype +from ...fluid.layer_helper import LayerHelper from ...framework import in_dygraph_mode -from paddle import _C_ops __all__ = [] diff --git a/python/paddle/onnx/export.py b/python/paddle/onnx/export.py index 2d7a3223a9a665..2167bfb664c1fc 100644 --- a/python/paddle/onnx/export.py +++ b/python/paddle/onnx/export.py @@ -13,6 +13,7 @@ # limitations under the License. import os + from paddle.utils import try_import __all__ = [] diff --git a/python/paddle/optimizer/adadelta.py b/python/paddle/optimizer/adadelta.py index 9d21249247eece..76d73d3f5fdc6f 100644 --- a/python/paddle/optimizer/adadelta.py +++ b/python/paddle/optimizer/adadelta.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .optimizer import Optimizer -from ..fluid import framework -from ..framework import in_dygraph_mode from paddle import _C_ops + +from ..fluid import framework from ..fluid.dygraph import no_grad +from ..framework import in_dygraph_mode +from .optimizer import Optimizer __all__ = [] diff --git a/python/paddle/optimizer/adagrad.py b/python/paddle/optimizer/adagrad.py index 6df29c29d5156d..a562bf77d8f6ae 100644 --- a/python/paddle/optimizer/adagrad.py +++ b/python/paddle/optimizer/adagrad.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .optimizer import Optimizer from ..fluid import framework +from .optimizer import Optimizer __all__ = [] diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index aa76fb82759f18..b8ae8b01ed2e93 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -12,20 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .optimizer import Optimizer -from ..fluid import core -from ..fluid import framework -from ..fluid.framework import Variable, in_dygraph_mode -from ..fluid import layers -from ..fluid import unique_name -from ..fluid.layer_helper import LayerHelper import warnings -from ..fluid.dygraph import base as imperative_base from collections import defaultdict import paddle from paddle import _C_ops, _legacy_C_ops +from ..fluid import core, framework, layers, unique_name +from ..fluid.dygraph import base as imperative_base +from ..fluid.framework import Variable, in_dygraph_mode +from ..fluid.layer_helper import LayerHelper +from .optimizer import Optimizer + __all__ = [] GRAD_TYPES = [int(paddle.float32), int(paddle.float16), int(paddle.bfloat16)] diff --git a/python/paddle/optimizer/adamax.py b/python/paddle/optimizer/adamax.py index 3b8b57dbb1e5b3..69a2102ebb2e80 100644 --- a/python/paddle/optimizer/adamax.py +++ b/python/paddle/optimizer/adamax.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .optimizer import Optimizer -from ..fluid import framework -from ..fluid.framework import name_scope from paddle import _C_ops, _legacy_C_ops + +from ..fluid import framework from ..fluid.dygraph import no_grad +from ..fluid.framework import name_scope +from .optimizer import Optimizer __all__ = [] diff --git a/python/paddle/optimizer/adamw.py b/python/paddle/optimizer/adamw.py index 5424331a71fa93..a2d432157cc59d 100644 --- a/python/paddle/optimizer/adamw.py +++ b/python/paddle/optimizer/adamw.py @@ -14,20 +14,19 @@ import warnings from collections import defaultdict -from .optimizer import Optimizer -from .lr import LRScheduler -from ..fluid import core -from ..fluid import framework -from ..fluid.framework import Variable, Parameter -from ..fluid import unique_name -from ..fluid import layers -from ..fluid.layer_helper import LayerHelper -from ..fluid.clip import GradientClipBase -from ..fluid.dygraph import base as imperative_base from collections.abc import Callable -from .. import _C_ops, _legacy_C_ops + import paddle +from .. import _C_ops, _legacy_C_ops +from ..fluid import core, framework, layers, unique_name +from ..fluid.clip import GradientClipBase +from ..fluid.dygraph import base as imperative_base +from ..fluid.framework import Parameter, Variable +from ..fluid.layer_helper import LayerHelper +from .lr import LRScheduler +from .optimizer import Optimizer + __all__ = [] diff --git a/python/paddle/optimizer/lamb.py b/python/paddle/optimizer/lamb.py index 2fb19fd7355b50..5df59deb152921 100644 --- a/python/paddle/optimizer/lamb.py +++ b/python/paddle/optimizer/lamb.py @@ -12,16 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .optimizer import Optimizer -from ..fluid import core -from ..fluid import framework -from ..fluid.framework import Variable -from ..fluid import layers -from ..fluid import unique_name -from ..fluid.layer_helper import LayerHelper from paddle import _C_ops, _legacy_C_ops from paddle.fluid.executor import global_scope +from ..fluid import core, framework, layers, unique_name +from ..fluid.framework import Variable +from ..fluid.layer_helper import LayerHelper +from .optimizer import Optimizer + __all__ = [] diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 387f9479a8ae01..8230336d3d3be7 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -13,10 +13,13 @@ # limitations under the License. 
import math -import numpy import warnings -from paddle import Tensor + +import numpy + import paddle.fluid.core as core +from paddle import Tensor + from ..fluid.framework import _in_legacy_dygraph __all__ = [ # noqa diff --git a/python/paddle/optimizer/momentum.py b/python/paddle/optimizer/momentum.py index 0f0f3968fd8491..3a7d0a2cedf240 100644 --- a/python/paddle/optimizer/momentum.py +++ b/python/paddle/optimizer/momentum.py @@ -14,16 +14,14 @@ import warnings -from .optimizer import Optimizer -from ..fluid import core -from ..fluid import framework -from ..fluid.layer_helper import LayerHelper -from ..fluid import unique_name -from ..fluid import layers -from paddle.fluid.regularizer import L2DecayRegularizer -from paddle import _C_ops, _legacy_C_ops import paddle -from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph +from paddle import _C_ops, _legacy_C_ops +from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode +from paddle.fluid.regularizer import L2DecayRegularizer + +from ..fluid import core, framework, layers, unique_name +from ..fluid.layer_helper import LayerHelper +from .optimizer import Optimizer __all__ = [] diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py index 6a9d504cc29bc9..764c01736a3ab9 100644 --- a/python/paddle/optimizer/optimizer.py +++ b/python/paddle/optimizer/optimizer.py @@ -12,40 +12,37 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import logging from collections import defaultdict +import numpy as np + import paddle +from paddle import _C_ops, _legacy_C_ops +from paddle.fluid import core from paddle.fluid.framework import ( Variable, + _current_expected_place, + _in_eager_without_dygraph_check, + _in_legacy_dygraph, default_main_program, device_guard, + in_dygraph_mode, name_scope, ) -from ..fluid import framework -from ..fluid import layers -from ..fluid import unique_name +from ..fluid import framework, layers, unique_name from ..fluid.backward import _get_no_grad_set_name, append_backward from ..fluid.clip import ( GradientClipBase, append_gradient_clip_ops, error_clip_callback, ) -from ..fluid.framework import program_guard, Parameter +from ..fluid.dygraph import base as imperative_base +from ..fluid.framework import Parameter, program_guard from ..fluid.initializer import Constant from ..fluid.layer_helper import LayerHelper -from ..fluid.dygraph import base as imperative_base -from paddle.fluid import core from .lr import LRScheduler -from paddle import _C_ops, _legacy_C_ops -from paddle.fluid.framework import ( - _in_legacy_dygraph, - _in_eager_without_dygraph_check, - _current_expected_place, - in_dygraph_mode, -) __all__ = [] @@ -59,7 +56,7 @@ def append_backward_new( checkpoints=None, distop_context=None, ): - from paddle.incubate.autograd.primx import orig2prim, Transform + from paddle.incubate.autograd.primx import Transform, orig2prim program = default_main_program() assert ( diff --git a/python/paddle/optimizer/rmsprop.py b/python/paddle/optimizer/rmsprop.py index cbdac84f783aac..c6e78f538bcbe8 100644 --- a/python/paddle/optimizer/rmsprop.py +++ b/python/paddle/optimizer/rmsprop.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .optimizer import Optimizer from ..fluid import framework +from .optimizer import Optimizer __all__ = [] diff --git a/python/paddle/optimizer/sgd.py b/python/paddle/optimizer/sgd.py index 2ce4a762fb014b..2ead0159c8291c 100644 --- a/python/paddle/optimizer/sgd.py +++ b/python/paddle/optimizer/sgd.py @@ -12,16 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .optimizer import Optimizer -from ..fluid import core -from ..fluid import framework -from ..fluid.dygraph import no_grad -from paddle import _C_ops, _legacy_C_ops import warnings -from ..fluid.layer_helper import LayerHelper -from ..fluid import unique_name -from ..fluid import layers + +from paddle import _C_ops, _legacy_C_ops + +from ..fluid import core, framework, layers, unique_name +from ..fluid.dygraph import no_grad from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode +from ..fluid.layer_helper import LayerHelper +from .optimizer import Optimizer __all__ = [] diff --git a/python/paddle/profiler/profiler.py b/python/paddle/profiler/profiler.py index 7eebb2969a797c..caa0d754cb6498 100644 --- a/python/paddle/profiler/profiler.py +++ b/python/paddle/profiler/profiler.py @@ -12,30 +12,30 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime +import importlib +import json import os import socket -import datetime from enum import Enum from typing import Any, Callable, Iterable, Optional, Union from warnings import warn -import importlib -import json import paddle from paddle.fluid.core import ( - _Profiler, ProfilerOptions, TracerEventType, - enable_memory_recorder, - enable_op_info_recorder, + _Profiler, disable_memory_recorder, disable_op_info_recorder, + enable_memory_recorder, + enable_op_info_recorder, ) - -from .utils import RecordEvent, wrap_optimizers -from .profiler_statistic import StatisticData, _build_table, SortedKeys from paddle.profiler import utils + +from .profiler_statistic import SortedKeys, StatisticData, _build_table from .timer import benchmark +from .utils import RecordEvent, wrap_optimizers class SummaryView(Enum): diff --git a/python/paddle/profiler/profiler_statistic.py b/python/paddle/profiler/profiler_statistic.py index 53d203e03e66ed..cedfac8b614172 100755 --- a/python/paddle/profiler/profiler_statistic.py +++ b/python/paddle/profiler/profiler_statistic.py @@ -12,11 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. import collections -from enum import Enum import re +from enum import Enum from paddle.fluid.core import TracerEventType, TracerMemEventType - from paddle.utils.flops import flops from .statistic_helper import ( diff --git a/python/paddle/profiler/utils.py b/python/paddle/profiler/utils.py index d26e6de82f2f85..e01add63e825ee 100644 --- a/python/paddle/profiler/utils.py +++ b/python/paddle/profiler/utils.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any -from warnings import warn import functools from contextlib import ContextDecorator +from typing import Any +from warnings import warn from paddle.fluid import core -from paddle.fluid.core import _RecordEvent, TracerEventType +from paddle.fluid.core import TracerEventType, _RecordEvent _is_profiler_used = False _has_optimizer_wrapped = False diff --git a/python/paddle/reader/decorator.py b/python/paddle/reader/decorator.py index 1969c7ba11e9e8..61e474c857f23a 100644 --- a/python/paddle/reader/decorator.py +++ b/python/paddle/reader/decorator.py @@ -12,17 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from threading import Thread +import itertools +import logging import multiprocessing +import random import sys import warnings -import logging - -from queue import Queue from itertools import zip_longest - -import itertools -import random +from queue import Queue +from threading import Thread from paddle.fluid.reader import QUEUE_GET_TIMEOUT diff --git a/python/paddle/reader/tests/decorator_test.py b/python/paddle/reader/tests/decorator_test.py index 92d257e065a01e..357fb6e12220ea 100644 --- a/python/paddle/reader/tests/decorator_test.py +++ b/python/paddle/reader/tests/decorator_test.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import functools import sys import time import unittest -import functools import paddle.reader diff --git a/python/paddle/signal.py b/python/paddle/signal.py index 5b6879c2855bc7..6ebc08b32d1ffb 100644 --- a/python/paddle/signal.py +++ b/python/paddle/signal.py @@ -13,14 +13,14 @@ # limitations under the License. import paddle +from paddle import _C_ops, _legacy_C_ops +from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode -from .tensor.attribute import is_complex -from .fft import fft_r2c, fft_c2r, fft_c2c +from .fft import fft_c2c, fft_c2r, fft_r2c from .fluid.data_feeder import check_variable_and_dtype from .fluid.framework import _non_static_mode from .fluid.layer_helper import LayerHelper -from paddle import _C_ops, _legacy_C_ops -from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph +from .tensor.attribute import is_complex __all__ = [ 'stft', diff --git a/python/paddle/sparse/binary.py b/python/paddle/sparse/binary.py index 2d6b36922f3a4e..36ced04b558e5f 100644 --- a/python/paddle/sparse/binary.py +++ b/python/paddle/sparse/binary.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle import _C_ops -from paddle.fluid.framework import dygraph_only, core -from paddle import in_dynamic_mode +from paddle import _C_ops, in_dynamic_mode +from paddle.fluid.framework import core, dygraph_only from paddle.fluid.layer_helper import LayerHelper + from .unary import cast __all__ = [] diff --git a/python/paddle/sparse/creation.py b/python/paddle/sparse/creation.py index 7b23d7326058e7..abf05746e85b8e 100644 --- a/python/paddle/sparse/creation.py +++ b/python/paddle/sparse/creation.py @@ -12,16 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import numpy as np + import paddle -from paddle import _C_ops -from paddle.fluid.framework import core, dygraph_only -from paddle.fluid.framework import _current_expected_place, _get_paddle_place -from paddle.tensor import to_tensor, max +from paddle import _C_ops, in_dynamic_mode from paddle.fluid.data_feeder import convert_dtype -from paddle import in_dynamic_mode +from paddle.fluid.framework import ( + _current_expected_place, + _get_paddle_place, + core, + dygraph_only, +) from paddle.fluid.layer_helper import LayerHelper - -import numpy as np +from paddle.tensor import max, to_tensor __all__ = [ 'sparse_coo_tensor', diff --git a/python/paddle/sparse/nn/functional/activation.py b/python/paddle/sparse/nn/functional/activation.py index 93c5e74014f3e0..0f5bc48a3f9514 100644 --- a/python/paddle/sparse/nn/functional/activation.py +++ b/python/paddle/sparse/nn/functional/activation.py @@ -14,9 +14,8 @@ __all__ = [] -from paddle import _C_ops +from paddle import _C_ops, in_dynamic_mode from paddle.fluid.framework import dygraph_only -from paddle import in_dynamic_mode from paddle.fluid.layer_helper import LayerHelper diff --git a/python/paddle/sparse/nn/functional/conv.py b/python/paddle/sparse/nn/functional/conv.py index 4e7cbd7caad40b..6c1244b72ce817 100644 --- a/python/paddle/sparse/nn/functional/conv.py +++ b/python/paddle/sparse/nn/functional/conv.py @@ -15,10 +15,11 @@ __all__ = [] from paddle import _C_ops, in_dynamic_mode +from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layers.utils import convert_to_list -from ...binary import add from paddle.nn.functional.conv import _update_padding_nd -from paddle.fluid.layer_helper import LayerHelper + +from ...binary import add def _conv3d( diff --git a/python/paddle/sparse/nn/functional/pooling.py b/python/paddle/sparse/nn/functional/pooling.py index 98aaad61369629..ce8c4c20cbab22 100644 --- a/python/paddle/sparse/nn/functional/pooling.py +++ b/python/paddle/sparse/nn/functional/pooling.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.layers import utils from paddle import _C_ops, in_dynamic_mode +from paddle.fluid.layers import utils from paddle.nn.functional.pooling import _update_padding_nd __all__ = [] diff --git a/python/paddle/sparse/nn/layer/activation.py b/python/paddle/sparse/nn/layer/activation.py index f87901123a5c0b..84037a8dacd175 100644 --- a/python/paddle/sparse/nn/layer/activation.py +++ b/python/paddle/sparse/nn/layer/activation.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .. import functional as F from paddle.nn import Layer +from .. import functional as F + __all__ = [] diff --git a/python/paddle/sparse/nn/layer/conv.py b/python/paddle/sparse/nn/layer/conv.py index b9dc9bd8e0ffd8..70f3fceb153546 100644 --- a/python/paddle/sparse/nn/layer/conv.py +++ b/python/paddle/sparse/nn/layer/conv.py @@ -13,11 +13,13 @@ # limitations under the License. import numpy as np -from .. import functional as F + +from paddle.fluid.layers import utils from paddle.nn import Layer -from paddle.nn.initializer import Normal from paddle.nn.functional.conv import _update_padding_nd -from paddle.fluid.layers import utils +from paddle.nn.initializer import Normal + +from .. 
import functional as F __all__ = [] diff --git a/python/paddle/sparse/nn/layer/norm.py b/python/paddle/sparse/nn/layer/norm.py index eb242a72c54b61..30e69560428306 100644 --- a/python/paddle/sparse/nn/layer/norm.py +++ b/python/paddle/sparse/nn/layer/norm.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import warnings -from paddle.nn.layer.norm import _BatchNormBase -from paddle.framework import no_grad + +import paddle from paddle import _C_ops, in_dynamic_mode from paddle.fluid.layer_helper import LayerHelper +from paddle.framework import no_grad +from paddle.nn.layer.norm import _BatchNormBase class BatchNorm(paddle.nn.BatchNorm1D): diff --git a/python/paddle/sparse/nn/layer/pooling.py b/python/paddle/sparse/nn/layer/pooling.py index a2b08b4a8cddbf..f42af9f5dc1405 100644 --- a/python/paddle/sparse/nn/layer/pooling.py +++ b/python/paddle/sparse/nn/layer/pooling.py @@ -13,6 +13,7 @@ # limitations under the License. from paddle.nn import Layer + from .. import functional as F diff --git a/python/paddle/sparse/unary.py b/python/paddle/sparse/unary.py index 9ce5857b57e84a..23a1aa1c030f0e 100644 --- a/python/paddle/sparse/unary.py +++ b/python/paddle/sparse/unary.py @@ -16,9 +16,9 @@ from paddle import _C_ops from paddle.fluid.framework import ( - dygraph_only, - core, convert_np_dtype_to_dtype_, + core, + dygraph_only, ) __all__ = [] diff --git a/python/paddle/static/input.py b/python/paddle/static/input.py index f6e979dbcbf72f..09949cf939121f 100644 --- a/python/paddle/static/input.py +++ b/python/paddle/static/input.py @@ -13,11 +13,10 @@ # limitations under the License. import paddle -from paddle.fluid import core, Variable -from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid import Variable, core from paddle.fluid.data_feeder import check_type -from paddle.fluid.framework import convert_np_dtype_to_dtype_ -from paddle.fluid.framework import static_only +from paddle.fluid.framework import convert_np_dtype_to_dtype_, static_only +from paddle.fluid.layer_helper import LayerHelper __all__ = [] diff --git a/python/paddle/static/io.py b/python/paddle/static/io.py index fdcfc1a657069d..e0f98978863e8a 100644 --- a/python/paddle/static/io.py +++ b/python/paddle/static/io.py @@ -17,21 +17,22 @@ import logging import os import warnings + import numpy as np import paddle from paddle.fluid import ( - core, - Variable, CompiledProgram, - default_main_program, Program, - unique_name, + Variable, + core, + default_main_program, program_guard, + unique_name, ) -from paddle.fluid.io import prepend_feed_ops, append_fetch_ops -from paddle.fluid.framework import static_only, Parameter from paddle.fluid.executor import global_scope +from paddle.fluid.framework import Parameter, static_only +from paddle.fluid.io import append_fetch_ops, prepend_feed_ops from paddle.fluid.log_helper import get_logger __all__ = [] diff --git a/python/paddle/static/nn/loss.py b/python/paddle/static/nn/loss.py index 1cba5dfe67db7a..20c7641e2d9deb 100644 --- a/python/paddle/static/nn/loss.py +++ b/python/paddle/static/nn/loss.py @@ -13,17 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ...fluid.data_feeder import check_variable_and_dtype -from paddle.fluid.layers.layer_function_generator import templatedoc +import numpy as np + +from paddle.fluid.framework import static_only +from paddle.fluid.initializer import NumpyArrayInitializer # TODO: define loss functions of neural network from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.layers.layer_function_generator import templatedoc from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import NumpyArrayInitializer -import numpy as np -from paddle.fluid.framework import ( - static_only, -) + +from ...fluid.data_feeder import check_variable_and_dtype __all__ = [] diff --git a/python/paddle/tests/dist_hapi_mnist_dynamic.py b/python/paddle/tests/dist_hapi_mnist_dynamic.py index 85a0c3d6ca69b4..d83124fc19eaa8 100644 --- a/python/paddle/tests/dist_hapi_mnist_dynamic.py +++ b/python/paddle/tests/dist_hapi_mnist_dynamic.py @@ -18,13 +18,12 @@ import paddle import paddle.fluid as fluid - from paddle import Model, set_device -from paddle.static import InputSpec as Input -from paddle.nn.layer.loss import CrossEntropyLoss from paddle.metric import Accuracy -from paddle.vision.models import LeNet +from paddle.nn.layer.loss import CrossEntropyLoss +from paddle.static import InputSpec as Input from paddle.vision.datasets import MNIST +from paddle.vision.models import LeNet class MnistDataset(MNIST): diff --git a/python/paddle/tests/dist_hapi_mnist_static.py b/python/paddle/tests/dist_hapi_mnist_static.py index 395c4f6727b0c9..1c353250064770 100644 --- a/python/paddle/tests/dist_hapi_mnist_static.py +++ b/python/paddle/tests/dist_hapi_mnist_static.py @@ -18,13 +18,12 @@ import paddle import paddle.fluid as fluid - from paddle import Model, set_device -from paddle.static import InputSpec as Input -from paddle.nn.layer.loss import CrossEntropyLoss from paddle.metric import Accuracy -from paddle.vision.models import LeNet +from paddle.nn.layer.loss import CrossEntropyLoss +from paddle.static import InputSpec as Input from paddle.vision.datasets import MNIST +from paddle.vision.models import LeNet class MnistDataset(MNIST): diff --git a/python/paddle/tests/dist_hapi_pure_fp16_static.py b/python/paddle/tests/dist_hapi_pure_fp16_static.py index 6be15ec44be1f8..f63866991fe267 100644 --- a/python/paddle/tests/dist_hapi_pure_fp16_static.py +++ b/python/paddle/tests/dist_hapi_pure_fp16_static.py @@ -17,11 +17,9 @@ import numpy as np import paddle -from paddle import fluid - -from paddle import Model -from paddle.static import InputSpec +from paddle import Model, fluid from paddle.nn.layer.loss import CrossEntropyLoss +from paddle.static import InputSpec from paddle.vision.models import LeNet diff --git a/python/paddle/tests/hapi_mnist_bf16_static.py b/python/paddle/tests/hapi_mnist_bf16_static.py index d8c46e58bd3f1a..1ee7e28b03e305 100644 --- a/python/paddle/tests/hapi_mnist_bf16_static.py +++ b/python/paddle/tests/hapi_mnist_bf16_static.py @@ -12,18 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import argparse +import ast +import random + import numpy as np -import paddle +import paddle +import paddle.static.amp as amp from paddle import Model, set_device -from paddle.static import InputSpec as Input from paddle.metric import Accuracy +from paddle.static import InputSpec as Input from paddle.vision.datasets import MNIST from paddle.vision.models import LeNet -import paddle.static.amp as amp -import random -import argparse -import ast SEED = 2 paddle.seed(SEED) diff --git a/python/paddle/tests/test_async_read_write.py b/python/paddle/tests/test_async_read_write.py index 682f72422d6068..6605ac6d81ce21 100644 --- a/python/paddle/tests/test_async_read_write.py +++ b/python/paddle/tests/test_async_read_write.py @@ -13,12 +13,13 @@ # limitations under the License. import unittest + import numpy as np import paddle -from paddle.fluid import core from paddle.device import cuda -from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph +from paddle.fluid import core +from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard class TestAsyncRead(unittest.TestCase): diff --git a/python/paddle/tests/test_audio_backend.py b/python/paddle/tests/test_audio_backend.py index fb583d96e1f9b7..2db722df43b6b1 100644 --- a/python/paddle/tests/test_audio_backend.py +++ b/python/paddle/tests/test_audio_backend.py @@ -11,11 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest -import soundfile import numpy as np -import os +import soundfile + import paddle.audio diff --git a/python/paddle/tests/test_audio_datasets.py b/python/paddle/tests/test_audio_datasets.py index 4db1730ae41891..b3c066b2115dd4 100644 --- a/python/paddle/tests/test_audio_datasets.py +++ b/python/paddle/tests/test_audio_datasets.py @@ -11,13 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import itertools import unittest import numpy as np -import paddle -import itertools from parameterized import parameterized +import paddle + def parameterize(*params): return parameterized.expand(list(itertools.product(*params))) diff --git a/python/paddle/tests/test_audio_functions.py b/python/paddle/tests/test_audio_functions.py index 80a99343d80205..8400bd4ecb40e8 100644 --- a/python/paddle/tests/test_audio_functions.py +++ b/python/paddle/tests/test_audio_functions.py @@ -11,16 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import itertools import unittest import librosa import numpy as np -import paddle +from parameterized import parameterized +from scipy import signal +import paddle import paddle.audio -from scipy import signal -import itertools -from parameterized import parameterized def parameterize(*params): diff --git a/python/paddle/tests/test_audio_logmel_feature.py b/python/paddle/tests/test_audio_logmel_feature.py index a3a39cd79baf2a..8392d1d4c1a6d1 100644 --- a/python/paddle/tests/test_audio_logmel_feature.py +++ b/python/paddle/tests/test_audio_logmel_feature.py @@ -11,17 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import itertools import unittest import librosa import numpy as np -import paddle - -import paddle.audio import scipy -import itertools from parameterized import parameterized +import paddle +import paddle.audio + def parameterize(*params): return parameterized.expand(list(itertools.product(*params))) diff --git a/python/paddle/tests/test_audio_mel_feature.py b/python/paddle/tests/test_audio_mel_feature.py index ec7b843caf77df..a4e70b21693ee1 100644 --- a/python/paddle/tests/test_audio_mel_feature.py +++ b/python/paddle/tests/test_audio_mel_feature.py @@ -11,15 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import itertools import unittest import librosa import numpy as np -import paddle +from parameterized import parameterized +import paddle import paddle.audio -import itertools -from parameterized import parameterized def parameterize(*params): diff --git a/python/paddle/tests/test_callback_early_stop.py b/python/paddle/tests/test_callback_early_stop.py index 23f915a7162fbf..4b6089bfdf837b 100644 --- a/python/paddle/tests/test_callback_early_stop.py +++ b/python/paddle/tests/test_callback_early_stop.py @@ -12,18 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest -import tempfile import shutil +import tempfile +import unittest + import numpy as np import paddle from paddle import Model -from paddle.static import InputSpec -from paddle.vision.models import LeNet -from paddle.vision.datasets import MNIST from paddle.metric import Accuracy from paddle.nn.layer.loss import CrossEntropyLoss +from paddle.static import InputSpec +from paddle.vision.datasets import MNIST +from paddle.vision.models import LeNet class MnistDataset(MNIST): diff --git a/python/paddle/tests/test_callback_reduce_lr_on_plateau.py b/python/paddle/tests/test_callback_reduce_lr_on_plateau.py index 06ffb5888b8b28..c221c6d48246d0 100644 --- a/python/paddle/tests/test_callback_reduce_lr_on_plateau.py +++ b/python/paddle/tests/test_callback_reduce_lr_on_plateau.py @@ -17,12 +17,12 @@ import paddle import paddle.vision.transforms as T from paddle import Model -from paddle.static import InputSpec -from paddle.vision.models import LeNet -from paddle.vision.datasets import MNIST +from paddle.fluid.framework import _test_eager_guard from paddle.metric import Accuracy from paddle.nn.layer.loss import CrossEntropyLoss -from paddle.fluid.framework import _test_eager_guard +from paddle.static import InputSpec +from paddle.vision.datasets import MNIST +from paddle.vision.models import LeNet # Accelerate unittest diff --git a/python/paddle/tests/test_callback_visualdl.py b/python/paddle/tests/test_callback_visualdl.py index aa6616f673053b..5ad245a16344a8 100644 --- a/python/paddle/tests/test_callback_visualdl.py +++ b/python/paddle/tests/test_callback_visualdl.py @@ -12,15 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest -import tempfile import shutil +import tempfile +import unittest import paddle -from paddle.static import InputSpec import paddle.vision.transforms as T -from paddle.vision.datasets import MNIST from paddle.fluid.framework import _test_eager_guard +from paddle.static import InputSpec +from paddle.vision.datasets import MNIST class MnistDataset(MNIST): diff --git a/python/paddle/tests/test_callback_wandb.py b/python/paddle/tests/test_callback_wandb.py index d059d76cc0eece..c198530a2d91af 100644 --- a/python/paddle/tests/test_callback_wandb.py +++ b/python/paddle/tests/test_callback_wandb.py @@ -15,13 +15,12 @@ import tempfile import unittest +import paddle import paddle.vision.transforms as T +from paddle.fluid.framework import _test_eager_guard from paddle.static import InputSpec from paddle.vision.datasets import MNIST -import paddle -from paddle.fluid.framework import _test_eager_guard - class MnistDataset(MNIST): def __len__(self): diff --git a/python/paddle/tests/test_callbacks.py b/python/paddle/tests/test_callbacks.py index 7fc8c4be65c01d..7d4c1e99055d74 100644 --- a/python/paddle/tests/test_callbacks.py +++ b/python/paddle/tests/test_callbacks.py @@ -12,18 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest -import time import random -import tempfile import shutil +import tempfile +import time +import unittest + import numpy as np from paddle import Model -from paddle.static import InputSpec -from paddle.vision.models import LeNet from paddle.hapi.callbacks import config_callbacks +from paddle.static import InputSpec from paddle.vision.datasets import MNIST +from paddle.vision.models import LeNet class MnistDataset(MNIST): diff --git a/python/paddle/tests/test_dataset_cifar.py b/python/paddle/tests/test_dataset_cifar.py index abf79fb1e3974c..1fca233d3be786 100644 --- a/python/paddle/tests/test_dataset_cifar.py +++ b/python/paddle/tests/test_dataset_cifar.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from paddle.vision.datasets import Cifar10, Cifar100 diff --git a/python/paddle/tests/test_dataset_conll05.py b/python/paddle/tests/test_dataset_conll05.py index 9eb0036718b355..b0c82047d758d3 100644 --- a/python/paddle/tests/test_dataset_conll05.py +++ b/python/paddle/tests/test_dataset_conll05.py @@ -14,6 +14,7 @@ import os import unittest + import numpy as np from paddle.text.datasets import Conll05st diff --git a/python/paddle/tests/test_dataset_imdb.py b/python/paddle/tests/test_dataset_imdb.py index aed8c387409dce..1211c734bc705a 100644 --- a/python/paddle/tests/test_dataset_imdb.py +++ b/python/paddle/tests/test_dataset_imdb.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from paddle.text.datasets import Imdb diff --git a/python/paddle/tests/test_dataset_imikolov.py b/python/paddle/tests/test_dataset_imikolov.py index 6ffeeda73c362c..4b0edea85bc354 100644 --- a/python/paddle/tests/test_dataset_imikolov.py +++ b/python/paddle/tests/test_dataset_imikolov.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from paddle.text.datasets import Imikolov diff --git a/python/paddle/tests/test_dataset_movielens.py b/python/paddle/tests/test_dataset_movielens.py index e5c6d8376eed97..63bd6424263182 100644 --- a/python/paddle/tests/test_dataset_movielens.py +++ b/python/paddle/tests/test_dataset_movielens.py @@ -13,6 +13,7 @@ # limitations under the License. 
import unittest + import numpy as np from paddle.text.datasets import Movielens diff --git a/python/paddle/tests/test_dataset_uci_housing.py b/python/paddle/tests/test_dataset_uci_housing.py index fe85e7683dd9c6..cf5293a8473a00 100644 --- a/python/paddle/tests/test_dataset_uci_housing.py +++ b/python/paddle/tests/test_dataset_uci_housing.py @@ -13,9 +13,10 @@ # limitations under the License. import unittest + import numpy as np -from paddle.text.datasets import UCIHousing, WMT14 +from paddle.text.datasets import WMT14, UCIHousing class TestUCIHousingTrain(unittest.TestCase): diff --git a/python/paddle/tests/test_dataset_voc.py b/python/paddle/tests/test_dataset_voc.py index 459068c2df2016..8e852093dc7f7f 100644 --- a/python/paddle/tests/test_dataset_voc.py +++ b/python/paddle/tests/test_dataset_voc.py @@ -13,9 +13,10 @@ # limitations under the License. import unittest + import numpy as np -from paddle.vision.datasets import voc2012, VOC2012 +from paddle.vision.datasets import VOC2012, voc2012 # VOC2012 is too large for unittest to download, stub a small dataset here voc2012.VOC_URL = 'https://paddlemodels.bj.bcebos.com/voc2012_stub/VOCtrainval_11-May-2012.tar' diff --git a/python/paddle/tests/test_dataset_wmt.py b/python/paddle/tests/test_dataset_wmt.py index 43663945e20fd5..895afadec5b7b5 100644 --- a/python/paddle/tests/test_dataset_wmt.py +++ b/python/paddle/tests/test_dataset_wmt.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from paddle.text.datasets import WMT14, WMT16 diff --git a/python/paddle/tests/test_datasets.py b/python/paddle/tests/test_datasets.py index 172d9007de595a..2890a877462b4a 100644 --- a/python/paddle/tests/test_datasets.py +++ b/python/paddle/tests/test_datasets.py @@ -12,23 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import os -import numpy as np -import tempfile import shutil +import tempfile +import unittest + import cv2 +import numpy as np import paddle.vision.transforms as T +from paddle.dataset.common import _check_exists_and_download +from paddle.fluid.framework import _test_eager_guard from paddle.vision.datasets import ( - DatasetFolder, - ImageFolder, MNIST, + DatasetFolder, FashionMNIST, Flowers, + ImageFolder, ) -from paddle.dataset.common import _check_exists_and_download -from paddle.fluid.framework import _test_eager_guard class TestFolderDatasets(unittest.TestCase): diff --git a/python/paddle/tests/test_dist_hapi_model.py b/python/paddle/tests/test_dist_hapi_model.py index 0ec5cc4f845bf6..e2696dc986ad5b 100644 --- a/python/paddle/tests/test_dist_hapi_model.py +++ b/python/paddle/tests/test_dist_hapi_model.py @@ -12,18 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest -import os -import time import copy +import os import subprocess -import paddle.fluid as fluid +import time +import unittest +import paddle.fluid as fluid from paddle.distributed.utils.launch_utils import ( + TrainerProc, find_free_ports, - watch_local_trainers, get_cluster, - TrainerProc, + watch_local_trainers, ) diff --git a/python/paddle/tests/test_dlpack.py b/python/paddle/tests/test_dlpack.py index b25281cea0cc1f..88a831be2268b0 100644 --- a/python/paddle/tests/test_dlpack.py +++ b/python/paddle/tests/test_dlpack.py @@ -13,6 +13,7 @@ # limitations under the License. 
import unittest + import numpy as np import paddle diff --git a/python/paddle/tests/test_download.py b/python/paddle/tests/test_download.py index 0978acbf56ab4e..62bc1e30defcf4 100644 --- a/python/paddle/tests/test_download.py +++ b/python/paddle/tests/test_download.py @@ -15,8 +15,7 @@ import os import unittest -from paddle.utils.download import get_weights_path_from_url -from paddle.utils.download import get_path_from_url +from paddle.utils.download import get_path_from_url, get_weights_path_from_url class TestDownload(unittest.TestCase): @@ -142,6 +141,7 @@ def test_download_methods( ] import sys + from paddle.utils.download import _download if sys.platform == 'linux': diff --git a/python/paddle/tests/test_hapi_amp.py b/python/paddle/tests/test_hapi_amp.py index e5be62a9e48681..654f1351f36612 100644 --- a/python/paddle/tests/test_hapi_amp.py +++ b/python/paddle/tests/test_hapi_amp.py @@ -16,19 +16,18 @@ os.environ['FLAGS_cudnn_deterministic'] = '1' -import unittest import tempfile +import unittest + import numpy as np import paddle -from paddle import fluid - -from paddle import Model -from paddle.static import InputSpec +import paddle.vision.transforms as T +from paddle import Model, fluid from paddle.nn.layer.loss import CrossEntropyLoss -from paddle.vision.models import LeNet +from paddle.static import InputSpec from paddle.vision.datasets import MNIST -import paddle.vision.transforms as T +from paddle.vision.models import LeNet @unittest.skipIf( diff --git a/python/paddle/tests/test_hapi_hub.py b/python/paddle/tests/test_hapi_hub.py index 2a383311e1eba5..683aec7a6c96f3 100644 --- a/python/paddle/tests/test_hapi_hub.py +++ b/python/paddle/tests/test_hapi_hub.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import os +import unittest + +import numpy as np import paddle from paddle.hapi import hub -import numpy as np - class TestHub(unittest.TestCase): def setUp( diff --git a/python/paddle/tests/test_logger.py b/python/paddle/tests/test_logger.py index e63b4e2cab25a2..a32e3836d7704b 100644 --- a/python/paddle/tests/test_logger.py +++ b/python/paddle/tests/test_logger.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import os import shutil import tempfile +import unittest from paddle.hapi.logger import setup_logger diff --git a/python/paddle/tests/test_metrics.py b/python/paddle/tests/test_metrics.py index b3bfb1eb9c842b..c604c7088e9422 100644 --- a/python/paddle/tests/test_metrics.py +++ b/python/paddle/tests/test_metrics.py @@ -13,11 +13,11 @@ # limitations under the License. import unittest + import numpy as np import paddle import paddle.fluid as fluid - from paddle.hapi.model import to_list diff --git a/python/paddle/tests/test_model.py b/python/paddle/tests/test_model.py index 3ed4905c307501..021c523d210a59 100644 --- a/python/paddle/tests/test_model.py +++ b/python/paddle/tests/test_model.py @@ -12,31 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest - import os -import numpy as np import shutil import tempfile +import unittest + +import numpy as np import paddle -from paddle import fluid -from paddle import to_tensor +import paddle.jit as jit +import paddle.vision.models as models +from paddle import Model, fluid, to_tensor +from paddle.hapi.model import prepare_distributed_context +from paddle.io import Dataset, DistributedBatchSampler +from paddle.jit.dy2static.program_translator import ProgramTranslator +from paddle.metric import Accuracy from paddle.nn import Conv2D, Linear, ReLU, Sequential - -from paddle import Model -from paddle.static import InputSpec from paddle.nn.layer.loss import CrossEntropyLoss -from paddle.metric import Accuracy +from paddle.static import InputSpec from paddle.vision.datasets import MNIST from paddle.vision.models import LeNet -import paddle.vision.models as models -import paddle.jit as jit -from paddle.io import DistributedBatchSampler, Dataset -from paddle.hapi.model import prepare_distributed_context -from paddle.jit.dy2static.program_translator import ( - ProgramTranslator, -) class LeNetDygraph(paddle.nn.Layer): diff --git a/python/paddle/tests/test_ops_roi_align.py b/python/paddle/tests/test_ops_roi_align.py index 05c221e83a1572..312e96429a243c 100644 --- a/python/paddle/tests/test_ops_roi_align.py +++ b/python/paddle/tests/test_ops_roi_align.py @@ -13,10 +13,11 @@ # limitations under the License. import unittest + import numpy as np import paddle -from paddle.vision.ops import roi_align, RoIAlign +from paddle.vision.ops import RoIAlign, roi_align class TestRoIAlign(unittest.TestCase): diff --git a/python/paddle/tests/test_ops_roi_pool.py b/python/paddle/tests/test_ops_roi_pool.py index 923ec01503cd22..8abe21ed104884 100644 --- a/python/paddle/tests/test_ops_roi_pool.py +++ b/python/paddle/tests/test_ops_roi_pool.py @@ -13,10 +13,11 @@ # limitations under the License. import unittest + import numpy as np import paddle -from paddle.vision.ops import roi_pool, RoIPool +from paddle.vision.ops import RoIPool, roi_pool class TestRoIPool(unittest.TestCase): diff --git a/python/paddle/tests/test_pretrained_model.py b/python/paddle/tests/test_pretrained_model.py index 81df3adf8c7bc7..2d0217644a36d8 100644 --- a/python/paddle/tests/test_pretrained_model.py +++ b/python/paddle/tests/test_pretrained_model.py @@ -13,14 +13,15 @@ # limitations under the License. import os -import unittest -import tempfile import shutil +import tempfile +import unittest + import numpy as np import paddle -from paddle.static import InputSpec import paddle.vision.models as models +from paddle.static import InputSpec # test the predicted resutls of static graph and dynamic graph are equal diff --git a/python/paddle/tests/test_progressbar.py b/python/paddle/tests/test_progressbar.py index 09340a9c247655..ab562fa49e8cd7 100644 --- a/python/paddle/tests/test_progressbar.py +++ b/python/paddle/tests/test_progressbar.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import unittest import random import time +import unittest + +import numpy as np from paddle.hapi.progressbar import ProgressBar diff --git a/python/paddle/tests/test_read_file.py b/python/paddle/tests/test_read_file.py index d13e901b322b5d..354553dca7d339 100644 --- a/python/paddle/tests/test_read_file.py +++ b/python/paddle/tests/test_read_file.py @@ -13,13 +13,14 @@ # limitations under the License. 
import os -import cv2 -import unittest import tempfile +import unittest + +import cv2 import numpy as np import paddle -from paddle.vision.ops import read_file, decode_jpeg +from paddle.vision.ops import decode_jpeg, read_file class TestReadFile(unittest.TestCase): diff --git a/python/paddle/tests/test_transforms.py b/python/paddle/tests/test_transforms.py index 9e2cad57c37ece..7045b63455d355 100644 --- a/python/paddle/tests/test_transforms.py +++ b/python/paddle/tests/test_transforms.py @@ -12,19 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import os +import shutil import tempfile +import unittest + import cv2 -import shutil import numpy as np from PIL import Image import paddle +import paddle.vision.transforms.functional as F from paddle.vision import image_load, set_image_backend from paddle.vision.datasets import DatasetFolder from paddle.vision.transforms import transforms -import paddle.vision.transforms.functional as F class TestTransformsCV2(unittest.TestCase): diff --git a/python/paddle/tests/test_utils_lazyimport.py b/python/paddle/tests/test_utils_lazyimport.py index 1064bd80864229..1b4800e6fb6c0b 100644 --- a/python/paddle/tests/test_utils_lazyimport.py +++ b/python/paddle/tests/test_utils_lazyimport.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + from paddle.utils.lazy_import import try_import diff --git a/python/paddle/tests/test_vision_models.py b/python/paddle/tests/test_vision_models.py index dc98fc3219bff6..84bf0d83025f40 100644 --- a/python/paddle/tests/test_vision_models.py +++ b/python/paddle/tests/test_vision_models.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. import unittest + import numpy as np import paddle -from paddle.static import InputSpec import paddle.vision.models as models +from paddle.static import InputSpec class TestVisonModels(unittest.TestCase): diff --git a/python/paddle/text/datasets/conll05.py b/python/paddle/text/datasets/conll05.py index 2e45e69b993215..10ef8f4edfb4ee 100644 --- a/python/paddle/text/datasets/conll05.py +++ b/python/paddle/text/datasets/conll05.py @@ -14,10 +14,11 @@ import gzip import tarfile + import numpy as np -from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download +from paddle.io import Dataset __all__ = [] diff --git a/python/paddle/text/datasets/imdb.py b/python/paddle/text/datasets/imdb.py index 6fb476a24e08a3..7aad2095c41183 100644 --- a/python/paddle/text/datasets/imdb.py +++ b/python/paddle/text/datasets/imdb.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections import re import string import tarfile + import numpy as np -import collections -from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download +from paddle.io import Dataset __all__ = [] diff --git a/python/paddle/text/datasets/imikolov.py b/python/paddle/text/datasets/imikolov.py index 613c2a029fe8f1..c9f04712c6fe11 100644 --- a/python/paddle/text/datasets/imikolov.py +++ b/python/paddle/text/datasets/imikolov.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import collections import tarfile + import numpy as np -import collections -from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download +from paddle.io import Dataset __all__ = [] diff --git a/python/paddle/text/datasets/movielens.py b/python/paddle/text/datasets/movielens.py index c4d0681f42a4ce..505863748caa11 100644 --- a/python/paddle/text/datasets/movielens.py +++ b/python/paddle/text/datasets/movielens.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import zipfile import re +import zipfile + +import numpy as np -from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download +from paddle.io import Dataset __all__ = [] diff --git a/python/paddle/text/datasets/uci_housing.py b/python/paddle/text/datasets/uci_housing.py index 0d58aa107e2307..381ba1b36524a4 100644 --- a/python/paddle/text/datasets/uci_housing.py +++ b/python/paddle/text/datasets/uci_housing.py @@ -15,8 +15,8 @@ import numpy as np import paddle -from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download +from paddle.io import Dataset __all__ = [] diff --git a/python/paddle/text/datasets/wmt14.py b/python/paddle/text/datasets/wmt14.py index 8cf66c27f8c50e..587e490c65d79b 100644 --- a/python/paddle/text/datasets/wmt14.py +++ b/python/paddle/text/datasets/wmt14.py @@ -13,10 +13,11 @@ # limitations under the License. import tarfile + import numpy as np -from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download +from paddle.io import Dataset __all__ = [] diff --git a/python/paddle/text/datasets/wmt16.py b/python/paddle/text/datasets/wmt16.py index 9889c58573bc42..3183b67f7e6516 100644 --- a/python/paddle/text/datasets/wmt16.py +++ b/python/paddle/text/datasets/wmt16.py @@ -16,12 +16,13 @@ import os import tarfile -import numpy as np from collections import defaultdict +import numpy as np + import paddle -from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download +from paddle.io import Dataset __all__ = [] diff --git a/python/paddle/text/viterbi_decode.py b/python/paddle/text/viterbi_decode.py index 6bb9ccd4052af0..0fbb2c20a6f810 100644 --- a/python/paddle/text/viterbi_decode.py +++ b/python/paddle/text/viterbi_decode.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ..nn import Layer +from paddle import _C_ops, _legacy_C_ops + +from ..fluid.data_feeder import check_type, check_variable_and_dtype from ..fluid.framework import _non_static_mode, in_dygraph_mode from ..fluid.layer_helper import LayerHelper -from ..fluid.data_feeder import check_variable_and_dtype, check_type -from paddle import _C_ops, _legacy_C_ops +from ..nn import Layer __all__ = ['viterbi_decode', 'ViterbiDecoder'] diff --git a/python/paddle/utils/cpp_extension/cpp_extension.py b/python/paddle/utils/cpp_extension/cpp_extension.py index c05be5f2a49472..c31b934c55ec86 100644 --- a/python/paddle/utils/cpp_extension/cpp_extension.py +++ b/python/paddle/utils/cpp_extension/cpp_extension.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# isort: skip_file + import os import copy import re diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py index 15f826a8b18c99..46046b5166029f 100644 --- a/python/paddle/utils/cpp_extension/extension_utils.py +++ b/python/paddle/utils/cpp_extension/extension_utils.py @@ -12,22 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import re -import sys -import json -import glob import atexit +import collections +import glob import hashlib +import json import logging -import collections -import textwrap -import warnings +import os +import re import subprocess +import sys +import textwrap import threading - -from importlib import machinery +import warnings from contextlib import contextmanager +from importlib import machinery + from setuptools.command import bdist_egg try: diff --git a/python/paddle/utils/deprecated.py b/python/paddle/utils/deprecated.py index b4c17de3f178f4..7854f12aa9c10c 100755 --- a/python/paddle/utils/deprecated.py +++ b/python/paddle/utils/deprecated.py @@ -15,10 +15,11 @@ decorator to deprecate a function or class """ -import warnings import functools -import paddle import sys +import warnings + +import paddle __all__ = [] diff --git a/python/paddle/utils/dlpack.py b/python/paddle/utils/dlpack.py index c0449cdcae935a..beb37be4530fd3 100644 --- a/python/paddle/utils/dlpack.py +++ b/python/paddle/utils/dlpack.py @@ -13,9 +13,10 @@ # limitations under the License. import paddle + from ..fluid.core import LoDTensor -from ..fluid.framework import _non_static_mode from ..fluid.data_feeder import check_type +from ..fluid.framework import _non_static_mode __all__ = [ 'to_dlpack', diff --git a/python/paddle/utils/download.py b/python/paddle/utils/download.py index 660e09e8668d5d..a7f6883c974985 100644 --- a/python/paddle/utils/download.py +++ b/python/paddle/utils/download.py @@ -12,16 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import hashlib import os -import sys import os.path as osp import shutil -import requests import subprocess -import hashlib +import sys import tarfile -import zipfile import time +import zipfile + +import requests try: from tqdm import tqdm diff --git a/python/paddle/utils/image_util.py b/python/paddle/utils/image_util.py index 107ee828af48e7..42e0488a3e7a88 100644 --- a/python/paddle/utils/image_util.py +++ b/python/paddle/utils/image_util.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from io import StringIO + import numpy as np from PIL import Image -from io import StringIO __all__ = [] diff --git a/python/paddle/utils/install_check.py b/python/paddle/utils/install_check.py index c9cc1bb7a49e27..167b4c743bde62 100644 --- a/python/paddle/utils/install_check.py +++ b/python/paddle/utils/install_check.py @@ -13,6 +13,7 @@ # limitations under the License. 
import logging + import numpy as np import paddle diff --git a/python/paddle/utils/profiler.py b/python/paddle/utils/profiler.py index 625900e87e8684..3ede624bfa57b6 100644 --- a/python/paddle/utils/profiler.py +++ b/python/paddle/utils/profiler.py @@ -17,10 +17,8 @@ from ..fluid import core from ..fluid.profiler import cuda_profiler # noqa: F401 -from ..fluid.profiler import start_profiler from ..fluid.profiler import profiler # noqa: F401 -from ..fluid.profiler import stop_profiler -from ..fluid.profiler import reset_profiler +from ..fluid.profiler import reset_profiler, start_profiler, stop_profiler __all__ = [ # noqa 'Profiler', diff --git a/python/paddle/utils/unique_name.py b/python/paddle/utils/unique_name.py index 77ecebcc45b316..e4ac2381479132 100644 --- a/python/paddle/utils/unique_name.py +++ b/python/paddle/utils/unique_name.py @@ -13,7 +13,7 @@ # limitations under the License. from ..fluid.unique_name import generate # noqa: F401 -from ..fluid.unique_name import switch # noqa: F401 from ..fluid.unique_name import guard # noqa: F401 +from ..fluid.unique_name import switch # noqa: F401 __all__ = ['generate', 'switch', 'guard'] # noqa diff --git a/python/paddle/vision/datasets/cifar.py b/python/paddle/vision/datasets/cifar.py index 5cf6277be66849..444aa361f180be 100644 --- a/python/paddle/vision/datasets/cifar.py +++ b/python/paddle/vision/datasets/cifar.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pickle import tarfile + import numpy as np from PIL import Image -import pickle import paddle -from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download +from paddle.io import Dataset __all__ = [] diff --git a/python/paddle/vision/datasets/flowers.py b/python/paddle/vision/datasets/flowers.py index 1b1c556407b5b2..206e4d9703ed32 100644 --- a/python/paddle/vision/datasets/flowers.py +++ b/python/paddle/vision/datasets/flowers.py @@ -14,13 +14,14 @@ import os import tarfile + import numpy as np from PIL import Image import paddle +from paddle.dataset.common import _check_exists_and_download from paddle.io import Dataset from paddle.utils import try_import -from paddle.dataset.common import _check_exists_and_download __all__ = [] diff --git a/python/paddle/vision/datasets/mnist.py b/python/paddle/vision/datasets/mnist.py index c273d41d4a6cc9..9ba86036fbe141 100644 --- a/python/paddle/vision/datasets/mnist.py +++ b/python/paddle/vision/datasets/mnist.py @@ -14,12 +14,13 @@ import gzip import struct + import numpy as np from PIL import Image import paddle -from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download +from paddle.io import Dataset __all__ = [] diff --git a/python/paddle/vision/datasets/voc2012.py b/python/paddle/vision/datasets/voc2012.py index 81478378ae4125..b80b3efb7db15c 100644 --- a/python/paddle/vision/datasets/voc2012.py +++ b/python/paddle/vision/datasets/voc2012.py @@ -14,12 +14,13 @@ import io import tarfile + import numpy as np from PIL import Image import paddle -from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download +from paddle.io import Dataset __all__ = [] diff --git a/python/paddle/vision/image.py b/python/paddle/vision/image.py index eda914d939cd35..8285132e4ea3e4 100644 --- a/python/paddle/vision/image.py +++ b/python/paddle/vision/image.py @@ -13,6 +13,7 @@ # limitations under the License. 
from PIL import Image + from paddle.utils import try_import __all__ = [] diff --git a/python/paddle/vision/models/alexnet.py b/python/paddle/vision/models/alexnet.py index 988469d1cce0fa..fc04465c1ae97d 100644 --- a/python/paddle/vision/models/alexnet.py +++ b/python/paddle/vision/models/alexnet.py @@ -13,14 +13,13 @@ # limitations under the License. import math + import paddle import paddle.nn as nn import paddle.nn.functional as F - -from paddle.nn import Linear, Dropout, ReLU -from paddle.nn import Conv2D, MaxPool2D -from paddle.nn.initializer import Uniform from paddle.fluid.param_attr import ParamAttr +from paddle.nn import Conv2D, Dropout, Linear, MaxPool2D, ReLU +from paddle.nn.initializer import Uniform from paddle.utils.download import get_weights_path_from_url model_urls = { diff --git a/python/paddle/vision/models/densenet.py b/python/paddle/vision/models/densenet.py index 67baa7ea8f52f8..f620e1d70956bc 100644 --- a/python/paddle/vision/models/densenet.py +++ b/python/paddle/vision/models/densenet.py @@ -16,10 +16,17 @@ import paddle import paddle.nn as nn -from paddle.nn import Conv2D, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform from paddle.fluid.param_attr import ParamAttr +from paddle.nn import ( + AdaptiveAvgPool2D, + AvgPool2D, + BatchNorm, + Conv2D, + Dropout, + Linear, + MaxPool2D, +) +from paddle.nn.initializer import Uniform from paddle.utils.download import get_weights_path_from_url __all__ = [] diff --git a/python/paddle/vision/models/googlenet.py b/python/paddle/vision/models/googlenet.py index 4d4169df450260..89456f49831bd1 100644 --- a/python/paddle/vision/models/googlenet.py +++ b/python/paddle/vision/models/googlenet.py @@ -15,11 +15,16 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F - -from paddle.nn import Conv2D, Linear, Dropout -from paddle.nn import MaxPool2D, AvgPool2D, AdaptiveAvgPool2D -from paddle.nn.initializer import Uniform from paddle.fluid.param_attr import ParamAttr +from paddle.nn import ( + AdaptiveAvgPool2D, + AvgPool2D, + Conv2D, + Dropout, + Linear, + MaxPool2D, +) +from paddle.nn.initializer import Uniform from paddle.utils.download import get_weights_path_from_url __all__ = [] diff --git a/python/paddle/vision/models/inceptionv3.py b/python/paddle/vision/models/inceptionv3.py index 24c55bdd7578f8..84aa955ea460d6 100644 --- a/python/paddle/vision/models/inceptionv3.py +++ b/python/paddle/vision/models/inceptionv3.py @@ -13,14 +13,14 @@ # limitations under the License. 
import math + import paddle import paddle.nn as nn -from paddle.nn import Linear, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D -from paddle.nn.initializer import Uniform from paddle.fluid.param_attr import ParamAttr - +from paddle.nn import AdaptiveAvgPool2D, AvgPool2D, Dropout, Linear, MaxPool2D +from paddle.nn.initializer import Uniform from paddle.utils.download import get_weights_path_from_url + from ..ops import ConvNormActivation __all__ = [] diff --git a/python/paddle/vision/models/mobilenetv1.py b/python/paddle/vision/models/mobilenetv1.py index 54d13d8523156b..a7ec30601b170b 100644 --- a/python/paddle/vision/models/mobilenetv1.py +++ b/python/paddle/vision/models/mobilenetv1.py @@ -14,8 +14,8 @@ import paddle import paddle.nn as nn - from paddle.utils.download import get_weights_path_from_url + from ..ops import ConvNormActivation __all__ = [] diff --git a/python/paddle/vision/models/mobilenetv2.py b/python/paddle/vision/models/mobilenetv2.py index 715dd59ca18920..1f9d04509dd7b1 100644 --- a/python/paddle/vision/models/mobilenetv2.py +++ b/python/paddle/vision/models/mobilenetv2.py @@ -16,8 +16,8 @@ import paddle.nn as nn from paddle.utils.download import get_weights_path_from_url -from .utils import _make_divisible from ..ops import ConvNormActivation +from .utils import _make_divisible __all__ = [] diff --git a/python/paddle/vision/models/mobilenetv3.py b/python/paddle/vision/models/mobilenetv3.py index 865f8d0dd3222d..3ca62af7e558fe 100644 --- a/python/paddle/vision/models/mobilenetv3.py +++ b/python/paddle/vision/models/mobilenetv3.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +from functools import partial + import paddle import paddle.nn as nn from paddle.utils.download import get_weights_path_from_url -from functools import partial -from .utils import _make_divisible from ..ops import ConvNormActivation +from .utils import _make_divisible __all__ = [] diff --git a/python/paddle/vision/models/resnet.py b/python/paddle/vision/models/resnet.py index 5e89e7df8abefd..4d3a785bb58c69 100644 --- a/python/paddle/vision/models/resnet.py +++ b/python/paddle/vision/models/resnet.py @@ -14,7 +14,6 @@ import paddle import paddle.nn as nn - from paddle.utils.download import get_weights_path_from_url __all__ = [] diff --git a/python/paddle/vision/models/squeezenet.py b/python/paddle/vision/models/squeezenet.py index dd66087e6187a4..896afc450379b8 100644 --- a/python/paddle/vision/models/squeezenet.py +++ b/python/paddle/vision/models/squeezenet.py @@ -15,10 +15,8 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F - -from paddle.nn import Conv2D, Dropout -from paddle.nn import AdaptiveAvgPool2D, MaxPool2D from paddle.fluid.param_attr import ParamAttr +from paddle.nn import AdaptiveAvgPool2D, Conv2D, Dropout, MaxPool2D from paddle.utils.download import get_weights_path_from_url __all__ = [] diff --git a/python/paddle/vision/models/vgg.py b/python/paddle/vision/models/vgg.py index 174ec99c6d1877..6064d51ba652e7 100644 --- a/python/paddle/vision/models/vgg.py +++ b/python/paddle/vision/models/vgg.py @@ -14,7 +14,6 @@ import paddle import paddle.nn as nn - from paddle.utils.download import get_weights_path_from_url __all__ = [] diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py index ef9a51cbdec609..5b8ce0c23000ae 100755 --- a/python/paddle/vision/ops.py +++ b/python/paddle/vision/ops.py @@ -13,19 +13,21 @@ # limitations under the License. 
import numpy as np -from ..fluid.layer_helper import LayerHelper + +from paddle import _C_ops, _legacy_C_ops + from ..fluid.data_feeder import check_type, check_variable_and_dtype -from ..fluid.layers import nn, utils -from ..nn import Layer, Conv2D, Sequential, ReLU, BatchNorm2D -from ..fluid.initializer import Normal from ..fluid.framework import ( Variable, + _in_legacy_dygraph, _non_static_mode, in_dygraph_mode, - _in_legacy_dygraph, ) -from paddle import _C_ops, _legacy_C_ops +from ..fluid.initializer import Normal +from ..fluid.layer_helper import LayerHelper +from ..fluid.layers import nn, utils from ..framework import _current_expected_place +from ..nn import BatchNorm2D, Conv2D, Layer, ReLU, Sequential __all__ = [ # noqa 'yolo_loss', diff --git a/python/paddle/vision/transforms/functional.py b/python/paddle/vision/transforms/functional.py index f813142144c8ce..b5889981d24870 100644 --- a/python/paddle/vision/transforms/functional.py +++ b/python/paddle/vision/transforms/functional.py @@ -17,10 +17,11 @@ import numpy as np from PIL import Image + import paddle -from . import functional_pil as F_pil from . import functional_cv2 as F_cv2 +from . import functional_pil as F_pil from . import functional_tensor as F_t __all__ = [] diff --git a/python/paddle/vision/transforms/functional_cv2.py b/python/paddle/vision/transforms/functional_cv2.py index f22b63d83f8711..b44c20ff1930d6 100644 --- a/python/paddle/vision/transforms/functional_cv2.py +++ b/python/paddle/vision/transforms/functional_cv2.py @@ -14,14 +14,13 @@ import math import numbers +from collections.abc import Iterable, Sequence import numpy as np import paddle from paddle.utils import try_import -from collections.abc import Sequence, Iterable - __all__ = [] diff --git a/python/paddle/vision/transforms/functional_pil.py b/python/paddle/vision/transforms/functional_pil.py index 432cd7da2bd8ab..bca2df08e622c3 100644 --- a/python/paddle/vision/transforms/functional_pil.py +++ b/python/paddle/vision/transforms/functional_pil.py @@ -13,12 +13,12 @@ # limitations under the License. import numbers -from PIL import Image, ImageOps, ImageEnhance +from collections.abc import Iterable, Sequence import numpy as np -import paddle +from PIL import Image, ImageEnhance, ImageOps -from collections.abc import Sequence, Iterable +import paddle try: # PIL version >= "9.1.0" diff --git a/python/paddle/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py index 13be056df2daf6..9b14c775982a3b 100644 --- a/python/paddle/vision/transforms/transforms.py +++ b/python/paddle/vision/transforms/transforms.py @@ -13,16 +13,16 @@ # limitations under the License. import math +import numbers import random +import traceback +from collections.abc import Iterable, Sequence import numpy as np -import numbers -import traceback import paddle -from . import functional as F -from collections.abc import Sequence, Iterable +from . 
import functional as F __all__ = [] diff --git a/r/example/mobilenet.py b/r/example/mobilenet.py index 082a1dea7deac9..806c5ba27ad868 100755 --- a/r/example/mobilenet.py +++ b/r/example/mobilenet.py @@ -17,8 +17,8 @@ # pylint: skip-file import numpy as np -from paddle.fluid.core import AnalysisConfig -from paddle.fluid.core import create_paddle_predictor + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor def main(): diff --git a/tools/CrossStackProfiler/CspFileReader.py b/tools/CrossStackProfiler/CspFileReader.py index 55a1722be66b7c..1f0a60794aef22 100755 --- a/tools/CrossStackProfiler/CspFileReader.py +++ b/tools/CrossStackProfiler/CspFileReader.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import time -import json import glob +import json import logging +import os +import time from multiprocessing import Lock """ Some terms to clarify the code diff --git a/tools/CrossStackProfiler/CspReporter.py b/tools/CrossStackProfiler/CspReporter.py index 999ba4fb3e2336..94e60c8ae24d0e 100755 --- a/tools/CrossStackProfiler/CspReporter.py +++ b/tools/CrossStackProfiler/CspReporter.py @@ -12,19 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import glob import argparse - +import glob +import os from multiprocessing import Process +from CspFileReader import ( + DCGM_PATH, + FILEORGANIZEFORM_BYRANK, + FILEORGANIZEFORM_BYTRAINER, + NET_PATH, + PROFILE_PATH, + TIME_PATH, + getLogger, +) from DCGMFileReader import dcgmFileReader from ProfileFileReader import profileFileReader -from CspFileReader import getLogger -from CspFileReader import TIME_PATH, DCGM_PATH, NET_PATH, PROFILE_PATH -from CspFileReader import FILEORGANIZEFORM_BYRANK, FILEORGANIZEFORM_BYTRAINER - def get_argparse(): parser = argparse.ArgumentParser(description=__doc__) diff --git a/tools/CrossStackProfiler/DCGMFileReader.py b/tools/CrossStackProfiler/DCGMFileReader.py index 86868c8734e3f2..4627fa43cee4bc 100755 --- a/tools/CrossStackProfiler/DCGMFileReader.py +++ b/tools/CrossStackProfiler/DCGMFileReader.py @@ -12,19 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import logging +import multiprocessing import os import re -import logging import tempfile -import pandas as pd -import multiprocessing from multiprocessing import Process -from CspFileReader import FileReader -from CspFileReader import getLogger -from CspFileReader import dcgmMetricParameterMap -from CspFileReader import PIPELINEINFO_TRACE_NUM -from CspFileReader import FILEORGANIZEFORM_BYTRAINER +import pandas as pd +from CspFileReader import ( + FILEORGANIZEFORM_BYTRAINER, + PIPELINEINFO_TRACE_NUM, + FileReader, + dcgmMetricParameterMap, + getLogger, +) class dcgmFileReader(FileReader): diff --git a/tools/CrossStackProfiler/NetFileReader.py b/tools/CrossStackProfiler/NetFileReader.py index ed9cfdb1fc86f2..e17fab30726043 100755 --- a/tools/CrossStackProfiler/NetFileReader.py +++ b/tools/CrossStackProfiler/NetFileReader.py @@ -14,13 +14,14 @@ import json import multiprocessing - from multiprocessing import Process -from CspFileReader import FileReader -from CspFileReader import getLogger -from CspFileReader import PIPELINEINFO_TRACE_NUM -from CspFileReader import FILEORGANIZEFORM_BYTRAINER +from CspFileReader import ( + FILEORGANIZEFORM_BYTRAINER, + PIPELINEINFO_TRACE_NUM, + FileReader, + getLogger, +) class netFileReader(FileReader): diff --git a/tools/CrossStackProfiler/ProfileFileReader.py b/tools/CrossStackProfiler/ProfileFileReader.py index e7a07c40820af1..fac0d60d1ec142 100755 --- a/tools/CrossStackProfiler/ProfileFileReader.py +++ b/tools/CrossStackProfiler/ProfileFileReader.py @@ -16,18 +16,17 @@ import multiprocessing from multiprocessing import Process -import paddle.fluid.proto.profiler.profiler_pb2 as profiler_pb2 - from CspChromeTraceFormatter import ChromeTraceFormatter - -from CspFileReader import FileReader -from CspFileReader import getLogger from CspFileReader import ( - NETINFO_TRACE_NUM, DCGMINFO_TRACE_NUM, + FILEORGANIZEFORM_BYRANK, + NETINFO_TRACE_NUM, PIPELINEINFO_TRACE_NUM, + FileReader, + getLogger, ) -from CspFileReader import FILEORGANIZEFORM_BYRANK + +import paddle.fluid.proto.profiler.profiler_pb2 as profiler_pb2 class profileFileReader(FileReader): diff --git a/tools/analysisPyXml.py b/tools/analysisPyXml.py index 200116779dbeb2..99c9959aa25026 100644 --- a/tools/analysisPyXml.py +++ b/tools/analysisPyXml.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import commands -from xml.etree import ElementTree -import re import os +import re import sys +from xml.etree import ElementTree + +import commands def analysisPyXml(rootPath, ut): diff --git a/tools/check_api_compatible.py b/tools/check_api_compatible.py index ea08a4494bb171..fe6db303c7ff46 100644 --- a/tools/check_api_compatible.py +++ b/tools/check_api_compatible.py @@ -14,9 +14,9 @@ import argparse import inspect -import sys -import re import logging +import re +import sys logger = logging.getLogger() if logger.handlers: diff --git a/tools/check_api_source_without_core_ops.py b/tools/check_api_source_without_core_ops.py index bf7027be5cb740..ba79fb35952d48 100644 --- a/tools/check_api_source_without_core_ops.py +++ b/tools/check_api_source_without_core_ops.py @@ -14,6 +14,7 @@ import difflib import sys + import count_api_without_core_ops with open(sys.argv[1], 'r') as f: diff --git a/tools/check_ctest_hung.py b/tools/check_ctest_hung.py index 11e927eb681e88..3cd5acacb85339 100644 --- a/tools/check_ctest_hung.py +++ b/tools/check_ctest_hung.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import re +import sys def escape(input): diff --git a/tools/check_op_benchmark_result.py b/tools/check_op_benchmark_result.py index 6c4e0fc06b6a5f..3da859785f4842 100644 --- a/tools/check_op_benchmark_result.py +++ b/tools/check_op_benchmark_result.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os +import argparse import json import logging -import argparse +import os def check_path_exists(path): diff --git a/tools/check_op_desc.py b/tools/check_op_desc.py index 3bb1f238253940..7c952460d55458 100644 --- a/tools/check_op_desc.py +++ b/tools/check_op_desc.py @@ -14,8 +14,9 @@ import json import sys -from paddle.utils import OpLastCheckpointChecker + from paddle.fluid.core import OpUpdateType +from paddle.utils import OpLastCheckpointChecker INPUTS = "Inputs" OUTPUTS = "Outputs" diff --git a/tools/check_op_kernel_same_dtypes.py b/tools/check_op_kernel_same_dtypes.py index 13d0119f83b2ee..7b9b4949c36805 100644 --- a/tools/check_op_kernel_same_dtypes.py +++ b/tools/check_op_kernel_same_dtypes.py @@ -18,9 +18,10 @@ python check_op_kernel_same_dtypes.py > all_kernels.txt python check_op_kernel_same_dtypes.py OP_KERNEL_DTYPE_DEV.spec OP_KERNEL_DTYPE_PR.spec > is_valid """ -import sys -import re import collections +import re +import sys + import paddle diff --git a/tools/check_op_register_type.py b/tools/check_op_register_type.py index af7f3ee42a8878..e95e03b1e0dca0 100644 --- a/tools/check_op_register_type.py +++ b/tools/check_op_register_type.py @@ -18,10 +18,11 @@ python check_op_register_type.py > all_kernels.txt python check_op_register_type.py OP_TYPE_DEV.spec OP_TYPE_PR.spec > is_valid """ -import sys -import re -import difflib import collections +import difflib +import re +import sys + import paddle.fluid as fluid INTS = set(['int', 'int64_t']) diff --git a/tools/check_pr_approval.py b/tools/check_pr_approval.py index 7e7e7720bf8fd6..a2495d18f52004 100644 --- a/tools/check_pr_approval.py +++ b/tools/check_pr_approval.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import json +import sys def check_approval(count, required_reviewers): diff --git a/tools/check_ut.py b/tools/check_ut.py index 6f1a8ab02ba61e..90cff366f4bbd2 100644 --- a/tools/check_ut.py +++ b/tools/check_ut.py @@ -16,6 +16,7 @@ import os import os.path + from github import Github diff --git a/tools/codestyle/docstring_checker.py b/tools/codestyle/docstring_checker.py index 8deeff77348f47..4b5ad43033935b 100644 --- a/tools/codestyle/docstring_checker.py +++ b/tools/codestyle/docstring_checker.py @@ -13,14 +13,13 @@ # limitations under the License. """DocstringChecker is used to check python doc string's style.""" -import astroid +import re +from collections import defaultdict +import astroid from pylint.checkers import BaseChecker from pylint.interfaces import IAstroidChecker -from collections import defaultdict -import re - def register(linter): """Register checkers.""" diff --git a/tools/codestyle/test_docstring_checker.py b/tools/codestyle/test_docstring_checker.py index 09617a0f2f7350..09a71fb2467adb 100644 --- a/tools/codestyle/test_docstring_checker.py +++ b/tools/codestyle/test_docstring_checker.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import astroid import docstring_checker import pylint.testutils -import astroid class TestDocstring(pylint.testutils.CheckerTestCase): diff --git a/tools/count_api_without_core_ops.py b/tools/count_api_without_core_ops.py index 96a7c33dd95ff3..1c0ada015048d8 100644 --- a/tools/count_api_without_core_ops.py +++ b/tools/count_api_without_core_ops.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections +import hashlib import importlib import inspect -import collections import sys -import hashlib __all__ = [ 'get_apis_with_and_without_core_ops', diff --git a/tools/diff_use_default_grad_op_maker.py b/tools/diff_use_default_grad_op_maker.py index bab07123d5371d..38e7f437de4df5 100644 --- a/tools/diff_use_default_grad_op_maker.py +++ b/tools/diff_use_default_grad_op_maker.py @@ -16,9 +16,10 @@ os.environ['CUDA_VISIBLE_DEVICES'] = '' -import paddle.fluid as fluid import sys +import paddle.fluid as fluid + def generate_spec(filename): with open(filename, 'w') as f: diff --git a/tools/dockerfile/build_scripts/python-tag-abi-tag.py b/tools/dockerfile/build_scripts/python-tag-abi-tag.py index 0364ab3659e49d..685ed26cb91a3b 100644 --- a/tools/dockerfile/build_scripts/python-tag-abi-tag.py +++ b/tools/dockerfile/build_scripts/python-tag-abi-tag.py @@ -16,6 +16,6 @@ # See PEP 425 for exactly what these are, but an example would be: # cp27-cp27mu -from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag +from wheel.pep425tags import get_abbr_impl, get_abi_tag, get_impl_ver print("{0}{1}-{2}".format(get_abbr_impl(), get_impl_ver(), get_abi_tag())) diff --git a/tools/externalError/spider.py b/tools/externalError/spider.py index d6062e5d50a43d..50e4a3815b8ff4 100644 --- a/tools/externalError/spider.py +++ b/tools/externalError/spider.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import ssl +import getopt import re -import urllib.request +import ssl import sys -import getopt -import external_error_pb2 +import urllib.request from html.parser import HTMLParser +import external_error_pb2 + def parsing(externalErrorDesc): # *********************************************************************************************# diff --git a/tools/final_ut_parallel_rule.py b/tools/final_ut_parallel_rule.py index bc819b59d8fd1d..ae91a89af86af3 100644 --- a/tools/final_ut_parallel_rule.py +++ b/tools/final_ut_parallel_rule.py @@ -14,8 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import json +import os import sys diff --git a/tools/gen_ut_cmakelists.py b/tools/gen_ut_cmakelists.py index 4bfc58f6404239..64e22d1287f7a1 100644 --- a/tools/gen_ut_cmakelists.py +++ b/tools/gen_ut_cmakelists.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import re -import os import argparse +import os +import re # port range (21200, 23000) is reserved for dist-ops diff --git a/tools/get_op_list.py b/tools/get_op_list.py index a6d726dc3096e9..aa1fd4dbfb353b 100644 --- a/tools/get_op_list.py +++ b/tools/get_op_list.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import argparse -import numpy as np import os import re + +import numpy as np + +import paddle from paddle.inference import _get_phi_kernel_name paddle.enable_static() diff --git a/tools/get_pr_ut.py b/tools/get_pr_ut.py index 02aaf13a17d31f..6773227d2acc2b 100644 --- a/tools/get_pr_ut.py +++ b/tools/get_pr_ut.py @@ -13,15 +13,16 @@ # limitations under the License. """ For the PR that only modified the unit test, get cases in pull request. """ -import os import json +import os +import platform import re -import time +import ssl import subprocess -import requests +import time import urllib.request -import ssl -import platform + +import requests from github import Github PADDLE_ROOT = os.getenv('PADDLE_ROOT', '/paddle/') diff --git a/tools/get_quick_disable_lt.py b/tools/get_quick_disable_lt.py index 856d71d18cf218..eaf439b04134e6 100644 --- a/tools/get_quick_disable_lt.py +++ b/tools/get_quick_disable_lt.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import ssl +import sys + import requests + import paddle diff --git a/tools/get_single_test_cov.py b/tools/get_single_test_cov.py index 10aaa96ce7be13..8cdf32f348f090 100644 --- a/tools/get_single_test_cov.py +++ b/tools/get_single_test_cov.py @@ -13,8 +13,8 @@ # limitations under the License. import os -import sys import re +import sys def getFNDAFile(rootPath, test): diff --git a/tools/get_ut_file_map.py b/tools/get_ut_file_map.py index 269d844d3292bd..f63fa8ab828e0d 100644 --- a/tools/get_ut_file_map.py +++ b/tools/get_ut_file_map.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json import os import sys -import json def get_all_paddle_file(rootPath): diff --git a/tools/get_ut_mem_map.py b/tools/get_ut_mem_map.py index 703f0ab98ac258..344d82691b6f28 100644 --- a/tools/get_ut_mem_map.py +++ b/tools/get_ut_mem_map.py @@ -14,8 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os import json +import os import sys diff --git a/tools/handle_h_cu_file.py b/tools/handle_h_cu_file.py index 7cd29b39aa783e..1a096fa894e463 100644 --- a/tools/handle_h_cu_file.py +++ b/tools/handle_h_cu_file.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import queue +import sys import threading -import os import time -import sys taskQueue = queue.Queue() diff --git a/tools/infrt/generate_phi_kernel_dialect.py b/tools/infrt/generate_phi_kernel_dialect.py index a5d1003dbb9f22..d1af53d07e351c 100644 --- a/tools/infrt/generate_phi_kernel_dialect.py +++ b/tools/infrt/generate_phi_kernel_dialect.py @@ -13,8 +13,9 @@ # limitations under the License. import json -import yaml import os + +import yaml from get_compat_kernel_signature import get_compat_kernels_info # TODO @DannyIsFunny: more attr types need to be supported. diff --git a/tools/infrt/get_phi_kernel_info.py b/tools/infrt/get_phi_kernel_info.py index 9eeee88276d80c..6b7f34e2d4c66f 100644 --- a/tools/infrt/get_phi_kernel_info.py +++ b/tools/infrt/get_phi_kernel_info.py @@ -16,9 +16,10 @@ import argparse import json -import yaml from typing import Dict, List +import yaml + skipped_phi_api_list_file = "/tools/infrt/skipped_phi_api.json" api_yaml_file = "/paddle/phi/api/yaml/api.yaml" legacy_api_yaml_file = "/paddle/phi/api/yaml/legacy_api.yaml" diff --git a/tools/jetson_infer_op.py b/tools/jetson_infer_op.py index 664f2b6616ed68..300d796ea9d444 100644 --- a/tools/jetson_infer_op.py +++ b/tools/jetson_infer_op.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import re -import os -import math import argparse +import math +import os +import re from threading import Thread # some particular ops diff --git a/tools/print_op_desc.py b/tools/print_op_desc.py index c0ce5789b7b717..2dbfa7087651e7 100644 --- a/tools/print_op_desc.py +++ b/tools/print_op_desc.py @@ -42,9 +42,10 @@ python print_op_desc.py > op_desc.spec """ +import json + import paddle.fluid.framework as framework from paddle.fluid import core -import json INPUTS = "Inputs" OUTPUTS = "Outputs" diff --git a/tools/print_signatures.py b/tools/print_signatures.py index 4c85ba123beb7a..2ae695c8fc74ff 100644 --- a/tools/print_signatures.py +++ b/tools/print_signatures.py @@ -18,13 +18,14 @@ ./print_signature "paddle.fluid" > signature.txt """ -import inspect +import argparse import collections -import sys import hashlib -import pkgutil +import inspect import logging -import argparse +import pkgutil +import sys + import paddle member_dict = collections.OrderedDict() diff --git a/tools/prune_for_jetson.py b/tools/prune_for_jetson.py index 14d1fb3ef2e287..70434c513cad40 100644 --- a/tools/prune_for_jetson.py +++ b/tools/prune_for_jetson.py @@ -16,9 +16,9 @@ when cmake ON_INFER=ON, which can greatly reduce the volume of the prediction library. """ +import glob import os import re -import glob def find_type_files(cur_dir, file_type, file_list=[]): diff --git a/tools/pyCov_multithreading.py b/tools/pyCov_multithreading.py index e808dcb55e0ac2..073d5d34b61143 100644 --- a/tools/pyCov_multithreading.py +++ b/tools/pyCov_multithreading.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import time
-import queue
-import threading
 import os
+import queue
 import sys
+import threading
+import time
 
 taskQueue = queue.Queue()
 lock = threading.RLock()
diff --git a/tools/remove_grad_op_and_kernel.py b/tools/remove_grad_op_and_kernel.py
index 85bfed9640da84..870640347187d1 100644
--- a/tools/remove_grad_op_and_kernel.py
+++ b/tools/remove_grad_op_and_kernel.py
@@ -16,9 +16,9 @@
 when cmake ON_INFER=ON, which can greatly reduce the volume of the prediction library.
 """
 
+import glob
 import os
 import re
-import glob
 
 
 def find_type_files(cur_dir, file_type, file_list=[]):
diff --git a/tools/sampcd_processor.py b/tools/sampcd_processor.py
index 3483b8253070b4..742cb3f7c78cae 100644
--- a/tools/sampcd_processor.py
+++ b/tools/sampcd_processor.py
@@ -21,16 +21,16 @@
 python sampcd_processor.py cpu
 """
 
-import os
-import sys
-import subprocess
+import argparse
+import inspect
+import logging
 import multiprocessing
+import os
 import platform
-import inspect
-import argparse
-import shutil
 import re
-import logging
+import shutil
+import subprocess
+import sys
 import time
 
 logger = logging.getLogger()
diff --git a/tools/summary_env.py b/tools/summary_env.py
index 14fb512ca22b00..ec63f764c01bf2 100644
--- a/tools/summary_env.py
+++ b/tools/summary_env.py
@@ -11,10 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import sys
-import distro
 import platform
 import subprocess
+import sys
+
+import distro
 
 envs_template = """
 Paddle version: {paddle_version}
diff --git a/tools/test_check_api_compatible.py b/tools/test_check_api_compatible.py
index d7c9a0b195d363..dd134334ed9fd7 100644
--- a/tools/test_check_api_compatible.py
+++ b/tools/test_check_api_compatible.py
@@ -16,12 +16,14 @@
 """
 TestCases for check_api_compatible.py
 """
-import unittest
 import tempfile
+import unittest
 
-from check_api_compatible import read_argspec_from_file
-from check_api_compatible import check_compatible
-from check_api_compatible import check_compatible_str
+from check_api_compatible import (
+    check_compatible,
+    check_compatible_str,
+    read_argspec_from_file,
+)
 
 
 class Test_check_compatible(unittest.TestCase):
diff --git a/tools/test_check_pr_approval.py b/tools/test_check_pr_approval.py
index c126a749f1be44..8e6c9a5a2e8b11 100644
--- a/tools/test_check_pr_approval.py
+++ b/tools/test_check_pr_approval.py
@@ -16,9 +16,9 @@
 """
 TestCases for check_pr_approval.py
 """
-import unittest
 import subprocess
 import sys
+import unittest
 
 
 class Test_check_approval(unittest.TestCase):
diff --git a/tools/test_print_signatures.py b/tools/test_print_signatures.py
index c757fcce967f88..0c3e17c7fa729d 100644
--- a/tools/test_print_signatures.py
+++ b/tools/test_print_signatures.py
@@ -21,11 +21,11 @@
 paddle.autograd.PyLayer (paddle.autograd.py_layer.PyLayer, ('document', 'c26adbbf5f1eb43d16d4a399242c979e'))
 paddle.autograd.PyLayer.apply (ArgSpec(args=['cls'], varargs=args, keywords=kwargs, defaults=None), ('document', 'cb78696dc032fb8af2cba8504153154d'))
 """
-import unittest
-import hashlib
 import functools
-from print_signatures import md5
-from print_signatures import is_primitive
+import hashlib
+import unittest
+
+from print_signatures import is_primitive, md5
 
 
 def func_example(param_a, param_b):
diff --git a/tools/test_runner.py b/tools/test_runner.py
index 2f1b9a22ab3b9b..496ff8ca409ca4 100644
--- a/tools/test_runner.py
+++ b/tools/test_runner.py
@@ -12,14 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import unittest
+import importlib
 import os
 import sys
+import unittest
+from io import StringIO
+
 import paddle
 import paddle.fluid as fluid
-import importlib
 import paddle.fluid.core as core
-from io import StringIO
 
 sys.path.append(os.path.abspath(os.path.dirname(__file__)))
 import static_mode_white_list
diff --git a/tools/test_sampcd_processor.py b/tools/test_sampcd_processor.py
index d389b2dbc870b5..73556438d677e6 100644
--- a/tools/test_sampcd_processor.py
+++ b/tools/test_sampcd_processor.py
@@ -14,21 +14,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import unittest
 import os
-import shutil
 import re
+import shutil
+import unittest
+
 import sampcd_processor
-from sampcd_processor import find_all
-from sampcd_processor import get_api_md5
-from sampcd_processor import get_incrementapi
-from sampcd_processor import sampcd_extract_to_file
-from sampcd_processor import extract_code_blocks_from_docstr
-from sampcd_processor import execute_samplecode
-from sampcd_processor import find_last_future_line_end
-from sampcd_processor import insert_codes_into_codeblock
-from sampcd_processor import get_test_capacity
-from sampcd_processor import is_required_match
+from sampcd_processor import (
+    execute_samplecode,
+    extract_code_blocks_from_docstr,
+    find_all,
+    find_last_future_line_end,
+    get_api_md5,
+    get_incrementapi,
+    get_test_capacity,
+    insert_codes_into_codeblock,
+    is_required_match,
+    sampcd_extract_to_file,
+)
 
 
 class Test_find_all(unittest.TestCase):

From 419465223b3cc66a8895f00b4bb234d78522a3a7 Mon Sep 17 00:00:00 2001
From: Nyakku Shigure
Date: Tue, 29 Nov 2022 18:50:58 +0800
Subject: [PATCH 037/154] [CodeStyle][isort] introduce isort (part2) (#48390)

* isort all files

* revert conflicting files

* revert conflicting files

* revert conflicting files

* revert conflicting files

* revert conflicting files
---
 .../fluid/tests/unittests/test_accuracy_op.py | 2 ++
 .../unittests/test_activation_nn_grad.py | 10 +++---
 .../tests/unittests/test_activation_op.py | 6 ++--
 .../unittests/test_activation_sparse_op.py | 4 ++-
 .../fluid/tests/unittests/test_adadelta_op.py | 2 ++
 .../fluid/tests/unittests/test_adagrad_op.py | 8 +++--
 .../tests/unittests/test_adagrad_op_v2.py | 1 +
 .../fluid/tests/unittests/test_adam_op.py | 8 +++--
 .../test_adam_optimizer_fp32_fp64.py | 3 +-
 .../fluid/tests/unittests/test_adamax_api.py | 2 ++
 .../fluid/tests/unittests/test_adamax_op.py | 1 +
 .../fluid/tests/unittests/test_adamw_op.py | 12 ++++---
 .../unittests/test_adaptive_avg_pool1d.py | 8 ++---
 .../unittests/test_adaptive_avg_pool2d.py | 6 ++--
 .../unittests/test_adaptive_avg_pool3d.py | 3 +-
 .../unittests/test_adaptive_max_pool1d.py | 8 +++--
 .../unittests/test_adaptive_max_pool2d.py | 5 +--
 .../unittests/test_adaptive_max_pool3d.py | 5 +--
 .../unittests/test_add_reader_dependency.py | 8 +++--
 .../fluid/tests/unittests/test_addmm_op.py | 4 ++-
 .../unittests/test_affine_grid_function.py | 8 +++--
 .../tests/unittests/test_affine_grid_op.py | 2 ++
 .../tests/unittests/test_allclose_layer.py | 6 ++--
 .../fluid/tests/unittests/test_allclose_op.py | 2 ++
 .../fluid/tests/unittests/test_allgather.py | 3 +-
 .../test_amp_check_finite_and_scale_op.py | 2 ++
 .../unittests/test_anchor_generator_op.py | 1 +
 .../fluid/tests/unittests/test_angle_op.py | 3 +-
 .../unittests/test_apply_pass_to_program.py | 12 ++++---
 .../fluid/tests/unittests/test_arange.py | 8 +++--
.../tests/unittests/test_arg_min_max_op.py | 4 ++- .../tests/unittests/test_arg_min_max_v2_op.py | 2 ++ .../fluid/tests/unittests/test_argsort_op.py | 9 +++--- .../unittests/test_array_read_write_op.py | 7 ++-- .../tests/unittests/test_ascend_trigger.py | 3 +- .../fluid/tests/unittests/test_assert_op.py | 3 +- .../fluid/tests/unittests/test_assign_op.py | 16 ++++++---- .../tests/unittests/test_assign_pos_op.py | 6 ++-- .../tests/unittests/test_assign_value_op.py | 3 +- .../test_async_ssa_graph_executor_mnist.py | 3 +- .../fluid/tests/unittests/test_atan2_op.py | 3 +- .../tests/unittests/test_attention_lstm_op.py | 3 +- .../tests/unittests/test_attribute_var.py | 9 +++--- .../fluid/tests/unittests/test_auc_op.py | 6 ++-- .../unittests/test_auc_single_pred_op.py | 2 ++ .../test_auto_growth_gpu_memory_limit.py | 4 ++- .../unittests/test_auto_parallel_cluster.py | 13 +++++--- .../test_auto_parallel_completion.py | 8 ++--- .../test_auto_parallel_completion_gpt.py | 8 ++--- .../test_auto_parallel_cost_model.py | 12 +++---- .../test_auto_parallel_dist_tensor.py | 17 +++++----- .../unittests/test_auto_parallel_graph.py | 1 + .../unittests/test_auto_parallel_mapper.py | 29 +++++++++-------- .../test_auto_parallel_partitioner.py | 12 +++---- .../test_auto_parallel_partitioner_gpt.py | 12 +++---- .../unittests/test_auto_parallel_reshard.py | 10 +++--- .../test_auto_parallel_reshard_dpmppp.py | 10 +++--- .../test_auto_parallel_reshard_mppp.py | 10 +++--- .../test_auto_parallel_reshard_serial.py | 9 +++--- .../unittests/test_auto_parallel_searcher.py | 18 ++++------- .../test_auto_search_dist_matmul_op.py | 12 +++---- .../unittests/test_auto_search_dist_op.py | 12 +++---- .../test_avoid_twice_initialization.py | 1 + .../fluid/tests/unittests/test_backward.py | 7 ++-- ...test_backward_infer_var_data_type_shape.py | 6 ++-- .../fluid/tests/unittests/test_base_layer.py | 9 ++++-- .../tests/unittests/test_basic_gru_api.py | 9 +++--- .../tests/unittests/test_basic_gru_unit_op.py | 9 +++--- .../tests/unittests/test_basic_lstm_api.py | 9 +++--- .../unittests/test_basic_lstm_unit_op.py | 9 +++--- .../tests/unittests/test_basic_rnn_name.py | 4 ++- .../fluid/tests/unittests/test_batch_fc_op.py | 3 +- .../tests/unittests/test_batch_norm_op.py | 11 ++++--- .../tests/unittests/test_batch_norm_op_v2.py | 9 +++--- .../tests/unittests/test_batch_sampler.py | 3 +- .../fluid/tests/unittests/test_bce_loss.py | 8 +++-- .../unittests/test_bce_with_logits_loss.py | 6 ++-- .../unittests/test_beam_search_decode_op.py | 5 +-- .../tests/unittests/test_beam_search_op.py | 8 +++-- .../tests/unittests/test_bernoulli_op.py | 6 ++-- .../paddle/fluid/tests/unittests/test_bfgs.py | 3 +- .../unittests/test_bfloat16_embedding.py | 8 +++-- .../tests/unittests/test_bicubic_interp_op.py | 4 ++- .../unittests/test_bicubic_interp_v2_op.py | 4 ++- .../unittests/test_bilateral_slice_op.py | 4 ++- .../tests/unittests/test_bilinear_api.py | 3 +- .../unittests/test_bilinear_interp_op.py | 6 ++-- .../unittests/test_bilinear_interp_v2_op.py | 6 ++-- .../test_bilinear_tensor_product_op.py | 4 ++- .../fluid/tests/unittests/test_bincount_op.py | 6 ++-- .../unittests/test_bipartite_match_op.py | 1 + .../fluid/tests/unittests/test_bitwise_op.py | 4 ++- .../fluid/tests/unittests/test_bmm_op.py | 2 ++ .../fluid/tests/unittests/test_box_clip_op.py | 1 + .../tests/unittests/test_box_coder_op.py | 2 ++ .../test_box_decoder_and_assign_op.py | 1 + .../fluid/tests/unittests/test_boxps.py | 3 +- .../fluid/tests/unittests/test_bpr_loss_op.py | 1 + 
.../tests/unittests/test_broadcast_error.py | 2 ++ .../tests/unittests/test_broadcast_shape.py | 1 + .../unittests/test_broadcast_tensors_op.py | 7 ++-- .../tests/unittests/test_broadcast_to_op.py | 4 ++- .../tests/unittests/test_bucketize_api.py | 2 ++ .../test_buffer_shared_memory_reuse_pass.py | 10 +++--- ...euse_pass_and_fuse_optimization_op_pass.py | 3 +- .../test_build_strategy_fusion_group_pass.py | 4 ++- .../unittests/test_c_comm_init_all_op.py | 1 + .../tests/unittests/test_c_embedding_op.py | 1 + .../tests/unittests/test_calc_gradient.py | 4 ++- .../paddle/fluid/tests/unittests/test_case.py | 7 ++-- .../fluid/tests/unittests/test_cast_op.py | 11 ++++--- .../fluid/tests/unittests/test_center_loss.py | 2 ++ .../tests/unittests/test_channel_shuffle.py | 7 ++-- .../unittests/test_check_import_scipy.py | 3 +- .../tests/unittests/test_checkpoint_saver.py | 1 - .../fluid/tests/unittests/test_cholesky_op.py | 10 +++--- .../tests/unittests/test_cholesky_solve_op.py | 9 +++--- .../tests/unittests/test_chunk_eval_op.py | 2 +- .../fluid/tests/unittests/test_chunk_op.py | 7 ++-- .../unittests/test_class_center_sample_op.py | 4 ++- .../tests/unittests/test_clip_by_norm_op.py | 3 +- .../fluid/tests/unittests/test_clip_op.py | 4 ++- .../unittests/test_coalesce_tensor_op.py | 6 ++-- .../test_collect_fpn_proposals_op.py | 1 + .../unittests/test_collective_api_base.py | 12 ++++--- .../tests/unittests/test_collective_base.py | 12 ++++--- .../unittests/test_communicator_async.py | 6 ++-- .../tests/unittests/test_communicator_geo.py | 9 +++--- .../unittests/test_communicator_ps_gpu.py | 8 ++--- .../fluid/tests/unittests/test_compare_op.py | 4 ++- .../tests/unittests/test_compare_reduce_op.py | 4 ++- .../tests/unittests/test_compiled_program.py | 6 ++-- .../fluid/tests/unittests/test_complex_abs.py | 3 +- .../tests/unittests/test_complex_cast.py | 1 + .../test_complex_elementwise_layers.py | 1 + .../tests/unittests/test_complex_getitem.py | 2 ++ .../test_complex_grad_accumulated.py | 2 +- .../tests/unittests/test_complex_kron.py | 8 +++-- .../tests/unittests/test_complex_matmul.py | 4 ++- .../fluid/tests/unittests/test_complex_op.py | 3 +- .../tests/unittests/test_complex_reshape.py | 8 +++-- .../tests/unittests/test_complex_simplenet.py | 2 +- .../tests/unittests/test_complex_sum_layer.py | 6 ++-- .../unittests/test_complex_trace_layer.py | 4 ++- .../tests/unittests/test_complex_transpose.py | 4 ++- .../tests/unittests/test_complex_variable.py | 7 ++-- .../tests/unittests/test_complex_view_op.py | 3 +- .../fluid/tests/unittests/test_concat_op.py | 18 ++++++----- .../paddle/fluid/tests/unittests/test_cond.py | 9 +++--- .../tests/unittests/test_conditional_block.py | 8 +++-- .../fluid/tests/unittests/test_conj_op.py | 7 ++-- .../fluid/tests/unittests/test_const_value.py | 1 + .../tests/unittests/test_context_manager.py | 3 +- .../tests/unittests/test_conv1d_layer.py | 6 ++-- .../unittests/test_conv1d_transpose_layer.py | 6 ++-- .../fluid/tests/unittests/test_conv2d_api.py | 3 +- .../tests/unittests/test_conv2d_fusion_op.py | 6 ++-- .../tests/unittests/test_conv2d_layer.py | 10 +++--- .../fluid/tests/unittests/test_conv2d_op.py | 6 ++-- .../test_conv2d_op_depthwise_conv.py | 10 +++--- .../unittests/test_conv2d_transpose_layer.py | 8 +++-- .../unittests/test_conv2d_transpose_op.py | 8 +++-- ...test_conv2d_transpose_op_depthwise_conv.py | 1 + .../tests/unittests/test_conv3d_layer.py | 10 +++--- .../fluid/tests/unittests/test_conv3d_op.py | 7 ++-- .../unittests/test_conv3d_transpose_layer.py | 8 +++-- 
.../unittests/test_conv3d_transpose_op.py | 4 ++- .../test_conv3d_transpose_part2_op.py | 5 +-- .../tests/unittests/test_conv_nn_grad.py | 10 +++--- .../tests/unittests/test_conv_shift_op.py | 1 + .../unittests/test_conv_transpose_nn_grad.py | 8 ++--- .../paddle/fluid/tests/unittests/test_corr.py | 4 ++- .../fluid/tests/unittests/test_cos_sim_op.py | 2 ++ .../unittests/test_cosine_embedding_loss.py | 6 ++-- .../unittests/test_cosine_similarity_api.py | 7 ++-- .../tests/unittests/test_count_nonzero_api.py | 2 ++ .../paddle/fluid/tests/unittests/test_cov.py | 4 ++- .../tests/unittests/test_cpuonly_spawn.py | 2 +- .../tests/unittests/test_create_global_var.py | 2 ++ .../unittests/test_create_op_doc_string.py | 1 + .../tests/unittests/test_create_parameter.py | 4 ++- .../tests/unittests/test_crf_decoding_op.py | 4 +-- .../fluid/tests/unittests/test_crop_op.py | 2 ++ .../tests/unittests/test_crop_tensor_op.py | 2 ++ .../tests/unittests/test_cross_entropy2_op.py | 3 +- .../unittests/test_cross_entropy_loss.py | 8 +++-- .../tests/unittests/test_cross_entropy_op.py | 4 ++- .../fluid/tests/unittests/test_cross_op.py | 2 ++ .../fluid/tests/unittests/test_crypto.py | 5 ++- .../fluid/tests/unittests/test_ctc_align.py | 4 ++- .../unittests/test_cuda_cudnn_version.py | 1 + .../tests/unittests/test_cuda_device_count.py | 3 +- .../test_cuda_device_name_capability.py | 3 +- .../tests/unittests/test_cuda_empty_cache.py | 3 +- .../fluid/tests/unittests/test_cuda_graph.py | 12 ++++--- .../test_cuda_graph_partial_graph.py | 8 +++-- .../test_cuda_graph_partial_graph_static.py | 5 +-- ...est_cuda_graph_partial_graph_static_run.py | 10 +++--- .../test_cuda_max_memory_allocated.py | 7 ++-- .../test_cuda_max_memory_reserved.py | 7 ++-- .../unittests/test_cuda_memory_allocated.py | 5 +-- .../unittests/test_cuda_memory_reserved.py | 5 +-- .../tests/unittests/test_cuda_random_seed.py | 7 ++-- .../tests/unittests/test_cuda_stream_event.py | 7 ++-- .../tests/unittests/test_cudnn_grucell.py | 5 +-- .../tests/unittests/test_cudnn_lstmcell.py | 5 +-- .../fluid/tests/unittests/test_cumprod_op.py | 6 ++-- .../fluid/tests/unittests/test_cumsum_op.py | 12 ++++--- .../tests/unittests/test_custom_grad_input.py | 1 + .../fluid/tests/unittests/test_cvm_op.py | 5 +-- .../unittests/test_cyclic_cifar_dataset.py | 3 +- .../paddle/fluid/tests/unittests/test_data.py | 2 +- .../tests/unittests/test_data_generator.py | 1 + .../unittests/test_dataloader_autotune.py | 12 ++++--- .../unittests/test_dataloader_dataset.py | 2 +- .../unittests/test_dataloader_early_reset.py | 6 ++-- .../unittests/test_dataloader_keep_order.py | 6 ++-- .../unittests/test_dataloader_unkeep_order.py | 6 ++-- .../fluid/tests/unittests/test_dataset.py | 7 ++-- .../test_dataset_consistency_inspection.py | 5 +-- .../unittests/test_dataset_dataloader.py | 10 +++--- .../tests/unittests/test_dataset_download.py | 3 +- .../fluid/tests/unittests/test_debugger.py | 1 + .../unittests/test_decayed_adagrad_op.py | 1 + .../unittests/test_decoupled_py_reader.py | 8 +++-- .../test_decoupled_py_reader_data_check.py | 8 +++-- .../tests/unittests/test_default_dtype.py | 4 ++- .../unittests/test_default_scope_funcs.py | 3 +- .../tests/unittests/test_deform_conv2d.py | 8 +++-- .../unittests/test_deformable_conv_op.py | 4 ++- .../unittests/test_deformable_conv_v1_op.py | 4 ++- .../test_deformable_psroi_pooling.py | 2 ++ .../fluid/tests/unittests/test_deg2rad.py | 2 ++ .../unittests/test_density_prior_box_op.py | 3 +- .../unittests/test_deprecated_decorator.py | 9 +++--- 
...t_deprecated_memory_optimize_interfaces.py | 4 ++- .../unittests/test_dequantize_abs_max_op.py | 3 +- .../tests/unittests/test_dequantize_log_op.py | 1 + .../fluid/tests/unittests/test_desc_clone.py | 7 ++-- .../fluid/tests/unittests/test_detach.py | 4 +-- .../tests/unittests/test_detection_map_op.py | 5 +-- .../tests/unittests/test_determinant_op.py | 2 ++ .../tests/unittests/test_device_guard.py | 2 +- .../paddle/fluid/tests/unittests/test_diag.py | 2 ++ .../fluid/tests/unittests/test_diag_embed.py | 4 ++- .../fluid/tests/unittests/test_diag_v2.py | 2 ++ .../fluid/tests/unittests/test_diagflat.py | 2 ++ .../fluid/tests/unittests/test_diagonal_op.py | 2 ++ .../fluid/tests/unittests/test_diff_op.py | 2 ++ .../fluid/tests/unittests/test_digamma_op.py | 4 ++- .../unittests/test_directory_migration.py | 2 +- .../unittests/test_disable_signal_handler.py | 2 +- .../tests/unittests/test_dist_allreduce_op.py | 2 ++ .../fluid/tests/unittests/test_dist_base.py | 20 ++++++------ .../tests/unittests/test_dist_dygraph_apis.py | 1 + .../test_dist_fleet_a_sync_optimizer_auto.py | 3 +- ..._dist_fleet_a_sync_optimizer_auto_async.py | 1 + ...st_dist_fleet_a_sync_optimizer_auto_geo.py | 1 + .../test_dist_fleet_a_sync_optimizer_geo.py | 2 +- .../test_dist_fleet_a_sync_optimizer_sync.py | 3 +- .../tests/unittests/test_dist_fleet_base.py | 15 ++++----- .../tests/unittests/test_dist_fleet_ctr.py | 1 + .../tests/unittests/test_dist_fleet_ctr2.py | 3 +- .../tests/unittests/test_dist_fleet_decay.py | 5 +-- .../tests/unittests/test_dist_fleet_geo.py | 9 +++--- .../tests/unittests/test_dist_fleet_gloo.py | 4 +-- .../unittests/test_dist_fleet_heter_base.py | 15 ++++----- .../test_dist_fleet_heter_program.py | 8 ++--- .../tests/unittests/test_dist_fleet_infer.py | 3 +- .../tests/unittests/test_dist_fleet_ps.py | 4 +-- .../tests/unittests/test_dist_fleet_ps10.py | 7 ++-- .../tests/unittests/test_dist_fleet_ps11.py | 4 +-- .../tests/unittests/test_dist_fleet_ps12.py | 4 +-- .../tests/unittests/test_dist_fleet_ps13.py | 4 +-- .../tests/unittests/test_dist_fleet_ps2.py | 4 +-- .../tests/unittests/test_dist_fleet_ps3.py | 4 +-- .../tests/unittests/test_dist_fleet_ps4.py | 4 +-- .../tests/unittests/test_dist_fleet_ps5.py | 4 +-- .../tests/unittests/test_dist_fleet_ps6.py | 4 +-- .../tests/unittests/test_dist_fleet_ps7.py | 5 +-- .../tests/unittests/test_dist_fleet_ps8.py | 5 +-- .../tests/unittests/test_dist_fleet_ps9.py | 5 +-- .../test_dist_fleet_raw_program_optimizer.py | 4 ++- ...et_raw_program_optimizer_fuse_allreduce.py | 4 ++- .../tests/unittests/test_dist_fleet_simnet.py | 2 ++ .../test_dist_fleet_sparse_embedding_ctr.py | 7 ++-- .../test_dist_lookup_sparse_table_fuse_ops.py | 4 +-- .../test_dist_mnist_backward_deps.py | 2 ++ .../unittests/test_dist_mnist_batch_merge.py | 4 ++- .../unittests/test_dist_mnist_fleet_save.py | 4 ++- .../unittests/test_dist_mnist_fleetapi.py | 4 ++- .../test_dist_mnist_fp16_allreduce.py | 1 + .../unittests/test_dist_mnist_hallreduce.py | 3 +- .../tests/unittests/test_dist_mnist_lars.py | 1 + .../unittests/test_dist_mnist_multi_comm.py | 3 +- .../tests/unittests/test_dist_mnist_pg.py | 2 ++ .../test_dist_mnist_ring_allreduce.py | 2 ++ .../tests/unittests/test_dist_mnist_train.py | 4 +-- .../unittests/test_dist_mnist_with_program.py | 2 ++ .../fluid/tests/unittests/test_dist_op.py | 2 ++ .../tests/unittests/test_dist_save_load.py | 5 +-- .../unittests/test_dist_se_resnext_nccl.py | 4 +-- .../unittests/test_dist_se_resnext_sync.py | 5 ++- .../unittests/test_dist_sharding_save.py | 4 ++- 
.../unittests/test_dist_sparse_load_ps0.py | 8 +++-- .../unittests/test_dist_sparse_load_ps1.py | 8 +++-- .../test_dist_sparse_tensor_load_adagrad.py | 4 ++- .../test_dist_sparse_tensor_load_adam.py | 4 ++- .../test_dist_sparse_tensor_load_ftrl.py | 4 ++- .../test_dist_sparse_tensor_load_momentum.py | 4 ++- .../test_dist_sparse_tensor_load_rmsprop.py | 4 ++- .../test_dist_sparse_tensor_load_sgd.py | 3 +- .../test_dist_text_classification.py | 3 +- .../fluid/tests/unittests/test_dist_train.py | 11 +++---- .../tests/unittests/test_dist_transformer.py | 4 ++- .../tests/unittests/test_dist_transpiler.py | 7 ++-- .../tests/unittests/test_dist_tree_index.py | 5 +-- .../tests/unittests/test_dist_word2vec.py | 4 +-- .../test_distribute_fpn_proposals_op.py | 5 +-- ...est_distributed_fused_lamb_op_with_clip.py | 5 +-- ...buted_fused_lamb_op_with_gradient_merge.py | 3 +- ..._distributed_fused_lamb_op_without_clip.py | 3 +- .../tests/unittests/test_distributions.py | 6 ++-- .../fluid/tests/unittests/test_dot_op.py | 8 +++-- .../fluid/tests/unittests/test_downpoursgd.py | 14 ++++---- .../fluid/tests/unittests/test_dpsgd_op.py | 1 + .../tests/unittests/test_dropout_nd_op.py | 8 +++-- .../fluid/tests/unittests/test_dropout_op.py | 13 ++++---- .../unittests/test_dygraph_mnist_fp16.py | 3 +- .../test_dygraph_mode_of_unittest.py | 1 + .../unittests/test_dygraph_multi_forward.py | 7 ++-- .../unittests/test_dygraph_spectral_norm.py | 4 ++- .../unittests/test_dygraph_weight_norm.py | 8 +++-- .../test_dynamic_rnn_stop_gradient.py | 5 +-- .../test_eager_deletion_conditional_block.py | 3 +- .../test_eager_deletion_delete_vars.py | 5 +-- .../test_eager_deletion_dynamic_rnn_base.py | 5 +-- .../unittests/test_eager_deletion_gru_net.py | 2 ++ .../unittests/test_eager_deletion_lstm_net.py | 4 ++- .../unittests/test_eager_deletion_mnist.py | 1 + .../test_eager_deletion_padding_rnn.py | 5 +-- .../test_eager_deletion_recurrent_op.py | 11 ++++--- .../unittests/test_eager_deletion_while_op.py | 13 ++++---- .../tests/unittests/test_eager_run_program.py | 21 ++++++------ .../tests/unittests/test_eager_trace_op.py | 2 ++ .../tests/unittests/test_edit_distance_op.py | 2 ++ .../unittests/test_egr_code_generate_api.py | 6 ++-- .../tests/unittests/test_egr_python_api.py | 10 +++--- .../unittests/test_egr_string_tensor_api.py | 6 ++-- .../fluid/tests/unittests/test_eig_op.py | 6 ++-- .../fluid/tests/unittests/test_eigh_op.py | 4 ++- .../fluid/tests/unittests/test_eigvals_op.py | 6 ++-- .../fluid/tests/unittests/test_eigvalsh_op.py | 4 ++- .../fluid/tests/unittests/test_einsum.py | 7 ++-- .../fluid/tests/unittests/test_einsum_op.py | 4 ++- .../fluid/tests/unittests/test_einsum_v2.py | 7 ++-- .../unittests/test_elementwise_add_op.py | 10 +++--- .../unittests/test_elementwise_div_op.py | 4 ++- .../unittests/test_elementwise_floordiv_op.py | 7 ++-- .../unittests/test_elementwise_gradient_op.py | 3 +- .../test_elementwise_heaviside_op.py | 2 ++ .../unittests/test_elementwise_max_op.py | 6 ++-- .../unittests/test_elementwise_min_op.py | 2 ++ .../unittests/test_elementwise_mod_op.py | 7 ++-- .../unittests/test_elementwise_mul_op.py | 4 +-- .../unittests/test_elementwise_nn_grad.py | 8 ++--- .../unittests/test_elementwise_pow_op.py | 4 ++- .../unittests/test_elementwise_sub_op.py | 4 ++- .../paddle/fluid/tests/unittests/test_ema.py | 2 ++ .../fluid/tests/unittests/test_ema_fleet.py | 4 ++- .../test_embedding_id_stop_gradient.py | 4 ++- .../tests/unittests/test_empty_like_op.py | 6 ++-- .../fluid/tests/unittests/test_empty_op.py | 4 ++- 
.../fluid/tests/unittests/test_entry_attr.py | 3 +- .../fluid/tests/unittests/test_entry_attr2.py | 1 + .../fluid/tests/unittests/test_erf_op.py | 3 +- .../fluid/tests/unittests/test_erfinv_op.py | 4 ++- .../fluid/tests/unittests/test_exception.py | 3 +- .../tests/unittests/test_executor_and_mul.py | 3 +- .../test_executor_and_use_program_cache.py | 5 +-- .../test_executor_check_fetch_list.py | 2 +- .../test_executor_feed_non_tensor.py | 1 + ..._executor_return_tensor_not_overwriting.py | 4 ++- .../tests/unittests/test_expand_as_op.py | 1 + .../tests/unittests/test_expand_as_v2_op.py | 2 ++ .../fluid/tests/unittests/test_expand_op.py | 2 ++ .../tests/unittests/test_expand_v2_op.py | 10 +++--- .../tests/unittests/test_exponential_op.py | 4 ++- .../fluid/tests/unittests/test_eye_op.py | 6 ++-- .../unittests/test_fake_dequantize_op.py | 3 +- .../tests/unittests/test_fake_quantize_op.py | 5 +-- .../unittests/test_faster_tokenizer_op.py | 10 +++--- .../fluid/tests/unittests/test_fc_op.py | 6 ++-- .../test_feed_data_check_shape_type.py | 6 ++-- .../tests/unittests/test_feed_fetch_method.py | 4 ++- .../tests/unittests/test_fetch_handler.py | 5 +-- .../unittests/test_fetch_lod_tensor_array.py | 6 ++-- .../tests/unittests/test_fetch_unmerged.py | 4 ++- .../fluid/tests/unittests/test_fetch_var.py | 6 ++-- .../tests/unittests/test_fill_any_like_op.py | 6 ++-- .../fluid/tests/unittests/test_fill_any_op.py | 4 ++- .../test_fill_constant_batch_size_like.py | 4 ++- .../tests/unittests/test_fill_constant_op.py | 6 ++-- .../unittests/test_fill_diagonal_tensor_op.py | 4 ++- .../fluid/tests/unittests/test_fill_op.py | 2 ++ .../unittests/test_fill_zeros_like2_op.py | 4 ++- .../unittests/test_fill_zeros_like_op.py | 1 + .../unittests/test_filter_by_instag_op.py | 1 + .../fluid/tests/unittests/test_flatten2_op.py | 1 + .../test_flatten_contiguous_range_op.py | 4 ++- .../fluid/tests/unittests/test_flatten_op.py | 2 +- .../fluid/tests/unittests/test_fleet.py | 2 +- .../tests/unittests/test_fleet_api_input.py | 20 ++++++------ .../fluid/tests/unittests/test_fleet_auto.py | 3 +- .../fluid/tests/unittests/test_fleet_base.py | 6 ++-- .../tests/unittests/test_fleet_base_2.py | 2 ++ .../tests/unittests/test_fleet_base_3.py | 3 +- .../tests/unittests/test_fleet_base_4.py | 3 +- .../tests/unittests/test_fleet_base_single.py | 6 ++-- .../test_fleet_elastic_collective.py | 2 +- .../unittests/test_fleet_elastic_manager.py | 4 +-- .../test_fleet_exe_dist_model_run.py | 8 +++-- .../test_fleet_exe_dist_model_tensor.py | 7 ++-- .../tests/unittests/test_fleet_executor.py | 2 ++ .../test_fleet_executor_multi_devices.py | 5 +-- .../test_fleet_executor_origin_scheduler.py | 2 ++ .../test_fleet_executor_task_node.py | 1 + .../unittests/test_fleet_executor_utils.py | 1 + .../test_fleet_executor_with_task_nodes.py | 2 ++ .../unittests/test_fleet_gradient_scale.py | 7 ++-- .../tests/unittests/test_fleet_metric.py | 9 +++--- .../tests/unittests/test_fleet_nocvm_1.py | 2 +- .../fluid/tests/unittests/test_fleet_ps.py | 1 + .../unittests/test_fleet_pyramid_hash.py | 1 + .../tests/unittests/test_fleet_rolemaker.py | 3 +- .../tests/unittests/test_fleet_rolemaker_2.py | 11 ++++--- .../tests/unittests/test_fleet_rolemaker_3.py | 2 +- .../tests/unittests/test_fleet_rolemaker_4.py | 8 +++-- .../unittests/test_fleet_rolemaker_init.py | 1 + .../tests/unittests/test_fleet_runtime.py | 1 + .../unittests/test_fleet_unitaccessor.py | 2 +- .../fluid/tests/unittests/test_fleet_util.py | 12 ++++--- .../paddle/fluid/tests/unittests/test_flip.py | 8 +++-- 
.../fluid/tests/unittests/test_fmax_op.py | 4 ++- .../fluid/tests/unittests/test_fmin_op.py | 4 ++- .../fluid/tests/unittests/test_fold_op.py | 6 ++-- .../fluid/tests/unittests/test_frac_api.py | 2 ++ .../fluid/tests/unittests/test_frame_op.py | 7 ++-- .../unittests/test_framework_debug_str.py | 1 + .../fluid/tests/unittests/test_frexp_api.py | 2 ++ .../tests/unittests/test_fs_interface.py | 2 +- .../fluid/tests/unittests/test_fsp_op.py | 2 ++ .../fluid/tests/unittests/test_ftrl_op.py | 4 ++- .../tests/unittests/test_full_like_op.py | 11 ++++--- .../fluid/tests/unittests/test_full_op.py | 3 +- .../tests/unittests/test_function_hook.py | 3 +- .../tests/unittests/test_functional_conv1d.py | 10 +++--- .../test_functional_conv1d_transpose.py | 10 +++--- .../tests/unittests/test_functional_conv2d.py | 12 ++++--- .../test_functional_conv2d_transpose.py | 6 ++-- .../tests/unittests/test_functional_conv3d.py | 12 ++++--- .../test_functional_conv3d_transpose.py | 8 +++-- .../unittests/test_fuse_all_reduce_pass.py | 14 ++++---- .../tests/unittests/test_fuse_bn_act_pass.py | 3 +- .../unittests/test_fuse_bn_add_act_pass.py | 2 ++ .../test_fuse_elewise_add_act_pass.py | 10 +++--- .../unittests/test_fuse_gemm_epilogue_pass.py | 4 ++- .../unittests/test_fuse_optimizer_pass.py | 12 ++++--- .../test_fuse_relu_depthwise_conv_pass.py | 10 +++--- .../unittests/test_fused_attention_op.py | 13 ++++---- .../unittests/test_fused_attention_op_api.py | 3 +- ...sed_bias_dropout_residual_layer_norm_op.py | 9 +++--- ...bias_dropout_residual_layer_norm_op_api.py | 3 +- .../test_fused_elemwise_activation_op.py | 6 ++-- .../unittests/test_fused_emb_seq_pool_op.py | 4 ++- .../test_fused_embedding_fc_lstm_op.py | 3 +- .../test_fused_fc_elementwise_layernorm_op.py | 6 ++-- .../unittests/test_fused_feedforward_op.py | 13 ++++---- .../unittests/test_fused_gate_attention_op.py | 7 ++-- .../test_fused_gemm_epilogue_grad_op.py | 4 ++- .../unittests/test_fused_gemm_epilogue_op.py | 4 ++- .../tests/unittests/test_fused_matmul_bias.py | 8 +++-- .../test_fused_multi_transformer_int8_op.py | 13 ++++---- .../test_fused_multi_transformer_op.py | 14 ++++---- .../test_fused_multihead_matmul_op.py | 2 ++ .../unittests/test_fused_token_prune_op.py | 2 ++ .../test_fused_transformer_encoder_layer.py | 5 +-- ...st_fused_transformer_with_amp_decorator.py | 5 +-- .../tests/unittests/test_fusion_gru_op.py | 4 ++- .../tests/unittests/test_fusion_lstm_op.py | 4 ++- .../test_fusion_repeated_fc_relu_op.py | 3 +- .../test_fusion_seqconv_eltadd_relu_op.py | 1 + .../test_fusion_seqexpand_concat_fc_op.py | 3 +- .../test_fusion_seqpool_concat_op.py | 5 +-- .../test_fusion_seqpool_cvm_concat_op.py | 5 +-- .../test_fusion_squared_mat_sub_op.py | 1 + ...test_fusion_transpose_flatten_concat_op.py | 2 ++ .../unittests/test_gast_with_compatibility.py | 3 +- .../tests/unittests/test_gather_nd_op.py | 4 ++- .../fluid/tests/unittests/test_gather_op.py | 4 ++- .../tests/unittests/test_gather_tree_op.py | 4 ++- .../unittests/test_gaussian_random_op.py | 5 +-- .../paddle/fluid/tests/unittests/test_gcd.py | 2 ++ .../fluid/tests/unittests/test_gelu_op.py | 4 ++- .../unittests/test_generate_mask_labels_op.py | 3 +- .../test_generate_proposal_labels_op.py | 1 + .../unittests/test_generate_proposals_op.py | 8 +++-- .../test_generate_proposals_v2_op.py | 6 ++-- .../fluid/tests/unittests/test_generator.py | 1 + .../unittests/test_generator_dataloader.py | 8 +++-- .../test_get_all_op_or_kernel_names.py | 1 + .../unittests/test_get_device_properties.py | 3 +- 
.../test_get_inputs_outputs_in_block.py | 4 ++- .../tests/unittests/test_get_places_op.py | 6 ++-- .../tests/unittests/test_get_set_flags.py | 3 +- .../test_get_tensor_from_selected_rows_op.py | 6 ++-- .../test_global_var_getter_setter.py | 3 +- .../paddle/fluid/tests/unittests/test_glu.py | 7 ++-- .../test_gpu_package_without_gpu_device.py | 5 +-- .../unittests/test_grad_clip_minimize.py | 9 +++--- .../tests/unittests/test_gradient_clip.py | 6 ++-- .../unittests/test_graph_khop_sampler.py | 2 ++ .../tests/unittests/test_graph_reindex.py | 2 ++ .../unittests/test_graph_sample_neighbors.py | 2 ++ .../unittests/test_graph_send_recv_op.py | 4 +-- .../unittests/test_graph_send_ue_recv_op.py | 5 +-- .../tests/unittests/test_graph_send_uv_op.py | 5 +-- .../unittests/test_grid_sample_function.py | 6 ++-- .../tests/unittests/test_grid_sampler_op.py | 6 ++-- .../tests/unittests/test_group_norm_op_v2.py | 7 ++-- .../fluid/tests/unittests/test_gru_op.py | 6 ++-- .../fluid/tests/unittests/test_gru_rnn_op.py | 9 +++--- .../fluid/tests/unittests/test_gru_unit_op.py | 6 ++-- .../tests/unittests/test_gumbel_softmax_op.py | 2 ++ .../fluid/tests/unittests/test_hash_op.py | 2 ++ .../unittests/test_hinge_embedding_loss.py | 6 ++-- .../tests/unittests/test_hinge_loss_op.py | 1 + .../tests/unittests/test_histogram_op.py | 4 ++- .../fluid/tests/unittests/test_hsigmoid_op.py | 8 +++-- .../tests/unittests/test_huber_loss_op.py | 2 ++ .../test_hybrid_parallel_topology.py | 4 ++- .../tests/unittests/test_identity_loss_op.py | 4 ++- .../fluid/tests/unittests/test_identity_op.py | 2 ++ .../tests/unittests/test_iinfo_and_finfo.py | 4 ++- .../tests/unittests/test_im2sequence_op.py | 1 + .../unittests/test_imperative_auto_prune.py | 4 ++- .../tests/unittests/test_imperative_basic.py | 10 +++--- .../test_imperative_container_layerdict.py | 3 +- .../test_imperative_container_layerlist.py | 4 ++- ...test_imperative_container_parameterlist.py | 4 ++- .../test_imperative_container_sequential.py | 4 ++- .../test_imperative_data_loader_base.py | 4 ++- .../test_imperative_data_loader_exception.py | 2 ++ .../test_imperative_data_loader_exit_func.py | 12 +++---- .../test_imperative_data_loader_fds_clear.py | 4 ++- .../test_imperative_data_loader_process.py | 9 +++--- .../test_imperative_data_parallel.py | 4 +-- .../unittests/test_imperative_decorator.py | 9 +++--- .../tests/unittests/test_imperative_deepcf.py | 11 ++++--- .../unittests/test_imperative_double_grad.py | 10 +++--- .../unittests/test_imperative_framework.py | 6 ++-- .../tests/unittests/test_imperative_gan.py | 5 +-- .../tests/unittests/test_imperative_gnn.py | 7 ++-- .../tests/unittests/test_imperative_group.py | 3 +- .../test_imperative_hook_for_layer.py | 4 +-- .../unittests/test_imperative_layer_apply.py | 6 ++-- .../test_imperative_layer_children.py | 6 ++-- .../test_imperative_layer_trainable.py | 3 +- .../tests/unittests/test_imperative_layers.py | 1 + .../test_imperative_load_static_param.py | 10 +++--- ..._imperative_lod_tensor_to_selected_rows.py | 12 ++++--- .../tests/unittests/test_imperative_mnist.py | 9 +++--- .../test_imperative_mnist_sorted_gradient.py | 7 ++-- .../test_imperative_named_members.py | 4 ++- .../unittests/test_imperative_numpy_bridge.py | 1 + .../test_imperative_ocr_attention_model.py | 10 +++--- .../unittests/test_imperative_optimizer.py | 32 +++++++++---------- .../unittests/test_imperative_optimizer_v2.py | 28 ++++++++-------- ...test_imperative_parallel_coalesce_split.py | 7 ++-- .../test_imperative_partitial_backward.py | 4 ++- 
.../unittests/test_imperative_ptb_rnn.py | 14 ++++---- ...test_imperative_ptb_rnn_sorted_gradient.py | 10 +++--- .../test_imperative_recurrent_usage.py | 10 +++--- .../test_imperative_reinforcement.py | 7 ++-- .../tests/unittests/test_imperative_resnet.py | 12 +++---- .../test_imperative_resnet_sorted_gradient.py | 5 +-- .../unittests/test_imperative_save_load.py | 10 +++--- .../unittests/test_imperative_save_load_v2.py | 12 +++---- .../unittests/test_imperative_se_resnext.py | 7 ++-- .../test_imperative_selected_rows.py | 10 +++--- ..._imperative_selected_rows_to_lod_tensor.py | 10 +++--- .../test_imperative_signal_handler.py | 6 ++-- ...perative_star_gan_with_gradient_penalty.py | 8 +++-- .../test_imperative_static_runner_mnist.py | 2 +- .../test_imperative_static_runner_while.py | 7 ++-- .../test_imperative_tensor_clear_gradient.py | 8 +++-- .../test_imperative_thread_local_has_grad.py | 8 +++-- ...imperative_trace_non_persistable_inputs.py | 6 ++-- ..._imperative_transformer_sorted_gradient.py | 15 +++++---- .../unittests/test_imperative_triple_grad.py | 8 +++-- .../test_imperative_using_non_zero_gpu.py | 8 +++-- .../fluid/tests/unittests/test_increment.py | 1 + .../tests/unittests/test_index_add_op.py | 4 ++- .../tests/unittests/test_index_sample_op.py | 6 ++-- .../tests/unittests/test_index_select_op.py | 4 ++- .../test_infer_no_need_buffer_slots.py | 2 +- .../tests/unittests/test_inference_api.py | 13 +++++--- .../unittests/test_inference_model_io.py | 14 ++++---- .../fluid/tests/unittests/test_initializer.py | 3 +- .../tests/unittests/test_initializer_nn.py | 5 +-- .../fluid/tests/unittests/test_inner.py | 2 +- .../fluid/tests/unittests/test_inplace.py | 1 + .../tests/unittests/test_inplace_abn_op.py | 8 +++-- .../unittests/test_inplace_addto_strategy.py | 3 +- .../test_inplace_and_clear_gradient.py | 4 ++- .../test_inplace_auto_generated_apis.py | 1 + ...test_inplace_softmax_with_cross_entropy.py | 6 ++-- .../fluid/tests/unittests/test_input_spec.py | 7 ++-- .../tests/unittests/test_install_check.py | 3 +- .../tests/unittests/test_instance_norm_op.py | 4 ++- .../unittests/test_instance_norm_op_v2.py | 7 ++-- .../fluid/tests/unittests/test_inverse_op.py | 6 ++-- .../tests/unittests/test_io_save_load.py | 5 +-- .../tests/unittests/test_iou_similarity_op.py | 1 + .../fluid/tests/unittests/test_ir_graph.py | 1 + .../tests/unittests/test_ir_inplace_pass.py | 6 ++-- .../test_ir_memory_optimize_ifelse_op.py | 9 +++--- .../unittests/test_ir_memory_optimize_nlp.py | 6 ++-- .../unittests/test_ir_memory_optimize_pass.py | 10 +++--- .../test_ir_memory_optimize_transformer.py | 3 +- .../fluid/tests/unittests/test_is_complex.py | 6 ++-- .../fluid/tests/unittests/test_is_empty_op.py | 2 ++ .../fluid/tests/unittests/test_is_integer.py | 6 ++-- .../fluid/tests/unittests/test_is_tensor.py | 1 + .../fluid/tests/unittests/test_isclose_op.py | 2 ++ .../fluid/tests/unittests/test_isfinite_op.py | 4 ++- .../tests/unittests/test_isfinite_v2_op.py | 6 ++-- .../fluid/tests/unittests/test_jit_layer.py | 12 +++---- .../unittests/test_jit_pre_save_hooks.py | 2 +- .../tests/unittests/test_jit_save_load.py | 12 ++++--- .../tests/unittests/test_kldiv_loss_op.py | 4 ++- .../fluid/tests/unittests/test_kron_op.py | 1 + .../fluid/tests/unittests/test_kthvalue_op.py | 2 ++ .../fluid/tests/unittests/test_l1_loss.py | 6 ++-- .../fluid/tests/unittests/test_l1_norm_op.py | 3 +- .../unittests/test_label_smooth_functional.py | 6 ++-- .../tests/unittests/test_label_smooth_op.py | 2 ++ 
.../fluid/tests/unittests/test_lamb_op.py | 2 ++ .../fluid/tests/unittests/test_lambv2_op.py | 6 ++-- .../tests/unittests/test_launch_coverage.py | 6 ++-- .../tests/unittests/test_layer_norm_op.py | 11 ++++--- .../tests/unittests/test_layer_norm_op_v2.py | 9 +++--- .../fluid/tests/unittests/test_layers.py | 26 ++++++++------- .../tests/unittests/test_layout_autotune.py | 2 +- .../fluid/tests/unittests/test_lazy_init.py | 8 +++-- .../fluid/tests/unittests/test_lbfgs.py | 1 - .../paddle/fluid/tests/unittests/test_lcm.py | 2 ++ .../fluid/tests/unittests/test_lerp_op.py | 2 ++ .../fluid/tests/unittests/test_lgamma_op.py | 6 ++-- .../unittests/test_limit_by_capacity_op.py | 4 ++- .../fluid/tests/unittests/test_linalg_cond.py | 2 ++ .../tests/unittests/test_linalg_lstsq_op.py | 2 ++ .../tests/unittests/test_linalg_pinv_op.py | 2 ++ .../fluid/tests/unittests/test_linear.py | 6 ++-- .../unittests/test_linear_chain_crf_op.py | 4 +-- .../tests/unittests/test_linear_interp_op.py | 4 ++- .../unittests/test_linear_interp_v2_op.py | 4 ++- .../fluid/tests/unittests/test_linspace.py | 5 +-- .../unittests/test_listen_and_serv_op.py | 6 ++-- .../fluid/tests/unittests/test_load_op.py | 8 +++-- .../fluid/tests/unittests/test_load_op_xpu.py | 8 +++-- .../test_load_state_dict_from_old_format.py | 5 +-- .../unittests/test_load_vars_shape_check.py | 3 +- .../unittests/test_locality_aware_nms_op.py | 4 ++- .../unittests/test_lod_array_length_op.py | 9 +++--- .../tests/unittests/test_lod_rank_table.py | 10 +++--- .../tests/unittests/test_lod_reset_op.py | 4 ++- .../tests/unittests/test_lod_tensor_array.py | 4 ++- .../fluid/tests/unittests/test_log_loss_op.py | 2 ++ .../fluid/tests/unittests/test_log_softmax.py | 4 ++- .../tests/unittests/test_logcumsumexp_op.py | 10 +++--- .../fluid/tests/unittests/test_logical_op.py | 6 ++-- .../fluid/tests/unittests/test_logit_op.py | 2 ++ .../fluid/tests/unittests/test_logspace.py | 2 ++ .../fluid/tests/unittests/test_logsumexp.py | 6 ++-- .../fluid/tests/unittests/test_lookahead.py | 4 ++- .../unittests/test_lookup_table_bf16_op.py | 10 +++--- .../unittests/test_lookup_table_dequant_op.py | 3 +- .../tests/unittests/test_lookup_table_op.py | 10 +++--- .../unittests/test_lookup_table_v2_bf16_op.py | 8 +++-- .../unittests/test_lookup_table_v2_op.py | 7 ++-- .../tests/unittests/test_lr_scheduler.py | 3 +- .../fluid/tests/unittests/test_lrn_op.py | 6 ++-- .../tests/unittests/test_lstm_cudnn_op.py | 9 +++--- .../fluid/tests/unittests/test_lstm_op.py | 6 ++-- .../tests/unittests/test_lstm_unit_op.py | 4 ++- .../fluid/tests/unittests/test_lstmp_op.py | 2 ++ .../fluid/tests/unittests/test_lu_op.py | 12 ++++--- .../tests/unittests/test_lu_unpack_op.py | 12 ++++--- 699 files changed, 2338 insertions(+), 1422 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_accuracy_op.py b/python/paddle/fluid/tests/unittests/test_accuracy_op.py index 0de81034d3373d..fc7666bcba7428 100755 --- a/python/paddle/fluid/tests/unittests/test_accuracy_op.py +++ b/python/paddle/fluid/tests/unittests/test_accuracy_op.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py index e0c40e20021119..4b3311120467df 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py @@ -13,17 +13,17 @@ # limitations under the License. import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope -import paddle.fluid as fluid import paddle -import paddle.fluid.layers as layers +import paddle.fluid as fluid import paddle.fluid.core as core -import gradient_checker +import paddle.fluid.layers as layers import paddle.nn.functional as F -from decorator_helper import prog_scope - class TestSigmoidTripleGradCheck(unittest.TestCase): @prog_scope() diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index 6cfe72bfdfd395..9f5bbee0fc88b0 100755 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -15,13 +15,13 @@ import unittest import numpy as np -from scipy.special import expit, erf - from op_test import OpTest, convert_float_to_uint16 +from scipy.special import erf, expit + import paddle -import paddle.nn.functional as F import paddle.fluid as fluid import paddle.fluid.core as core +import paddle.nn.functional as F from paddle.fluid import Program, program_guard from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_activation_sparse_op.py b/python/paddle/fluid/tests/unittests/test_activation_sparse_op.py index 472dc334d66524..ad4f6c60fc1833 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_sparse_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_sparse_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np + +import paddle import paddle.fluid.core as core from paddle.fluid.op import Operator -import paddle class TestSparseSquareOp(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_adadelta_op.py b/python/paddle/fluid/tests/unittests/test_adadelta_op.py index 12122c8e05f374..73a3c1e1cbf7c0 100644 --- a/python/paddle/fluid/tests/unittests/test_adadelta_op.py +++ b/python/paddle/fluid/tests/unittests/test_adadelta_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_adagrad_op.py b/python/paddle/fluid/tests/unittests/test_adagrad_op.py index 036322ee62d615..94a754ca3cecf5 100644 --- a/python/paddle/fluid/tests/unittests/test_adagrad_op.py +++ b/python/paddle/fluid/tests/unittests/test_adagrad_op.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import math import unittest + import numpy as np -import paddle.fluid.core as core -from paddle.fluid.op import Operator from op_test import OpTest -import math + import paddle +import paddle.fluid.core as core +from paddle.fluid.op import Operator class TestAdagradOp1(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_adagrad_op_v2.py b/python/paddle/fluid/tests/unittests/test_adagrad_op_v2.py index c3eee954b610bf..35d8aa9101001d 100644 --- a/python/paddle/fluid/tests/unittests/test_adagrad_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_adagrad_op_v2.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py index 9904ee0d100a3d..07fef0b4603e88 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_adam_op.py @@ -13,13 +13,15 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -from paddle.fluid import core -from paddle.fluid.op import Operator -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid +from paddle.fluid import core from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.op import Operator class TestAdamOp1(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py b/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py index 904b02e778c25f..8e43728fb83da1 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py +++ b/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import paddle import paddle.fluid as fluid -import unittest def get_places(): diff --git a/python/paddle/fluid/tests/unittests/test_adamax_api.py b/python/paddle/fluid/tests/unittests/test_adamax_api.py index 78e9553a806d65..1b1e658defc4a6 100644 --- a/python/paddle/fluid/tests/unittests/test_adamax_api.py +++ b/python/paddle/fluid/tests/unittests/test_adamax_api.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_adamax_op.py b/python/paddle/fluid/tests/unittests/test_adamax_op.py index b6432714434695..d2a80073e13bd6 100644 --- a/python/paddle/fluid/tests/unittests/test_adamax_op.py +++ b/python/paddle/fluid/tests/unittests/test_adamax_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_adamw_op.py b/python/paddle/fluid/tests/unittests/test_adamw_op.py index 15c8bf69bc01b4..1810a4bea6121b 100644 --- a/python/paddle/fluid/tests/unittests/test_adamw_op.py +++ b/python/paddle/fluid/tests/unittests/test_adamw_op.py @@ -12,15 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest -import paddle import random +import unittest +from functools import partial + import numpy as np -import paddle.fluid as fluid from op_test import OpTest -from functools import partial -from paddle.framework import core + +import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard +from paddle.framework import core def adamw_step(inputs, attributes): diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool1d.py b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool1d.py index 532c2e8a45b0a9..ab76a61017bfd9 100644 --- a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool1d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool1d.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest + import numpy as np -import paddle.fluid.core as core -import paddle.fluid as fluid + import paddle -import paddle.nn.functional as F import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.nn.functional as F def adaptive_start_index(index, input_size, output_size): diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py index ef1051c377a9e5..f2dccd4d63bee7 100644 --- a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py @@ -14,15 +14,15 @@ import os import unittest + import numpy as np +from test_attribute_var import UnittestBase -import paddle.fluid.core as core import paddle import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard -from test_attribute_var import UnittestBase - def adaptive_start_index(index, input_size, output_size): return int(np.floor(index * input_size / output_size)) diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py index 065a27c90e80dc..c1e6a886688166 100755 --- a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py @@ -13,11 +13,12 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid.core as core import paddle import paddle.fluid as fluid +import paddle.fluid.core as core def adaptive_start_index(index, input_size, output_size): diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool1d.py b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool1d.py index a83ef2bfd5bae6..245c13ff5389d7 100644 --- a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool1d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool1d.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np import unittest + +import numpy as np from op_test import check_out_dtype -import paddle.fluid.core as core + import paddle -import paddle.nn.functional as F import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.nn.functional as F def adaptive_start_index(index, input_size, output_size): diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py index decbfbfa012ec4..af8e71a0f7102c 100644 --- a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py @@ -13,12 +13,13 @@ # limitations under the License. import unittest -import numpy as np -import paddle.fluid.core as core +import numpy as np from op_test import check_out_dtype + import paddle import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.nn.functional as F diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py index 21400576c4752c..d3f76ba39503d2 100755 --- a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py @@ -13,12 +13,13 @@ # limitations under the License. import unittest -import numpy as np -import paddle.fluid.core as core +import numpy as np from op_test import check_out_dtype + import paddle import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.nn.functional as F diff --git a/python/paddle/fluid/tests/unittests/test_add_reader_dependency.py b/python/paddle/fluid/tests/unittests/test_add_reader_dependency.py index 1604445a873574..5cf3953cb8447c 100644 --- a/python/paddle/fluid/tests/unittests/test_add_reader_dependency.py +++ b/python/paddle/fluid/tests/unittests/test_add_reader_dependency.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid -from paddle.fluid.layer_helper import LayerHelper +import time import unittest + import numpy as np -import time + +import paddle.fluid as fluid +from paddle.fluid.layer_helper import LayerHelper def inplace_add(x, bias): diff --git a/python/paddle/fluid/tests/unittests/test_addmm_op.py b/python/paddle/fluid/tests/unittests/test_addmm_op.py index f101f55dc739ea..9a1385c63b5dbc 100644 --- a/python/paddle/fluid/tests/unittests/test_addmm_op.py +++ b/python/paddle/fluid/tests/unittests/test_addmm_op.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest + import numpy as np -import paddle from op_test import OpTest + +import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_affine_grid_function.py b/python/paddle/fluid/tests/unittests/test_affine_grid_function.py index 420b6e61ca2665..16a67d765f870e 100644 --- a/python/paddle/fluid/tests/unittests/test_affine_grid_function.py +++ b/python/paddle/fluid/tests/unittests/test_affine_grid_function.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle +import unittest + import numpy as np -from paddle import fluid + +import paddle import paddle.fluid.dygraph as dg import paddle.nn.functional as F -import unittest +from paddle import fluid class AffineGridTestCase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_affine_grid_op.py b/python/paddle/fluid/tests/unittests/test_affine_grid_op.py index 096b4fe12777dd..2f07a9d077cad2 100644 --- a/python/paddle/fluid/tests/unittests/test_affine_grid_op.py +++ b/python/paddle/fluid/tests/unittests/test_affine_grid_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_allclose_layer.py b/python/paddle/fluid/tests/unittests/test_allclose_layer.py index c406241c65be1d..79497985b0aaae 100644 --- a/python/paddle/fluid/tests/unittests/test_allclose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_allclose_layer.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.fluid as fluid import unittest + import numpy as np + +import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_allclose_op.py b/python/paddle/fluid/tests/unittests/test_allclose_op.py index 0eb47aa4441b90..c4cde0ec49ee99 100644 --- a/python/paddle/fluid/tests/unittests/test_allclose_op.py +++ b/python/paddle/fluid/tests/unittests/test_allclose_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_allgather.py b/python/paddle/fluid/tests/unittests/test_allgather.py index 89441f394cff74..33789fb792fb16 100644 --- a/python/paddle/fluid/tests/unittests/test_allgather.py +++ b/python/paddle/fluid/tests/unittests/test_allgather.py @@ -13,10 +13,11 @@ # limitations under the License. import unittest -import paddle from test_collective_base import TestDistBase +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_amp_check_finite_and_scale_op.py b/python/paddle/fluid/tests/unittests/test_amp_check_finite_and_scale_op.py index 7101d3439f6429..a840b582474f34 100644 --- a/python/paddle/fluid/tests/unittests/test_amp_check_finite_and_scale_op.py +++ b/python/paddle/fluid/tests/unittests/test_amp_check_finite_and_scale_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle.fluid.contrib.mixed_precision.amp_nn as amp_nn diff --git a/python/paddle/fluid/tests/unittests/test_anchor_generator_op.py b/python/paddle/fluid/tests/unittests/test_anchor_generator_op.py index 3766dc95ab93e1..063fa6cdf7d20b 100644 --- a/python/paddle/fluid/tests/unittests/test_anchor_generator_op.py +++ b/python/paddle/fluid/tests/unittests/test_anchor_generator_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_angle_op.py b/python/paddle/fluid/tests/unittests/test_angle_op.py index 767a274ad32248..126685b39f90b0 100644 --- a/python/paddle/fluid/tests/unittests/test_angle_op.py +++ b/python/paddle/fluid/tests/unittests/test_angle_op.py @@ -13,12 +13,13 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest import paddle -from paddle.fluid import dygraph from paddle import static +from paddle.fluid import dygraph paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py b/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py index fff492e9176738..f2a18ea156cbe7 100644 --- a/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py +++ b/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + +import numpy as np + import paddle -from paddle.vision.models import resnet50 -from paddle.nn import CrossEntropyLoss +import paddle.fluid as fluid from paddle.fluid.framework import _apply_pass from paddle.fluid.ir import apply_build_strategy -import paddle.fluid as fluid -import unittest -import numpy as np +from paddle.nn import CrossEntropyLoss +from paddle.vision.models import resnet50 def get_resnet50_model(): diff --git a/python/paddle/fluid/tests/unittests/test_arange.py b/python/paddle/fluid/tests/unittests/test_arange.py index a7c5c4231d44de..f616acb0149821 100644 --- a/python/paddle/fluid/tests/unittests/test_arange.py +++ b/python/paddle/fluid/tests/unittests/test_arange.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -from paddle.fluid import core -from paddle.static import program_guard, Program import unittest + import numpy as np from op_test import OpTest +import paddle +from paddle.fluid import core +from paddle.static import Program, program_guard + class TestArangeOp(OpTest): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py b/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py index 6c9d09bbab9df1..603ea0d6b7a039 100644 --- a/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py +++ b/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py @@ -14,12 +14,14 @@ import os import unittest + import numpy as np from op_test import OpTest +from test_attribute_var import UnittestBase + import paddle import paddle.fluid.core as core from paddle.fluid import Program, program_guard -from test_attribute_var import UnittestBase class BaseTestCase(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py b/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py index a6f3fee21095d5..99dcff5db7b0bc 100644 --- a/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_argsort_op.py b/python/paddle/fluid/tests/unittests/test_argsort_op.py index 3a0d1711af8ac7..f939462a6451b8 100644 --- a/python/paddle/fluid/tests/unittests/test_argsort_op.py +++ b/python/paddle/fluid/tests/unittests/test_argsort_op.py @@ -13,14 +13,15 @@ # limitations under the License. 
import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -import numpy as np import paddle.fluid.core as core - -from paddle.fluid.framework import Program, grad_var_name -from paddle.fluid.executor import Executor from paddle.fluid.backward import append_backward +from paddle.fluid.executor import Executor +from paddle.fluid.framework import Program, grad_var_name np.random.seed(123) diff --git a/python/paddle/fluid/tests/unittests/test_array_read_write_op.py b/python/paddle/fluid/tests/unittests/test_array_read_write_op.py index e8d2579b3f2914..907bb65cfce25c 100644 --- a/python/paddle/fluid/tests/unittests/test_array_read_write_op.py +++ b/python/paddle/fluid/tests/unittests/test_array_read_write_op.py @@ -14,15 +14,16 @@ import unittest +import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.layers as layers -from paddle.fluid.executor import Executor +from paddle.fluid import Program, program_guard from paddle.fluid.backward import append_backward +from paddle.fluid.executor import Executor from paddle.fluid.framework import default_main_program -from paddle.fluid import Program, program_guard -import numpy as np def _test_read_write(x): diff --git a/python/paddle/fluid/tests/unittests/test_ascend_trigger.py b/python/paddle/fluid/tests/unittests/test_ascend_trigger.py index 16b38865593a60..917ef5606f70d6 100644 --- a/python/paddle/fluid/tests/unittests/test_ascend_trigger.py +++ b/python/paddle/fluid/tests/unittests/test_ascend_trigger.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import paddle import paddle.fluid as fluid -import unittest class TestAscendTriggerOP(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_assert_op.py b/python/paddle/fluid/tests/unittests/test_assert_op.py index 4ce1fb9a65c520..a006f999287306 100644 --- a/python/paddle/fluid/tests/unittests/test_assert_op.py +++ b/python/paddle/fluid/tests/unittests/test_assert_op.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers -import unittest class TestAssertOp(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_assign_op.py b/python/paddle/fluid/tests/unittests/test_assign_op.py index a59d9fb6688ff1..4c5921a5f2012b 100644 --- a/python/paddle/fluid/tests/unittests/test_assign_op.py +++ b/python/paddle/fluid/tests/unittests/test_assign_op.py @@ -12,18 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import op_test -import numpy as np import unittest + +import gradient_checker +import numpy as np +import op_test +from decorator_helper import prog_scope + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid -from paddle.fluid import Program, program_guard -from paddle.fluid.backward import append_backward +import paddle.fluid.core as core import paddle.fluid.framework as framework -import gradient_checker -from decorator_helper import prog_scope import paddle.fluid.layers as layers +from paddle.fluid import Program, program_guard +from paddle.fluid.backward import append_backward class TestAssignOp(op_test.OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_assign_pos_op.py b/python/paddle/fluid/tests/unittests/test_assign_pos_op.py index d04fecd7b0500b..9478efd51a7e8c 100644 --- a/python/paddle/fluid/tests/unittests/test_assign_pos_op.py +++ b/python/paddle/fluid/tests/unittests/test_assign_pos_op.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import op_test -import numpy as np import unittest + +import numpy as np +import op_test + import paddle import paddle.fluid.core as core from paddle.distributed.models.moe import utils diff --git a/python/paddle/fluid/tests/unittests/test_assign_value_op.py b/python/paddle/fluid/tests/unittests/test_assign_value_op.py index cac321c2a27d32..7a5128ed2ff159 100644 --- a/python/paddle/fluid/tests/unittests/test_assign_value_op.py +++ b/python/paddle/fluid/tests/unittests/test_assign_value_op.py @@ -13,9 +13,10 @@ # limitations under the License. import unittest -import numpy as np +import numpy as np import op_test + import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework diff --git a/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py index 41fc17187093ce..9caae0bea67e77 100644 --- a/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py @@ -13,10 +13,11 @@ # limitations under the License. import os +import time import unittest import numpy -import time + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_atan2_op.py b/python/paddle/fluid/tests/unittests/test_atan2_op.py index b74aeb9057004a..77ad77e3252b88 100644 --- a/python/paddle/fluid/tests/unittests/test_atan2_op.py +++ b/python/paddle/fluid/tests/unittests/test_atan2_op.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest +import numpy as np from op_test import OpTest + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_attention_lstm_op.py b/python/paddle/fluid/tests/unittests/test_attention_lstm_op.py index 053912005cc1b2..b4c2d8ed2cf63f 100644 --- a/python/paddle/fluid/tests/unittests/test_attention_lstm_op.py +++ b/python/paddle/fluid/tests/unittests/test_attention_lstm_op.py @@ -13,9 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest -from test_fusion_lstm_op import fc, ACTIVATION +from test_fusion_lstm_op import ACTIVATION, fc from test_softmax_op import stable_softmax diff --git a/python/paddle/fluid/tests/unittests/test_attribute_var.py b/python/paddle/fluid/tests/unittests/test_attribute_var.py index 60d202eb1b589c..082b1970a46b22 100644 --- a/python/paddle/fluid/tests/unittests/test_attribute_var.py +++ b/python/paddle/fluid/tests/unittests/test_attribute_var.py @@ -13,13 +13,14 @@ # limitations under the License. import os -import unittest import tempfile +import unittest + +import numpy as np + import paddle import paddle.inference as paddle_infer -from paddle.fluid.framework import program_guard, Program -from paddle.fluid.framework import OpProtoHolder -import numpy as np +from paddle.fluid.framework import OpProtoHolder, Program, program_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_auc_op.py b/python/paddle/fluid/tests/unittests/test_auc_op.py index 0ea67b19b13fcf..4a693500bcf28b 100644 --- a/python/paddle/fluid/tests/unittests/test_auc_op.py +++ b/python/paddle/fluid/tests/unittests/test_auc_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -from paddle.fluid import metrics -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid +from paddle.fluid import metrics class TestAucOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_auc_single_pred_op.py b/python/paddle/fluid/tests/unittests/test_auc_single_pred_op.py index 13482d0af3abaf..1abdc6ef401473 100644 --- a/python/paddle/fluid/tests/unittests/test_auc_single_pred_op.py +++ b/python/paddle/fluid/tests/unittests/test_auc_single_pred_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + from paddle.fluid import metrics diff --git a/python/paddle/fluid/tests/unittests/test_auto_growth_gpu_memory_limit.py b/python/paddle/fluid/tests/unittests/test_auto_growth_gpu_memory_limit.py index 7f4f6c78c19455..2bab9ad2011f25 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_growth_gpu_memory_limit.py +++ b/python/paddle/fluid/tests/unittests/test_auto_growth_gpu_memory_limit.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest + import numpy as np +import paddle.fluid as fluid + fluid.core.globals()['FLAGS_allocator_strategy'] = 'auto_growth' if fluid.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py index a632d29f6915be..c2a116abc05804 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py @@ -12,13 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import json +import os import tempfile import unittest -import os -import json -from paddle.distributed.auto_parallel.cluster import Cluster -from paddle.distributed.auto_parallel.cluster import DeviceType -from paddle.distributed.auto_parallel.cluster import LinkType + +from paddle.distributed.auto_parallel.cluster import ( + Cluster, + DeviceType, + LinkType, +) cluster_json = """ { diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py index c6af2b0a6a2f38..113f32d31e12a3 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py @@ -17,14 +17,14 @@ import paddle import paddle.nn as nn -import paddle.static as static import paddle.nn.functional as F -import paddle.utils as utils +import paddle.static as static import paddle.tensor as tensor -from paddle.fluid import layers -from paddle.distributed.fleet import auto +import paddle.utils as utils from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.dist_context import DistributedContext +from paddle.distributed.fleet import auto +from paddle.fluid import layers paddle.enable_static() _global_parallel_strategy = None diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py index 0febac998b32c6..45dd9bb66ee064 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py @@ -18,14 +18,14 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F +import paddle.static as static import paddle.tensor as tensor import paddle.utils as utils -from paddle.fluid import layers -from paddle.nn.layer.transformer import _convert_param_attr_to_list -import paddle.static as static -from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.dist_context import DistributedContext +from paddle.distributed.fleet import auto +from paddle.fluid import layers +from paddle.nn.layer.transformer import _convert_param_attr_to_list paddle.enable_static() _global_parallel_strategy = None diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py index e474c91355ead4..73cc48f2722324 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py @@ -15,19 +15,19 @@ import unittest import paddle +import paddle.fluid.core as core import paddle.nn as nn -import paddle.static as static import paddle.nn.functional as F +import paddle.static as static import paddle.utils as utils -from paddle.distributed.fleet import auto +from paddle.distributed import fleet from paddle.distributed.auto_parallel.completion import Completer +from paddle.distributed.auto_parallel.cost_model import estimate_cost from paddle.distributed.auto_parallel.dist_context import DistributedContext -from paddle.distributed import fleet -from paddle.distributed.auto_parallel.partitioner import Partitioner from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer +from paddle.distributed.auto_parallel.partitioner import Partitioner from 
paddle.distributed.auto_parallel.reshard import Resharder -from paddle.distributed.auto_parallel.cost_model import estimate_cost -import paddle.fluid.core as core +from paddle.distributed.fleet import auto paddle.enable_static() _global_parallel_strategy = "dp_mp_pp" diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py index e1f504602398e6..900b44d18f18f0 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py @@ -15,19 +15,20 @@ import copy import unittest +import test_auto_parallel_reshard +from test_auto_parallel_reshard import mlp_forward + import paddle -from paddle.distributed.fleet import auto -from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed import fleet -from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer -from paddle.distributed.auto_parallel.partitioner import Partitioner -from paddle.distributed.auto_parallel.dist_context import DistributedContext -from paddle.distributed.auto_parallel.dist_tensor import DistributedTensor +from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.dist_attribute import ( TensorDistributedAttribute, ) -import test_auto_parallel_reshard -from test_auto_parallel_reshard import mlp_forward +from paddle.distributed.auto_parallel.dist_context import DistributedContext +from paddle.distributed.auto_parallel.dist_tensor import DistributedTensor +from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer +from paddle.distributed.auto_parallel.partitioner import Partitioner +from paddle.distributed.fleet import auto def get_dist_prog( diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py index 2e43bf6f928cb9..ac75e0a9570b4f 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + from paddle.distributed.auto_parallel.graph import Graph diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py index 80e4e66d2137f2..946b8959d0b4d3 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py @@ -12,33 +12,34 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import json +import os import tempfile import unittest -import os -import json + import numpy as np import paddle -import paddle.nn as nn import paddle.fluid as fluid +import paddle.nn as nn import paddle.nn.functional as F -import paddle.utils as utils import paddle.static as static -from paddle.fluid import core -from paddle.fluid import layers -from paddle.fluid.initializer import NumpyArrayInitializer +import paddle.utils as utils from paddle.distributed import fleet - -from paddle.distributed.fleet import auto +from paddle.distributed.auto_parallel.cluster import Cluster from paddle.distributed.auto_parallel.completion import Completer -from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer from paddle.distributed.auto_parallel.dist_context import DistributedContext +from paddle.distributed.auto_parallel.mapper import ( + get_comm_volume, + get_dtype_bytes, + mapping, +) +from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer from paddle.distributed.auto_parallel.partitioner import Partitioner from paddle.distributed.auto_parallel.reshard import Resharder -from paddle.distributed.auto_parallel.cluster import Cluster -from paddle.distributed.auto_parallel.mapper import mapping -from paddle.distributed.auto_parallel.mapper import get_dtype_bytes -from paddle.distributed.auto_parallel.mapper import get_comm_volume +from paddle.distributed.fleet import auto +from paddle.fluid import core, layers +from paddle.fluid.initializer import NumpyArrayInitializer if os.getenv("CUDA_VISIBLE_DEVICES") is not None: os.environ["CUDA_VISIBLE_DEVICES"] = "" diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py index 78706d20d45e57..f745926f5b39b2 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py @@ -17,18 +17,18 @@ import paddle import paddle.nn as nn -import paddle.static as static import paddle.nn.functional as F -import paddle.utils as utils +import paddle.static as static import paddle.tensor as tensor -from paddle.fluid import layers -from paddle.distributed.fleet import auto +import paddle.utils as utils +from paddle.distributed import fleet from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.dist_context import DistributedContext -from paddle.distributed import fleet from paddle.distributed.auto_parallel.partitioner import Partitioner -from paddle.distributed.auto_parallel.utils import _get_comm_group from paddle.distributed.auto_parallel.process_group import new_process_group +from paddle.distributed.auto_parallel.utils import _get_comm_group +from paddle.distributed.fleet import auto +from paddle.fluid import layers paddle.enable_static() _global_parallel_strategy = None diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py index b65a2351244b67..cefc98cdb5ff4a 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py @@ -18,17 +18,17 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F +import paddle.static as static import paddle.tensor as tensor import paddle.utils as utils -from paddle.fluid import layers -from paddle.nn.layer.transformer import 
_convert_param_attr_to_list -import paddle.static as static -from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer -from paddle.distributed.auto_parallel.partitioner import Partitioner from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer -from paddle.distributed.auto_parallel.utils import _get_comm_group +from paddle.distributed.auto_parallel.partitioner import Partitioner from paddle.distributed.auto_parallel.process_group import new_process_group +from paddle.distributed.auto_parallel.utils import _get_comm_group +from paddle.distributed.fleet import auto +from paddle.fluid import layers +from paddle.nn.layer.transformer import _convert_param_attr_to_list paddle.enable_static() _global_parallel_strategy = None diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py index c8ab71ed3ec697..d3c5fcfb95d190 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py @@ -16,20 +16,20 @@ import paddle import paddle.nn as nn -import paddle.static as static import paddle.nn.functional as F +import paddle.static as static import paddle.utils as utils -from paddle.distributed.fleet import auto +from paddle.distributed import fleet from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.dist_context import DistributedContext -from paddle.distributed import fleet from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer from paddle.distributed.auto_parallel.partitioner import Partitioner -from paddle.distributed.auto_parallel.reshard import Resharder from paddle.distributed.auto_parallel.process_group import ( - _g_process_group_map, ProcessGroup, + _g_process_group_map, ) +from paddle.distributed.auto_parallel.reshard import Resharder +from paddle.distributed.fleet import auto paddle.enable_static() _global_parallel_strategy = None diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py index 9212056a41268b..8fc9003379211b 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py @@ -16,18 +16,18 @@ import paddle import paddle.nn as nn -import paddle.static as static import paddle.nn.functional as F +import paddle.static as static import paddle.utils as utils -from paddle.distributed.fleet import auto +from paddle.distributed import fleet +from paddle.distributed.auto_parallel.cluster import Cluster from paddle.distributed.auto_parallel.completion import Completer +from paddle.distributed.auto_parallel.cost import CostEstimator from paddle.distributed.auto_parallel.dist_context import DistributedContext -from paddle.distributed import fleet from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer from paddle.distributed.auto_parallel.partitioner import Partitioner from paddle.distributed.auto_parallel.reshard import Resharder -from paddle.distributed.auto_parallel.cost import CostEstimator -from paddle.distributed.auto_parallel.cluster import Cluster +from paddle.distributed.fleet import auto paddle.enable_static() _global_parallel_strategy = "dp_mp_pp" diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py 
b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py index 42a2e6ff798ffb..e3c284bd56933a 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py @@ -16,18 +16,18 @@ import paddle import paddle.nn as nn -import paddle.static as static import paddle.nn.functional as F +import paddle.static as static import paddle.utils as utils -from paddle.distributed.fleet import auto +from paddle.distributed import fleet +from paddle.distributed.auto_parallel.cluster import Cluster from paddle.distributed.auto_parallel.completion import Completer +from paddle.distributed.auto_parallel.cost import CostEstimator from paddle.distributed.auto_parallel.dist_context import DistributedContext -from paddle.distributed import fleet from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer from paddle.distributed.auto_parallel.partitioner import Partitioner from paddle.distributed.auto_parallel.reshard import Resharder -from paddle.distributed.auto_parallel.cost import CostEstimator -from paddle.distributed.auto_parallel.cluster import Cluster +from paddle.distributed.fleet import auto paddle.enable_static() _global_parallel_strategy = "mp_pp" diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py index 5c55ced4f292c7..c346360cbc0491 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py @@ -12,23 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest - import os +import unittest if os.getenv("CUDA_VISIBLE_DEVICES", None) is None: os.environ["CUDA_VISIBLE_DEVICES"] = '0' import paddle import paddle.nn as nn -import paddle.static as static import paddle.nn.functional as F +import paddle.static as static import paddle.utils as utils -from paddle.distributed.fleet import auto +from paddle.distributed import fleet from paddle.distributed.auto_parallel.dist_context import ( get_default_distributed_context, ) -from paddle.distributed import fleet +from paddle.distributed.fleet import auto paddle.enable_static() _global_parallel_strategy = None diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py index 4e90dabd3933f9..0c965ff70bda12 100755 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py @@ -16,24 +16,20 @@ import paddle import paddle.nn as nn -import paddle.static as static import paddle.nn.functional as F +import paddle.static as static import paddle.utils as utils -from paddle.distributed.fleet import auto -from paddle.distributed.auto_parallel.planner import PlanSpace -from paddle.distributed.auto_parallel.dist_context import DistributedContext -from paddle.distributed.auto_parallel.dist_attribute import ( - TensorDistributedAttribute, -) from paddle.distributed.auto_parallel.dist_attribute import ( OperatorDistributedAttribute, + TensorDistributedAttribute, ) +from paddle.distributed.auto_parallel.dist_context import DistributedContext +from paddle.distributed.auto_parallel.planner import PlanSpace from paddle.distributed.auto_parallel.utils import ( update_op_dims_mapping_by_default_dist_impl, -) 
-from paddle.distributed.auto_parallel.utils import ( update_op_dims_mapping_by_elementwise_like_dist_impl, ) +from paddle.distributed.fleet import auto paddle.enable_static() @@ -183,13 +179,11 @@ def test_update(self): set_default_dist_attr(train_program, dist_context, global_process_mesh) ops = train_program.global_block().ops vars = train_program.global_block().vars + from paddle.distributed.auto_parallel.dist_op import DistributedOperator from paddle.distributed.auto_parallel.operators.common import ( get_distributed_operator_impl_container, - ) - from paddle.distributed.auto_parallel.operators.common import ( is_elementwise_op, ) - from paddle.distributed.auto_parallel.dist_op import DistributedOperator for op in ops: dist_op_impl_container = get_distributed_operator_impl_container( diff --git a/python/paddle/fluid/tests/unittests/test_auto_search_dist_matmul_op.py b/python/paddle/fluid/tests/unittests/test_auto_search_dist_matmul_op.py index 555602a9e23ee7..a9430ea4aa0c9c 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_search_dist_matmul_op.py +++ b/python/paddle/fluid/tests/unittests/test_auto_search_dist_matmul_op.py @@ -14,19 +14,19 @@ import unittest import paddle +import paddle.fluid.core as core import paddle.nn as nn -import paddle.static as static import paddle.nn.functional as F +import paddle.static as static import paddle.utils as utils -import paddle.fluid.core as core -from paddle.fluid import layers -from paddle.distributed.auto_parallel.operators.common import ( - get_distributed_operator_impl_container, -) from paddle.distributed.auto_parallel.dist_attribute import ( OperatorDistributedAttribute, ) from paddle.distributed.auto_parallel.dist_op import DistributedOperator +from paddle.distributed.auto_parallel.operators.common import ( + get_distributed_operator_impl_container, +) +from paddle.fluid import layers paddle.enable_static() device = "gpu" if core.is_compiled_with_cuda() else "cpu" diff --git a/python/paddle/fluid/tests/unittests/test_auto_search_dist_op.py b/python/paddle/fluid/tests/unittests/test_auto_search_dist_op.py index 2c839d835bfbd4..0c36aa2460454a 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_search_dist_op.py +++ b/python/paddle/fluid/tests/unittests/test_auto_search_dist_op.py @@ -14,19 +14,19 @@ import unittest import paddle +import paddle.fluid.core as core import paddle.nn as nn -import paddle.static as static import paddle.nn.functional as F +import paddle.static as static import paddle.utils as utils -import paddle.fluid.core as core -from paddle.fluid import layers -from paddle.distributed.auto_parallel.operators.common import ( - get_distributed_operator_impl_container, -) from paddle.distributed.auto_parallel.dist_attribute import ( OperatorDistributedAttribute, ) from paddle.distributed.auto_parallel.dist_op import DistributedOperator +from paddle.distributed.auto_parallel.operators.common import ( + get_distributed_operator_impl_container, +) +from paddle.fluid import layers paddle.enable_static() device = "gpu" if core.is_compiled_with_cuda() else "cpu" diff --git a/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py b/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py index 43a2d08731d115..77062eee5a376b 100644 --- a/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py +++ b/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py @@ -13,6 +13,7 @@ # limitations under the License. 
 import unittest
+
 import paddle.fluid as fluid
diff --git a/python/paddle/fluid/tests/unittests/test_backward.py b/python/paddle/fluid/tests/unittests/test_backward.py
index 98c2c05e60c58c..9ba1cf884a1f30 100644
--- a/python/paddle/fluid/tests/unittests/test_backward.py
+++ b/python/paddle/fluid/tests/unittests/test_backward.py
@@ -13,12 +13,13 @@
 # limitations under the License.
 import unittest
-import paddle.fluid as fluid
-import paddle.static as static
-import paddle
 import numpy as np
+import paddle
+import paddle.fluid as fluid
+import paddle.static as static
+
 class BackwardNet:
     """
diff --git a/python/paddle/fluid/tests/unittests/test_backward_infer_var_data_type_shape.py b/python/paddle/fluid/tests/unittests/test_backward_infer_var_data_type_shape.py
index 2a16d6d57e3ba4..add2b429ba1835 100644
--- a/python/paddle/fluid/tests/unittests/test_backward_infer_var_data_type_shape.py
+++ b/python/paddle/fluid/tests/unittests/test_backward_infer_var_data_type_shape.py
@@ -13,10 +13,12 @@
 # limitations under the License.
 import unittest
-import paddle.fluid as fluid
+import warnings
+
 import numpy as np
+
 import paddle
-import warnings
+import paddle.fluid as fluid
 class TestBackwardInferVarDataTypeShape(unittest.TestCase):
diff --git a/python/paddle/fluid/tests/unittests/test_base_layer.py b/python/paddle/fluid/tests/unittests/test_base_layer.py
index 832c4488ccd720..4ec5ddd9dd5d03 100644
--- a/python/paddle/fluid/tests/unittests/test_base_layer.py
+++ b/python/paddle/fluid/tests/unittests/test_base_layer.py
@@ -13,14 +13,19 @@
 # limitations under the License.
 import unittest
+
 import numpy as np
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid.dygraph import to_variable
-from paddle.fluid.framework import ParamBase, EagerParamBase
+from paddle.fluid.framework import (
+    EagerParamBase,
+    ParamBase,
+    _test_eager_guard,
+    in_dygraph_mode,
+)
 from paddle.jit import ProgramTranslator
-from paddle.fluid.framework import _test_eager_guard, in_dygraph_mode
 class L1(fluid.Layer):
diff --git a/python/paddle/fluid/tests/unittests/test_basic_gru_api.py b/python/paddle/fluid/tests/unittests/test_basic_gru_api.py
index 339323110a116b..17ffed2629e0e1 100644
--- a/python/paddle/fluid/tests/unittests/test_basic_gru_api.py
+++ b/python/paddle/fluid/tests/unittests/test_basic_gru_api.py
@@ -13,15 +13,16 @@
 # limitations under the License.
 import unittest
+
 import numpy
+import numpy as np
+
 import paddle.fluid as fluid
-import paddle.fluid.layers as layers
 import paddle.fluid.core as core
+import paddle.fluid.layers as layers
+from paddle.fluid import framework
 from paddle.fluid.contrib.layers import basic_gru
 from paddle.fluid.executor import Executor
-from paddle.fluid import framework
-
-import numpy as np
 np.set_seed(123)
diff --git a/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py b/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py
index 660aa184f57880..152c76bf6fa014 100644
--- a/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py
+++ b/python/paddle/fluid/tests/unittests/test_basic_gru_unit_op.py
@@ -13,15 +13,16 @@
 # limitations under the License.
 import unittest
+
 import numpy
+import numpy as np
+
 import paddle.fluid as fluid
-import paddle.fluid.layers as layers
 import paddle.fluid.core as core
+import paddle.fluid.layers as layers
+from paddle.fluid import framework
 from paddle.fluid.contrib.layers import BasicGRUUnit
 from paddle.fluid.executor import Executor
-from paddle.fluid import framework
-
-import numpy as np
 np.set_seed(123)
diff --git a/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py b/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py
index befc7e2c5ffa5c..ba7132cfea99d0 100644
--- a/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py
+++ b/python/paddle/fluid/tests/unittests/test_basic_lstm_api.py
@@ -13,15 +13,16 @@
 # limitations under the License.
 import unittest
+
 import numpy
+import numpy as np
+
 import paddle.fluid as fluid
-import paddle.fluid.layers as layers
 import paddle.fluid.core as core
+import paddle.fluid.layers as layers
+from paddle.fluid import framework
 from paddle.fluid.contrib.layers import basic_lstm
 from paddle.fluid.executor import Executor
-from paddle.fluid import framework
-
-import numpy as np
 np.set_seed(123)
diff --git a/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py b/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py
index cf32b95d76f369..0b17e611b68069 100644
--- a/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py
+++ b/python/paddle/fluid/tests/unittests/test_basic_lstm_unit_op.py
@@ -13,15 +13,16 @@
 # limitations under the License.
 import unittest
+
 import numpy
+import numpy as np
+
 import paddle.fluid as fluid
-import paddle.fluid.layers as layers
 import paddle.fluid.core as core
+import paddle.fluid.layers as layers
+from paddle.fluid import framework
 from paddle.fluid.contrib.layers import BasicLSTMUnit
 from paddle.fluid.executor import Executor
-from paddle.fluid import framework
-
-import numpy as np
 np.set_seed(123)
diff --git a/python/paddle/fluid/tests/unittests/test_basic_rnn_name.py b/python/paddle/fluid/tests/unittests/test_basic_rnn_name.py
index 17bec037e842b9..c01a266bdbb45b 100644
--- a/python/paddle/fluid/tests/unittests/test_basic_rnn_name.py
+++ b/python/paddle/fluid/tests/unittests/test_basic_rnn_name.py
@@ -13,10 +13,12 @@
 # limitations under the License.
 import unittest
+
+from test_imperative_base import new_program_scope
+
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 from paddle.fluid.contrib.layers import basic_gru, basic_lstm
-from test_imperative_base import new_program_scope
 class TestBasicGRUApiName(unittest.TestCase):
diff --git a/python/paddle/fluid/tests/unittests/test_batch_fc_op.py b/python/paddle/fluid/tests/unittests/test_batch_fc_op.py
index 2414514b2bd250..80818ae0c189a4 100644
--- a/python/paddle/fluid/tests/unittests/test_batch_fc_op.py
+++ b/python/paddle/fluid/tests/unittests/test_batch_fc_op.py
@@ -13,9 +13,10 @@
 # limitations under the License.
 import unittest
+
 import numpy as np
 from op_test import OpTest
-from op_test import OpTest
+
 import paddle.fluid.core as core
diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py
index ccd7de2c3171fe..9c3f04b1360c03 100644
--- a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py
+++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py
@@ -14,15 +14,16 @@
 import os
 import unittest
+
 import numpy as np
-import paddle
-import paddle.fluid.core as core
-from paddle.fluid.op import Operator
-import paddle.fluid as fluid
 from op_test import OpTest, _set_use_system_allocator
-from paddle.fluid.framework import grad_var_name
+
+import paddle
 import paddle.fluid as fluid
+import paddle.fluid.core as core
 from paddle.fluid import Program, program_guard
+from paddle.fluid.framework import grad_var_name
+from paddle.fluid.op import Operator
 _set_use_system_allocator(True)
diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py
index 614e058c6deadb..7056c34487bdbd 100644
--- a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py
+++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py
@@ -13,13 +13,14 @@
 # limitations under the License.
 import unittest
+
 import numpy as np
-import paddle.fluid.core as core
-import paddle.fluid as fluid
-from paddle.fluid.framework import _test_eager_guard
+
+import paddle
 import paddle.fluid as fluid
+import paddle.fluid.core as core
 from paddle.fluid import Program, program_guard
-import paddle
+from paddle.fluid.framework import _test_eager_guard
 class TestBatchNorm(unittest.TestCase):
diff --git a/python/paddle/fluid/tests/unittests/test_batch_sampler.py b/python/paddle/fluid/tests/unittests/test_batch_sampler.py
index a848898c53f89b..72ea1577beb53a 100644
--- a/python/paddle/fluid/tests/unittests/test_batch_sampler.py
+++ b/python/paddle/fluid/tests/unittests/test_batch_sampler.py
@@ -15,12 +15,13 @@
 import unittest
 import numpy as np
+
 from paddle.io import (
     BatchSampler,
     Dataset,
+    RandomSampler,
     Sampler,
     SequenceSampler,
-    RandomSampler,
     WeightedRandomSampler,
 )
diff --git a/python/paddle/fluid/tests/unittests/test_bce_loss.py b/python/paddle/fluid/tests/unittests/test_bce_loss.py
index c77196c1d0d94e..d60c648535cd58 100644
--- a/python/paddle/fluid/tests/unittests/test_bce_loss.py
+++ b/python/paddle/fluid/tests/unittests/test_bce_loss.py
@@ -12,12 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import paddle
-import paddle.fluid as fluid
-import numpy as np
 import unittest
+
+import numpy as np
 from op_test import OpTest
+import paddle
+import paddle.fluid as fluid
+
 def test_static_layer(
     place, input_np, label_np, reduction='mean', weight_np=None
diff --git a/python/paddle/fluid/tests/unittests/test_bce_with_logits_loss.py b/python/paddle/fluid/tests/unittests/test_bce_with_logits_loss.py
index 8d921215bfa322..788c7418f7e020 100644
--- a/python/paddle/fluid/tests/unittests/test_bce_with_logits_loss.py
+++ b/python/paddle/fluid/tests/unittests/test_bce_with_logits_loss.py
@@ -12,10 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -import numpy as np -import unittest from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py b/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py index ee1e2fd64585f6..c69d8fac11f3c9 100644 --- a/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py +++ b/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py @@ -15,10 +15,11 @@ import unittest import numpy as np -import paddle.fluid.core as core -from paddle.fluid.op import Operator + import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid.framework import Program, program_guard +from paddle.fluid.op import Operator class TestBeamSearchDecodeOp(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_beam_search_op.py b/python/paddle/fluid/tests/unittests/test_beam_search_op.py index 60989a0b60880e..0d44764e2da1bf 100644 --- a/python/paddle/fluid/tests/unittests/test_beam_search_op.py +++ b/python/paddle/fluid/tests/unittests/test_beam_search_op.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.op import Operator -import paddle.fluid.core as core import unittest + import numpy as np + +import paddle import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid.framework import Program, program_guard -import paddle +from paddle.fluid.op import Operator def create_tensor(scope, name, np_data): diff --git a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py index 027afdb177a29e..d00e804f26b2f2 100644 --- a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py +++ b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest -import paddle -from op_test import OpTest + import numpy as np +from op_test import OpTest + +import paddle def output_hist(out): diff --git a/python/paddle/fluid/tests/unittests/test_bfgs.py b/python/paddle/fluid/tests/unittests/test_bfgs.py index 07b15d3aefca7f..0e02f18ad065d1 100644 --- a/python/paddle/fluid/tests/unittests/test_bfgs.py +++ b/python/paddle/fluid/tests/unittests/test_bfgs.py @@ -18,9 +18,8 @@ import paddle import paddle.nn.functional as F - -from paddle.incubate.optimizer.functional.bfgs import minimize_bfgs from paddle.fluid.framework import _test_eager_guard +from paddle.incubate.optimizer.functional.bfgs import minimize_bfgs np.random.seed(123) diff --git a/python/paddle/fluid/tests/unittests/test_bfloat16_embedding.py b/python/paddle/fluid/tests/unittests/test_bfloat16_embedding.py index e86c45cf5412b4..45084add53acb7 100644 --- a/python/paddle/fluid/tests/unittests/test_bfloat16_embedding.py +++ b/python/paddle/fluid/tests/unittests/test_bfloat16_embedding.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -import numpy as np import unittest -import paddle.nn.functional as F + +import numpy as np from test_sparse_attention_op import get_cuda_version +import paddle +import paddle.nn.functional as F + class BF16EmbeddingTest(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_bicubic_interp_op.py b/python/paddle/fluid/tests/unittests/test_bicubic_interp_op.py index 886441429c25dd..b5d1a7d0dfd5c0 100644 --- a/python/paddle/fluid/tests/unittests/test_bicubic_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_bicubic_interp_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid from paddle.fluid import Program, program_guard from paddle.nn.functional import interpolate diff --git a/python/paddle/fluid/tests/unittests/test_bicubic_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_bicubic_interp_v2_op.py index 2bfe40abe53cbd..7046d78fffb180 100644 --- a/python/paddle/fluid/tests/unittests/test_bicubic_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_bicubic_interp_v2_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid from paddle.fluid import Program, program_guard from paddle.fluid.framework import _test_eager_guard from paddle.nn.functional import interpolate diff --git a/python/paddle/fluid/tests/unittests/test_bilateral_slice_op.py b/python/paddle/fluid/tests/unittests/test_bilateral_slice_op.py index db0b8c182de921..d8aa8baec6b829 100644 --- a/python/paddle/fluid/tests/unittests/test_bilateral_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilateral_slice_op.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import math import unittest + import numpy as np from op_test import OpTest + import paddle -import math class Gsz: diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_api.py b/python/paddle/fluid/tests/unittests/test_bilinear_api.py index dcdb03d05bad90..925a45f3bada42 100644 --- a/python/paddle/fluid/tests/unittests/test_bilinear_api.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_api.py @@ -14,10 +14,11 @@ import unittest +import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import numpy as np class TestBilinearAPI(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py index b281d30a2221f2..5381df947acc02 100755 --- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py @@ -13,11 +13,13 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest -import paddle.fluid.core as core -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py index 1a3786dbdd5d54..ff5a0a707b87aa 100755 --- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py @@ -13,12 +13,14 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -import paddle.fluid.core as core + +import paddle import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.nn.functional import interpolate -import paddle def bilinear_interp_test( diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py index 79ef2ffabbc1c4..74910a4a87c15e 100644 --- a/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_tensor_product_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid as fluid from op_test import OpTest + import paddle +import paddle.fluid as fluid class TestDygraphBilinearTensorProductAPIError(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_bincount_op.py b/python/paddle/fluid/tests/unittests/test_bincount_op.py index c5e39f1b13fa34..1c7ce50b2fd22b 100644 --- a/python/paddle/fluid/tests/unittests/test_bincount_op.py +++ b/python/paddle/fluid/tests/unittests/test_bincount_op.py @@ -13,12 +13,14 @@ # limitations under the License. import os -import unittest import tempfile +import unittest + import numpy as np +from op_test import OpTest + import paddle import paddle.fluid as fluid -from op_test import OpTest import paddle.inference as paddle_infer from paddle.fluid.framework import in_dygraph_mode diff --git a/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py b/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py index 0412c04d8ca0d3..0c2664007df86c 100644 --- a/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py +++ b/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_bitwise_op.py b/python/paddle/fluid/tests/unittests/test_bitwise_op.py index e5d888b329a8cb..fa4e3bc4a64aa9 100644 --- a/python/paddle/fluid/tests/unittests/test_bitwise_op.py +++ b/python/paddle/fluid/tests/unittests/test_bitwise_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np -import paddle from op_test import OpTest +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_bmm_op.py b/python/paddle/fluid/tests/unittests/test_bmm_op.py index 5c99a2e62cc59b..5269f27ccdaa39 100644 --- a/python/paddle/fluid/tests/unittests/test_bmm_op.py +++ b/python/paddle/fluid/tests/unittests/test_bmm_op.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_box_clip_op.py b/python/paddle/fluid/tests/unittests/test_box_clip_op.py index 2ee356976f6c62..a114f30a0620d1 100644 --- a/python/paddle/fluid/tests/unittests/test_box_clip_op.py +++ b/python/paddle/fluid/tests/unittests/test_box_clip_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_box_coder_op.py b/python/paddle/fluid/tests/unittests/test_box_coder_op.py index fd3106f9c6f849..6fea1dc9385238 100644 --- a/python/paddle/fluid/tests/unittests/test_box_coder_op.py +++ b/python/paddle/fluid/tests/unittests/test_box_coder_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_box_decoder_and_assign_op.py b/python/paddle/fluid/tests/unittests/test_box_decoder_and_assign_op.py index 0b826e362e7bd0..52caf2ad92db11 100644 --- a/python/paddle/fluid/tests/unittests/test_box_decoder_and_assign_op.py +++ b/python/paddle/fluid/tests/unittests/test_box_decoder_and_assign_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_boxps.py b/python/paddle/fluid/tests/unittests/test_boxps.py index a51f86a7b2b725..4f2d3ee138a775 100644 --- a/python/paddle/fluid/tests/unittests/test_boxps.py +++ b/python/paddle/fluid/tests/unittests/test_boxps.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import unittest from paddle.fluid.layers.nn import _pull_box_sparse from paddle.fluid.transpiler import collective diff --git a/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py b/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py index 5f8953aaea3631..5bd6dbb2fda5b6 100644 --- a/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest, randomize_probability diff --git a/python/paddle/fluid/tests/unittests/test_broadcast_error.py b/python/paddle/fluid/tests/unittests/test_broadcast_error.py index 517de67fd6dddf..05d544cbab81e3 100644 --- a/python/paddle/fluid/tests/unittests/test_broadcast_error.py +++ b/python/paddle/fluid/tests/unittests/test_broadcast_error.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_broadcast_shape.py b/python/paddle/fluid/tests/unittests/test_broadcast_shape.py index 1afb046b68b467..028e950821a24e 100644 --- a/python/paddle/fluid/tests/unittests/test_broadcast_shape.py +++ b/python/paddle/fluid/tests/unittests/test_broadcast_shape.py @@ -13,6 +13,7 @@ # limitations under the License. 
import unittest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py b/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py index 3872dadebd82d9..8a1b6a52a2baa9 100644 --- a/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py +++ b/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random import unittest + import numpy as np -import paddle -import paddle.fluid.core as core from op_test import OpTest -import random +import paddle +import paddle.fluid.core as core random.seed(2021) diff --git a/python/paddle/fluid/tests/unittests/test_broadcast_to_op.py b/python/paddle/fluid/tests/unittests/test_broadcast_to_op.py index e8e876766c3326..ea8e7648b1d32d 100644 --- a/python/paddle/fluid/tests/unittests/test_broadcast_to_op.py +++ b/python/paddle/fluid/tests/unittests/test_broadcast_to_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np + +import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard -import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_bucketize_api.py b/python/paddle/fluid/tests/unittests/test_bucketize_api.py index e82d6e9d15c862..3740ab4e579406 100644 --- a/python/paddle/fluid/tests/unittests/test_bucketize_api.py +++ b/python/paddle/fluid/tests/unittests/test_bucketize_api.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py b/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py index 5534ff67915b2b..284f0b8f77e754 100644 --- a/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py +++ b/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.fluid as fluid -import numpy as np -from simple_nets import simple_fc_net import random import unittest +import numpy as np +from simple_nets import simple_fc_net + +import paddle +import paddle.fluid as fluid + batch_size = 32 feed_dict = { diff --git a/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass.py b/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass.py index e9e62bee006801..115984ad7ce11b 100644 --- a/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass.py +++ b/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from test_buffer_shared_memory_reuse_pass import InplaceTestBase import unittest +from test_buffer_shared_memory_reuse_pass import InplaceTestBase + class CUDAInplaceTestWithFuseOptimizationOps(InplaceTestBase): def initParameter(self): diff --git a/python/paddle/fluid/tests/unittests/test_build_strategy_fusion_group_pass.py b/python/paddle/fluid/tests/unittests/test_build_strategy_fusion_group_pass.py index 962a5d3327c78e..a9ae9e85c36e4a 100644 --- a/python/paddle/fluid/tests/unittests/test_build_strategy_fusion_group_pass.py +++ b/python/paddle/fluid/tests/unittests/test_build_strategy_fusion_group_pass.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest + +from test_eager_deletion_padding_rnn import PaddingRNNTestBase, RNNConfig + import paddle.fluid as fluid import paddle.fluid.core as core -from test_eager_deletion_padding_rnn import RNNConfig, PaddingRNNTestBase class FusionGroupPaddingRNNTest(PaddingRNNTestBase): diff --git a/python/paddle/fluid/tests/unittests/test_c_comm_init_all_op.py b/python/paddle/fluid/tests/unittests/test_c_comm_init_all_op.py index 7f4eac0d3fb36a..1863210d4c7eed 100644 --- a/python/paddle/fluid/tests/unittests/test_c_comm_init_all_op.py +++ b/python/paddle/fluid/tests/unittests/test_c_comm_init_all_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_c_embedding_op.py b/python/paddle/fluid/tests/unittests/test_c_embedding_op.py index 66c68097127a2d..87a976d5b83086 100644 --- a/python/paddle/fluid/tests/unittests/test_c_embedding_op.py +++ b/python/paddle/fluid/tests/unittests/test_c_embedding_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + from paddle.fluid.tests.unittests.c_embedding_op_base import ( TestCEmbeddingCPU, TestCEmbeddingOpBase, diff --git a/python/paddle/fluid/tests/unittests/test_calc_gradient.py b/python/paddle/fluid/tests/unittests/test_calc_gradient.py index 59bc1f574e443a..58c4ee6083f7ff 100644 --- a/python/paddle/fluid/tests/unittests/test_calc_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_calc_gradient.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest + import numpy as np + +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers from paddle.fluid.backward import calc_gradient diff --git a/python/paddle/fluid/tests/unittests/test_case.py b/python/paddle/fluid/tests/unittests/test_case.py index c0ef9f811efc1d..7008544d17a68b 100644 --- a/python/paddle/fluid/tests/unittests/test_case.py +++ b/python/paddle/fluid/tests/unittests/test_case.py @@ -12,16 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np import unittest +from functools import partial + +import numpy as np import paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.layers as layers -from paddle.fluid.framework import Program, program_guard -from functools import partial import paddle.fluid.optimizer as optimizer +from paddle.fluid.framework import Program, program_guard class TestAPICase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_cast_op.py b/python/paddle/fluid/tests/unittests/test_cast_op.py index 8665e285fa5cfb..96ad3246f41b6d 100644 --- a/python/paddle/fluid/tests/unittests/test_cast_op.py +++ b/python/paddle/fluid/tests/unittests/test_cast_op.py @@ -13,17 +13,18 @@ # limitations under the License. import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope +from op_test import OpTest, convert_float_to_uint16, convert_uint16_to_float import paddle -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.fluid.layers as layers from paddle.fluid import Program, program_guard -from op_test import OpTest, convert_uint16_to_float, convert_float_to_uint16 from paddle.fluid.framework import _test_eager_guard -import gradient_checker -from decorator_helper import prog_scope -import paddle.fluid.layers as layers class TestCastOpFp32ToFp64(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_center_loss.py b/python/paddle/fluid/tests/unittests/test_center_loss.py index bf21e64945f565..15a69ac8e5c202 100644 --- a/python/paddle/fluid/tests/unittests/test_center_loss.py +++ b/python/paddle/fluid/tests/unittests/test_center_loss.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_channel_shuffle.py b/python/paddle/fluid/tests/unittests/test_channel_shuffle.py index 0da2a651477e2b..bfad2bd94d3e6d 100644 --- a/python/paddle/fluid/tests/unittests/test_channel_shuffle.py +++ b/python/paddle/fluid/tests/unittests/test_channel_shuffle.py @@ -13,13 +13,14 @@ # limitations under the License. import unittest -import numpy as np +import numpy as np from op_test import OpTest + import paddle -import paddle.nn.functional as F -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.nn.functional as F def channel_shuffle_np(x, groups, data_format="NCHW"): diff --git a/python/paddle/fluid/tests/unittests/test_check_import_scipy.py b/python/paddle/fluid/tests/unittests/test_check_import_scipy.py index f06b5c97e3f904..801528ebce80ea 100644 --- a/python/paddle/fluid/tests/unittests/test_check_import_scipy.py +++ b/python/paddle/fluid/tests/unittests/test_check_import_scipy.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import builtins -from paddle.check_import_scipy import check_import_scipy import unittest +from paddle.check_import_scipy import check_import_scipy + def my_import(name, globals=None, locals=None, fromlist=(), level=0): raise ImportError('DLL load failed, unittest: import scipy failed') diff --git a/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py b/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py index f3b539d8057aeb..cd2abeb7016280 100644 --- a/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py +++ b/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py @@ -13,7 +13,6 @@ # limitations under the License. import unittest -from paddle.fluid.incubate.checkpoint.checkpoint_saver import CheckpointSaver from paddle.distributed.fleet.utils.fs import HDFSClient from paddle.fluid.incubate.checkpoint.checkpoint_saver import CheckpointSaver diff --git a/python/paddle/fluid/tests/unittests/test_cholesky_op.py b/python/paddle/fluid/tests/unittests/test_cholesky_op.py index 0688a782f7287d..ffeb18b0ff90b5 100644 --- a/python/paddle/fluid/tests/unittests/test_cholesky_op.py +++ b/python/paddle/fluid/tests/unittests/test_cholesky_op.py @@ -13,14 +13,16 @@ # limitations under the License. import unittest + import numpy as np +from decorator_helper import prog_scope +from gradient_checker import grad_check +from op_test import OpTest, skip_check_grad_ci + import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers import paddle.fluid.core as core -from op_test import OpTest, skip_check_grad_ci -from gradient_checker import grad_check -from decorator_helper import prog_scope +import paddle.fluid.layers as layers @skip_check_grad_ci( diff --git a/python/paddle/fluid/tests/unittests/test_cholesky_solve_op.py b/python/paddle/fluid/tests/unittests/test_cholesky_solve_op.py index 5ed62fe2e0076d..49c50e2280c717 100644 --- a/python/paddle/fluid/tests/unittests/test_cholesky_solve_op.py +++ b/python/paddle/fluid/tests/unittests/test_cholesky_solve_op.py @@ -12,18 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License.w +import sys import unittest + import numpy as np import scipy import scipy.linalg -import sys - sys.path.append("..") -import paddle from op_test import OpTest + +import paddle import paddle.fluid as fluid -from paddle.fluid import Program, program_guard, core +from paddle.fluid import Program, core, program_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py b/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py index 1cbf8ebabb8042..a79827f2fefdd8 100644 --- a/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py +++ b/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py @@ -13,9 +13,9 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -import numpy as np class Segment: diff --git a/python/paddle/fluid/tests/unittests/test_chunk_op.py b/python/paddle/fluid/tests/unittests/test_chunk_op.py index f29d90ceb73dc5..7124409727668f 100644 --- a/python/paddle/fluid/tests/unittests/test_chunk_op.py +++ b/python/paddle/fluid/tests/unittests/test_chunk_op.py @@ -13,11 +13,12 @@ # limitations under the License. 
import unittest + import numpy as np -import numpy as np -from paddle.fluid import Program, program_guard -from paddle import fluid + import paddle +from paddle import fluid +from paddle.fluid import Program, program_guard class TestChunkOpError(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_class_center_sample_op.py b/python/paddle/fluid/tests/unittests/test_class_center_sample_op.py index 53b9cfad345301..9eaf77fcad6a18 100644 --- a/python/paddle/fluid/tests/unittests/test_class_center_sample_op.py +++ b/python/paddle/fluid/tests/unittests/test_class_center_sample_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest + import paddle import paddle.fluid.core as core -from op_test import OpTest from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py b/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py index d7b9c6cd528552..a9d79f81bf310b 100644 --- a/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py @@ -13,10 +13,11 @@ # limitations under the License. import unittest + import numpy as np -import paddle from op_test import OpTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_clip_op.py b/python/paddle/fluid/tests/unittests/test_clip_op.py index e955bd71a64307..e6056c50124811 100644 --- a/python/paddle/fluid/tests/unittests/test_clip_op.py +++ b/python/paddle/fluid/tests/unittests/test_clip_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest + import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard -from op_test import OpTest from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_coalesce_tensor_op.py b/python/paddle/fluid/tests/unittests/test_coalesce_tensor_op.py index bcd9eb412bc996..d4b25f7bd21367 100644 --- a/python/paddle/fluid/tests/unittests/test_coalesce_tensor_op.py +++ b/python/paddle/fluid/tests/unittests/test_coalesce_tensor_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -from paddle.fluid import core -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid +from paddle.fluid import core def coalesce_tensor_eager_api( diff --git a/python/paddle/fluid/tests/unittests/test_collect_fpn_proposals_op.py b/python/paddle/fluid/tests/unittests/test_collect_fpn_proposals_op.py index 06011ee62d8dfe..4898eeecfdc875 100644 --- a/python/paddle/fluid/tests/unittests/test_collect_fpn_proposals_op.py +++ b/python/paddle/fluid/tests/unittests/test_collect_fpn_proposals_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_collective_api_base.py b/python/paddle/fluid/tests/unittests/test_collective_api_base.py index 68b44a186df25d..096ac99f9c9973 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_api_base.py +++ b/python/paddle/fluid/tests/unittests/test_collective_api_base.py @@ -12,18 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np -import unittest import os -import sys -import subprocess import pickle +import subprocess +import sys import tempfile +import unittest from contextlib import closing + +import numpy as np +from paddle_bfloat import bfloat16 + import paddle import paddle.fluid as fluid from paddle.fluid import core -from paddle_bfloat import bfloat16 def create_bool_test_data(shape=None, seed=None): diff --git a/python/paddle/fluid/tests/unittests/test_collective_base.py b/python/paddle/fluid/tests/unittests/test_collective_base.py index 08d6fca95a4316..1483cd1d07a9c7 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_base.py +++ b/python/paddle/fluid/tests/unittests/test_collective_base.py @@ -12,15 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import unittest -import time import os -import sys -import subprocess import pickle +import subprocess +import sys import tempfile +import time +import unittest from contextlib import closing + +import numpy as np + import paddle.fluid as fluid import paddle.fluid.unique_name as nameGen from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/test_communicator_async.py b/python/paddle/fluid/tests/unittests/test_communicator_async.py index 33a98a5c894c2e..978e0d644c7271 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_async.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_async.py @@ -13,16 +13,16 @@ # limitations under the License. import os -import unittest import time +import unittest import paddle paddle.enable_static() -import paddle.fluid as fluid -import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid class TestCommunicator(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py b/python/paddle/fluid/tests/unittests/test_communicator_geo.py index 0efea743f33144..9363e2fe4727c7 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py @@ -13,18 +13,17 @@ # limitations under the License. import os +import subprocess import sys import time -import subprocess import unittest + import numpy import paddle -import paddle.fluid as fluid - -import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet - +import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid from paddle.distributed.utils.launch_utils import find_free_ports paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py b/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py index 328874b3f55656..fbea8b003ceda4 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py @@ -13,17 +13,17 @@ # limitations under the License. 
import os -import unittest -import time import tempfile +import time +import unittest import paddle paddle.enable_static() -import paddle.fluid as fluid -import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid class TestCommunicator(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_compare_op.py b/python/paddle/fluid/tests/unittests/test_compare_op.py index 191f250c4530d3..8cce73391d692e 100755 --- a/python/paddle/fluid/tests/unittests/test_compare_op.py +++ b/python/paddle/fluid/tests/unittests/test_compare_op.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import op_test import unittest + import numpy import numpy as np +import op_test + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_compare_reduce_op.py b/python/paddle/fluid/tests/unittests/test_compare_reduce_op.py index fb8a7057fd7d4f..bc176faea8a06a 100644 --- a/python/paddle/fluid/tests/unittests/test_compare_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_compare_reduce_op.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import op_test import unittest + import numpy as np +import op_test + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_compiled_program.py b/python/paddle/fluid/tests/unittests/test_compiled_program.py index 8418de88fd8658..70edcc5358afb2 100644 --- a/python/paddle/fluid/tests/unittests/test_compiled_program.py +++ b/python/paddle/fluid/tests/unittests/test_compiled_program.py @@ -13,12 +13,14 @@ # limitations under the license. import unittest + import numpy as np +from simple_nets import simple_fc_net +from test_imperative_base import new_program_scope + import paddle import paddle.fluid as fluid from paddle.fluid import core -from test_imperative_base import new_program_scope -from simple_nets import simple_fc_net class TestCompiledProgram(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_complex_abs.py b/python/paddle/fluid/tests/unittests/test_complex_abs.py index 968bcb79613180..8d21f76d64ff96 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_abs.py +++ b/python/paddle/fluid/tests/unittests/test_complex_abs.py @@ -13,11 +13,12 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest import paddle import paddle.fluid.dygraph as dg -from op_test import OpTest from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_complex_cast.py b/python/paddle/fluid/tests/unittests/test_complex_cast.py index 9065b2d008d333..8302ba585856b0 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_cast.py +++ b/python/paddle/fluid/tests/unittests/test_complex_cast.py @@ -13,6 +13,7 @@ # limitations under the License. 
import unittest + import numpy as np import paddle diff --git a/python/paddle/fluid/tests/unittests/test_complex_elementwise_layers.py b/python/paddle/fluid/tests/unittests/test_complex_elementwise_layers.py index 14bdec6101ab2d..98a95eb37a099b 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_elementwise_layers.py +++ b/python/paddle/fluid/tests/unittests/test_complex_elementwise_layers.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from numpy.random import random as rand diff --git a/python/paddle/fluid/tests/unittests/test_complex_getitem.py b/python/paddle/fluid/tests/unittests/test_complex_getitem.py index 7446e8f6dfefa5..09aca3b060f307 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_getitem.py +++ b/python/paddle/fluid/tests/unittests/test_complex_getitem.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle.fluid as fluid import paddle.fluid.dygraph as dg from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_complex_grad_accumulated.py b/python/paddle/fluid/tests/unittests/test_complex_grad_accumulated.py index f7597bfcc6982b..10ff998226f7a0 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_grad_accumulated.py +++ b/python/paddle/fluid/tests/unittests/test_complex_grad_accumulated.py @@ -13,10 +13,10 @@ # limitations under the License. import unittest + import numpy as np import paddle - import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_complex_kron.py b/python/paddle/fluid/tests/unittests/test_complex_kron.py index 537a4d0d070988..77c5d964297732 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_kron.py +++ b/python/paddle/fluid/tests/unittests/test_complex_kron.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle import fluid +import unittest + +import numpy as np + import paddle import paddle.fluid.dygraph as dg -import numpy as np -import unittest +from paddle import fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_complex_matmul.py b/python/paddle/fluid/tests/unittests/test_complex_matmul.py index f803930d2ad3e8..b6793e35d31f77 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_matmul.py +++ b/python/paddle/fluid/tests/unittests/test_complex_matmul.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest -import paddle + import numpy as np + +import paddle import paddle.fluid as fluid import paddle.fluid.dygraph as dg from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_complex_op.py b/python/paddle/fluid/tests/unittests/test_complex_op.py index 01c6b55921bd21..854fad15bab376 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_op.py +++ b/python/paddle/fluid/tests/unittests/test_complex_op.py @@ -13,12 +13,13 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest import paddle -from paddle.fluid import dygraph from paddle import static +from paddle.fluid import dygraph from paddle.fluid.framework import _test_eager_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_complex_reshape.py b/python/paddle/fluid/tests/unittests/test_complex_reshape.py index 0e8398cf816eab..00b939e959c7d9 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_reshape.py +++ b/python/paddle/fluid/tests/unittests/test_complex_reshape.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid +import unittest + +import numpy as np + import paddle +import paddle.fluid as fluid import paddle.fluid.dygraph as dg -import numpy as np -import unittest from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_complex_simplenet.py b/python/paddle/fluid/tests/unittests/test_complex_simplenet.py index b52cda8a64a63a..f9c59f88116bfc 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_simplenet.py +++ b/python/paddle/fluid/tests/unittests/test_complex_simplenet.py @@ -13,10 +13,10 @@ # limitations under the License. import unittest + import numpy as np import paddle - import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_complex_sum_layer.py b/python/paddle/fluid/tests/unittests/test_complex_sum_layer.py index 7de44b33b0fce1..09d4828de9bb90 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_sum_layer.py +++ b/python/paddle/fluid/tests/unittests/test_complex_sum_layer.py @@ -13,12 +13,14 @@ # limitations under the License. import unittest + import numpy as np -import paddle from numpy.random import random as rand -from paddle import tensor + +import paddle import paddle.fluid as fluid import paddle.fluid.dygraph as dg +from paddle import tensor from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_complex_trace_layer.py b/python/paddle/fluid/tests/unittests/test_complex_trace_layer.py index bb8bc7f445c19f..d0188ae9b19878 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_trace_layer.py +++ b/python/paddle/fluid/tests/unittests/test_complex_trace_layer.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np from numpy.random import random as rand -from paddle import tensor + import paddle.fluid as fluid import paddle.fluid.dygraph as dg +from paddle import tensor from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_complex_transpose.py b/python/paddle/fluid/tests/unittests/test_complex_transpose.py index b08f7e1be0716d..adc421a96605b6 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_complex_transpose.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest -import paddle + import numpy as np + +import paddle import paddle.fluid as fluid import paddle.fluid.dygraph as dg from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_complex_variable.py b/python/paddle/fluid/tests/unittests/test_complex_variable.py index b3afc1d9df9df2..588f87b3b2cb61 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_variable.py +++ b/python/paddle/fluid/tests/unittests/test_complex_variable.py @@ -13,13 +13,14 @@ # limitations under the License. import unittest + import numpy as np + import paddle -import paddle.fluid.dygraph as dg import paddle.fluid.core as core -from paddle.fluid.framework import convert_np_dtype_to_dtype_ +import paddle.fluid.dygraph as dg from paddle.fluid.data_feeder import convert_dtype -from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.framework import _test_eager_guard, convert_np_dtype_to_dtype_ class TestComplexVariable(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_complex_view_op.py b/python/paddle/fluid/tests/unittests/test_complex_view_op.py index 451469dfa21010..e73c936e4a9b6f 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_view_op.py +++ b/python/paddle/fluid/tests/unittests/test_complex_view_op.py @@ -13,12 +13,13 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest import paddle -from paddle.fluid import dygraph from paddle import static +from paddle.fluid import dygraph from paddle.fluid.framework import _test_eager_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_concat_op.py b/python/paddle/fluid/tests/unittests/test_concat_op.py index 624f2be4a9f8ec..79984c4a96da46 100644 --- a/python/paddle/fluid/tests/unittests/test_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_concat_op.py @@ -13,19 +13,21 @@ # limitations under the License. import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope + +import paddle +import paddle.fluid as fluid +import paddle.fluid.layers as layers +from paddle.fluid import Program, core, program_guard +from paddle.fluid.framework import _test_eager_guard from paddle.fluid.tests.unittests.op_test import ( OpTest, - skip_check_grad_ci, convert_float_to_uint16, + skip_check_grad_ci, ) -import paddle.fluid as fluid -from paddle.fluid import Program, core, program_guard -from paddle.fluid.framework import _test_eager_guard -import paddle -import gradient_checker -from decorator_helper import prog_scope -import paddle.fluid.layers as layers class TestConcatOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_cond.py b/python/paddle/fluid/tests/unittests/test_cond.py index 176be73afe49fc..41bfcff7363b87 100644 --- a/python/paddle/fluid/tests/unittests/test_cond.py +++ b/python/paddle/fluid/tests/unittests/test_cond.py @@ -12,18 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np import os import unittest + +import numpy as np +from simple_nets import batchnorm_fc_with_inputs, simple_fc_net_with_inputs + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers import paddle.fluid.framework as framework +import paddle.fluid.layers as layers from paddle.fluid.backward import append_backward from paddle.fluid.framework import Program, program_guard -from simple_nets import simple_fc_net_with_inputs, batchnorm_fc_with_inputs -import paddle np.random.seed(123) diff --git a/python/paddle/fluid/tests/unittests/test_conditional_block.py b/python/paddle/fluid/tests/unittests/test_conditional_block.py index fc8d926070e3e9..418ae3875998e7 100644 --- a/python/paddle/fluid/tests/unittests/test_conditional_block.py +++ b/python/paddle/fluid/tests/unittests/test_conditional_block.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers import paddle.fluid.core as core -from paddle.fluid.executor import Executor +import paddle.fluid.layers as layers from paddle.fluid.backward import append_backward +from paddle.fluid.executor import Executor from paddle.fluid.layers.control_flow import ConditionalBlock diff --git a/python/paddle/fluid/tests/unittests/test_conj_op.py b/python/paddle/fluid/tests/unittests/test_conj_op.py index 67647458f5da7e..44698f5679b2de 100644 --- a/python/paddle/fluid/tests/unittests/test_conj_op.py +++ b/python/paddle/fluid/tests/unittests/test_conj_op.py @@ -12,16 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np + import paddle -import sys sys.path.append("..") +from numpy.random import random as rand from op_test import OpTest + import paddle.fluid.dygraph as dg import paddle.static as static -from numpy.random import random as rand paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_const_value.py b/python/paddle/fluid/tests/unittests/test_const_value.py index 58ac6fa0a9a30a..727da878f9beda 100644 --- a/python/paddle/fluid/tests/unittests/test_const_value.py +++ b/python/paddle/fluid/tests/unittests/test_const_value.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle.fluid.framework as framework diff --git a/python/paddle/fluid/tests/unittests/test_context_manager.py b/python/paddle/fluid/tests/unittests/test_context_manager.py index 9ae5630b0aff92..719ade5d49d463 100644 --- a/python/paddle/fluid/tests/unittests/test_context_manager.py +++ b/python/paddle/fluid/tests/unittests/test_context_manager.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.fluid as fluid import unittest +import paddle.fluid as fluid + class TestContextManagerRaiseException(unittest.TestCase): # When exception raised in 'with' context, we should safely exit the context diff --git a/python/paddle/fluid/tests/unittests/test_conv1d_layer.py b/python/paddle/fluid/tests/unittests/test_conv1d_layer.py index 2de244b9b28f93..441318c5129627 100644 --- a/python/paddle/fluid/tests/unittests/test_conv1d_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv1d_layer.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np + import paddle -from paddle import fluid, nn import paddle.fluid.dygraph as dg import paddle.nn.functional as F -import unittest +from paddle import fluid, nn class Conv1DTestCase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py index bd19ab1138d8ff..45edd261bc6eda 100644 --- a/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np + import paddle -from paddle import fluid, nn import paddle.fluid.dygraph as dg import paddle.nn.functional as F -import unittest +from paddle import fluid, nn class Conv1DTransposeTestCase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_api.py b/python/paddle/fluid/tests/unittests/test_conv2d_api.py index 13616155d9e3f5..ef1035d09ccc31 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_api.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_api.py @@ -13,13 +13,14 @@ # limitations under the License. import unittest + import numpy as np import paddle paddle.enable_static() -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core class TestConv2DAPI(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py index 7e4a01331578be..3e5ec0f8a9788f 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py @@ -13,13 +13,13 @@ # limitations under the License. import unittest -import numpy as np -import paddle.fluid.core as core +import numpy as np from op_test import OpTest - from test_conv2d_op import conv2d_forward_naive +import paddle.fluid.core as core + def create_test_padding_SAME_class(parent): class TestPaddingSAMECase(parent): diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_layer.py b/python/paddle/fluid/tests/unittests/test_conv2d_layer.py index 6716bf7ada046e..bdaaac3b8c3e69 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_layer.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + import numpy as np -from paddle import fluid, nn + +import paddle import paddle.fluid.dygraph as dg -import paddle.nn.functional as F import paddle.fluid.initializer as I -import unittest -import paddle +import paddle.nn.functional as F +from paddle import fluid, nn from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_op.py index 0c22f7ff7b2778..ac2947a64aa8ff 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_op.py @@ -13,19 +13,19 @@ # limitations under the License. import unittest + import numpy as np -import paddle import paddle -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid import Program, program_guard from paddle.fluid.tests.unittests.op_test import ( OpTest, convert_float_to_uint16, get_numeric_gradient, ) from paddle.fluid.tests.unittests.testsuite import create_op -from paddle.fluid import Program, program_guard def conv2d_forward_naive( diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_op_depthwise_conv.py b/python/paddle/fluid/tests/unittests/test_conv2d_op_depthwise_conv.py index d6abdc0bcf41f1..fe1a8a0f219e4f 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_op_depthwise_conv.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_op_depthwise_conv.py @@ -13,22 +13,24 @@ # limitations under the License. import unittest + import numpy as np import paddle paddle.enable_static() -import paddle.fluid.core as core from test_conv2d_op import ( TestConv2DOp, TestConv2DOp_v2, - create_test_padding_SAME_class, - create_test_padding_VALID_class, create_test_channel_last_class, - create_test_cudnn_padding_SAME_class, create_test_cudnn_channel_last_class, + create_test_cudnn_padding_SAME_class, + create_test_padding_SAME_class, + create_test_padding_VALID_class, ) +import paddle.fluid.core as core + # ----------------TestDepthwiseConv ----- diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py index 266543bf16af3e..9eeb4fc82dfb14 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + import numpy as np + import paddle -from paddle import fluid, nn import paddle.fluid.dygraph as dg -import paddle.nn.functional as F import paddle.fluid.initializer as I -import unittest +import paddle.nn.functional as F +from paddle import fluid, nn class Conv2DTransposeTestCase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py index f22a536cda8882..8d87195db497a5 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py @@ -14,17 +14,19 @@ import os import unittest + import numpy as np import paddle import paddle.nn as nn paddle.enable_static() -import paddle.fluid.core as core +from op_test import OpTest +from test_attribute_var import UnittestBase + import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard -from test_attribute_var import UnittestBase -from op_test import OpTest def conv2dtranspose_forward_naive(input_, filter_, attrs): diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op_depthwise_conv.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op_depthwise_conv.py index a50b0e2cb96730..d0e3b756fc2963 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op_depthwise_conv.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op_depthwise_conv.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np import paddle diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_layer.py b/python/paddle/fluid/tests/unittests/test_conv3d_layer.py index fd7dc6bb630291..e4d1da2978720e 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_layer.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np -from paddle import fluid, nn + +import paddle import paddle.fluid.dygraph as dg -import paddle.nn.functional as F import paddle.fluid.initializer as I -import paddle +import paddle.nn.functional as F +from paddle import fluid, nn from paddle.fluid.framework import _test_eager_guard -import unittest class Conv3DTestCase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_op.py index d9cd69e4d550ce..dc2760307cfbd0 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_op.py @@ -13,12 +13,13 @@ # limitations under the License. import unittest -import numpy as np -import paddle.fluid.core as core +import numpy as np from op_test import OpTest -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core def conv3d_forward_naive( diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py index 6c690595a0072e..1ea071142c6c73 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + import numpy as np + import paddle -from paddle import fluid, nn import paddle.fluid.dygraph as dg -import paddle.nn.functional as F import paddle.fluid.initializer as I -import unittest +import paddle.nn.functional as F +from paddle import fluid, nn class Conv3DTransposeTestCase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py index 869d1d5f29a132..bf223b8d59fa58 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_op.py @@ -13,14 +13,16 @@ # limitations under the License. import unittest + import numpy as np import paddle paddle.enable_static() -import paddle.fluid.core as core from op_test import OpTest +import paddle.fluid.core as core + def conv3dtranspose_forward_naive(input_, filter_, attrs): padding_algorithm = attrs['padding_algorithm'] diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py index 80fb35f902a8fc..0ad217f6810243 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py @@ -13,12 +13,13 @@ # limitations under the License. import unittest + import numpy as np +from test_conv3d_transpose_op import TestConv3DTransposeOp import paddle -import paddle.fluid.core as core import paddle.fluid as fluid -from test_conv3d_transpose_op import TestConv3DTransposeOp +import paddle.fluid.core as core class TestWithSymmetricPad_NHWC(TestConv3DTransposeOp): diff --git a/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py b/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py index 19b9d6fdee97c4..890fe3b10c7bd8 100644 --- a/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py @@ -13,15 +13,15 @@ # limitations under the License. import unittest + +import gradient_checker import numpy as np -import paddle +from decorator_helper import prog_scope +import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers import paddle.fluid.core as core -import gradient_checker - -from decorator_helper import prog_scope +import paddle.fluid.layers as layers class TestConvDoubleGradCheck(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_conv_shift_op.py b/python/paddle/fluid/tests/unittests/test_conv_shift_op.py index 57a2038d3f59bb..26965d9b393cb1 100644 --- a/python/paddle/fluid/tests/unittests/test_conv_shift_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv_shift_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py b/python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py index c37394ed834274..142359286bbea4 100644 --- a/python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py @@ -13,15 +13,15 @@ # limitations under the License. 
import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers import paddle.fluid.core as core -import gradient_checker - -from decorator_helper import prog_scope +import paddle.fluid.layers as layers class TestConvTransposeDoubleGradCheck(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_corr.py b/python/paddle/fluid/tests/unittests/test_corr.py index 82f28d3cd0046b..eb3eacddedf00b 100644 --- a/python/paddle/fluid/tests/unittests/test_corr.py +++ b/python/paddle/fluid/tests/unittests/test_corr.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest + import numpy as np + import paddle +import paddle.fluid as fluid np_minor_version = int((np.__version__).split('.')[1]) diff --git a/python/paddle/fluid/tests/unittests/test_cos_sim_op.py b/python/paddle/fluid/tests/unittests/test_cos_sim_op.py index 86d1e0e1a8ce7a..15d1d1e75df84c 100644 --- a/python/paddle/fluid/tests/unittests/test_cos_sim_op.py +++ b/python/paddle/fluid/tests/unittests/test_cos_sim_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_cosine_embedding_loss.py b/python/paddle/fluid/tests/unittests/test_cosine_embedding_loss.py index 784e583b4f5f83..f2846f165a7e71 100644 --- a/python/paddle/fluid/tests/unittests/test_cosine_embedding_loss.py +++ b/python/paddle/fluid/tests/unittests/test_cosine_embedding_loss.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License +import unittest + +import numpy as np + import paddle import paddle.static as static -import numpy as np -import unittest def cosine_embedding_loss(input1, input2, label, margin=0.5, reduction='mean'): diff --git a/python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py b/python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py index 1c658d0ecc4583..0626066c34a83b 100644 --- a/python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py +++ b/python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py @@ -13,13 +13,14 @@ # limitations under the License. import unittest + import numpy as np + import paddle +import paddle.fluid.core as core import paddle.nn as nn import paddle.nn.functional as F -import paddle.fluid.core as core - -from paddle.fluid import Program, program_guard, Executor, default_main_program +from paddle.fluid import Executor, Program, default_main_program, program_guard class TestCosineSimilarityAPI(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_count_nonzero_api.py b/python/paddle/fluid/tests/unittests/test_count_nonzero_api.py index 55f34dd2612187..516b0744598848 100644 --- a/python/paddle/fluid/tests/unittests/test_count_nonzero_api.py +++ b/python/paddle/fluid/tests/unittests/test_count_nonzero_api.py @@ -13,7 +13,9 @@ # limitations under the License. 
import unittest + import numpy as np + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_cov.py b/python/paddle/fluid/tests/unittests/test_cov.py index b00fc5b3fe2110..bf0857484ccae5 100644 --- a/python/paddle/fluid/tests/unittests/test_cov.py +++ b/python/paddle/fluid/tests/unittests/test_cov.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest + import numpy as np + import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_cpuonly_spawn.py b/python/paddle/fluid/tests/unittests/test_cpuonly_spawn.py index 93821ee1e8bd86..1b74b0b8a526e9 100644 --- a/python/paddle/fluid/tests/unittests/test_cpuonly_spawn.py +++ b/python/paddle/fluid/tests/unittests/test_cpuonly_spawn.py @@ -15,9 +15,9 @@ import unittest import paddle +import paddle.distributed as dist import paddle.nn as nn import paddle.optimizer as opt -import paddle.distributed as dist class LinearNet(nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_create_global_var.py b/python/paddle/fluid/tests/unittests/test_create_global_var.py index 45dc1e52022853..1d5548718d6b92 100644 --- a/python/paddle/fluid/tests/unittests/test_create_global_var.py +++ b/python/paddle/fluid/tests/unittests/test_create_global_var.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_create_op_doc_string.py b/python/paddle/fluid/tests/unittests/test_create_op_doc_string.py index 07c89eefc32fab..378e0e44ef81bb 100644 --- a/python/paddle/fluid/tests/unittests/test_create_op_doc_string.py +++ b/python/paddle/fluid/tests/unittests/test_create_op_doc_string.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle.fluid.layers as layers diff --git a/python/paddle/fluid/tests/unittests/test_create_parameter.py b/python/paddle/fluid/tests/unittests/test_create_parameter.py index d70e06f184356e..75231c40474b95 100644 --- a/python/paddle/fluid/tests/unittests/test_create_parameter.py +++ b/python/paddle/fluid/tests/unittests/test_create_parameter.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np + +import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard -import paddle class TestCreateParameterError(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py b/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py index 801a1622421cbe..a18328a4038d2e 100644 --- a/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py +++ b/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import random -import numpy as np +import unittest +import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_crop_op.py b/python/paddle/fluid/tests/unittests/test_crop_op.py index f495be8cbefaee..5e13eacb6e5c94 100644 --- a/python/paddle/fluid/tests/unittests/test_crop_op.py +++ b/python/paddle/fluid/tests/unittests/test_crop_op.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_crop_tensor_op.py b/python/paddle/fluid/tests/unittests/test_crop_tensor_op.py index aeb0cfb6414a88..620c10a2bd5f62 100644 --- a/python/paddle/fluid/tests/unittests/test_crop_tensor_op.py +++ b/python/paddle/fluid/tests/unittests/test_crop_tensor_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py b/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py index 9b8acb884ab458..1066f4d2c83055 100644 --- a/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py +++ b/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from op_test import OpTest import unittest + import numpy as np +from op_test import OpTest class CrossEntropy2OpTestBase(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py index 17c51dc741640d..7d6b23623b4987 100644 --- a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py +++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.fluid as fluid -import numpy as np import unittest + +import numpy as np from test_softmax_op import stable_softmax from test_softmax_with_cross_entropy_op import cross_entropy + +import paddle +import paddle.fluid as fluid from paddle.fluid import Program, program_guard from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py b/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py index c9fe88f318e88a..4e139b1bef8f99 100644 --- a/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py +++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid.core as core from op_test import OpTest, randomize_probability + import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_cross_op.py b/python/paddle/fluid/tests/unittests/test_cross_op.py index 73580d6fc8fcd7..6cc366b85c89fa 100644 --- a/python/paddle/fluid/tests/unittests/test_cross_op.py +++ b/python/paddle/fluid/tests/unittests/test_cross_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_crypto.py b/python/paddle/fluid/tests/unittests/test_crypto.py index 147615905fd853..da0506b0080c22 100644 --- a/python/paddle/fluid/tests/unittests/test_crypto.py +++ b/python/paddle/fluid/tests/unittests/test_crypto.py @@ -12,11 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from paddle.fluid.core import CipherUtils -from paddle.fluid.core import CipherFactory - import unittest +from paddle.fluid.core import CipherFactory, CipherUtils + class CipherUtilsTestCase(unittest.TestCase): def test_gen_key(self): diff --git a/python/paddle/fluid/tests/unittests/test_ctc_align.py b/python/paddle/fluid/tests/unittests/test_ctc_align.py index b11dfe0254a252..18d8cb35e6ebed 100644 --- a/python/paddle/fluid/tests/unittests/test_ctc_align.py +++ b/python/paddle/fluid/tests/unittests/test_ctc_align.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid def CTCAlign(input, lod, blank, merge_repeated, padding=0, input_length=None): diff --git a/python/paddle/fluid/tests/unittests/test_cuda_cudnn_version.py b/python/paddle/fluid/tests/unittests/test_cuda_cudnn_version.py index d8229247a817f6..84c688ed9f8bad 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_cudnn_version.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_cudnn_version.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_cuda_device_count.py b/python/paddle/fluid/tests/unittests/test_cuda_device_count.py index 7789bcd944dedc..0e10a5f95a6d5a 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_device_count.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_device_count.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest +import paddle + class TestDeviceCount(unittest.TestCase): def test_device_count(self): diff --git a/python/paddle/fluid/tests/unittests/test_cuda_device_name_capability.py b/python/paddle/fluid/tests/unittests/test_cuda_device_name_capability.py index 07ef914f0a311c..cfeaa84745fd51 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_device_name_capability.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_device_name_capability.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest +import paddle + class TestDeviceName(unittest.TestCase): def test_device_name_default(self): diff --git a/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py b/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py index 4aefb234bbfc14..2af9418d063bf4 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest +import paddle + class TestEmptyCache(unittest.TestCase): def test_empty_cache(self): diff --git a/python/paddle/fluid/tests/unittests/test_cuda_graph.py b/python/paddle/fluid/tests/unittests/test_cuda_graph.py index ce446264f39d52..d8ba91bad7b8a5 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_graph.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_graph.py @@ -12,16 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -from paddle.device.cuda.graphs import CUDAGraph -import unittest -import numpy as np import os import pathlib import shutil -from paddle.fluid.dygraph.base import switch_to_static_graph +import unittest + +import numpy as np from simple_nets import simple_fc_net_with_inputs +import paddle +from paddle.device.cuda.graphs import CUDAGraph +from paddle.fluid.dygraph.base import switch_to_static_graph + def can_use_cuda_graph(): return paddle.is_compiled_with_cuda() and not paddle.is_compiled_with_rocm() diff --git a/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph.py b/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph.py index 7b87dcc1ef9691..9ef385cfa9078f 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.nn as nn import unittest + import numpy as np -from paddle.device.cuda.graphs import wrap_cuda_graph, is_cuda_graph_supported + +import paddle +import paddle.nn as nn +from paddle.device.cuda.graphs import is_cuda_graph_supported, wrap_cuda_graph class SimpleModel(nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph_static.py b/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph_static.py index f33fe76bff5118..2e51237ca241fd 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph_static.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph_static.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import paddle import paddle.nn as nn -import unittest -from paddle.device.cuda.graphs import wrap_cuda_graph, is_cuda_graph_supported +from paddle.device.cuda.graphs import is_cuda_graph_supported, wrap_cuda_graph paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph_static_run.py b/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph_static_run.py index 6dd36a19da61a6..5601fbac533314 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph_static_run.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_graph_partial_graph_static_run.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.nn as nn import unittest + import numpy as np + +import paddle +import paddle.nn as nn from paddle.device.cuda.graphs import ( - wrap_cuda_graph, - is_cuda_graph_supported, cuda_graph_transform, + is_cuda_graph_supported, + wrap_cuda_graph, ) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_cuda_max_memory_allocated.py b/python/paddle/fluid/tests/unittests/test_cuda_max_memory_allocated.py index 909b9a44b11b74..e412945d19a973 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_max_memory_allocated.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_max_memory_allocated.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle import unittest -from paddle.fluid import core + +import paddle from paddle.device.cuda import ( device_count, - memory_allocated, max_memory_allocated, + memory_allocated, ) +from paddle.fluid import core from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_cuda_max_memory_reserved.py b/python/paddle/fluid/tests/unittests/test_cuda_max_memory_reserved.py index e067d293074ab8..9651d893fa7760 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_max_memory_reserved.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_max_memory_reserved.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest -from paddle.fluid import core + +import paddle from paddle.device.cuda import ( device_count, - memory_reserved, max_memory_reserved, + memory_reserved, ) +from paddle.fluid import core class TestMaxMemoryreserved(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_cuda_memory_allocated.py b/python/paddle/fluid/tests/unittests/test_cuda_memory_allocated.py index 5b21a9b07e88f0..16bbb51d559ea7 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_memory_allocated.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_memory_allocated.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest -from paddle.fluid import core + +import paddle from paddle.device.cuda import device_count, memory_allocated +from paddle.fluid import core class TestMemoryAllocated(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_cuda_memory_reserved.py b/python/paddle/fluid/tests/unittests/test_cuda_memory_reserved.py index 6b2cdc45f19242..bb671832d68215 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_memory_reserved.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_memory_reserved.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest -from paddle.fluid import core + +import paddle from paddle.device.cuda import device_count, memory_reserved +from paddle.fluid import core from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py b/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py index d9bef511c9c516..7c3ba6add03122 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py @@ -14,14 +14,15 @@ """Test cloud role maker.""" import os +import shutil +import tempfile import unittest -import paddle.fluid as fluid import numpy as np + import paddle +import paddle.fluid as fluid import paddle.fluid.core as core -import shutil -import tempfile @unittest.skipIf( diff --git a/python/paddle/fluid/tests/unittests/test_cuda_stream_event.py b/python/paddle/fluid/tests/unittests/test_cuda_stream_event.py index 600cd04d1970f8..8cb6b9566f4cd9 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_stream_event.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_stream_event.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from paddle.device import cuda -import paddle import ctypes - import unittest + import numpy as np +import paddle +from paddle.device import cuda + class TestCurrentStream(unittest.TestCase): def test_current_stream(self): diff --git a/python/paddle/fluid/tests/unittests/test_cudnn_grucell.py b/python/paddle/fluid/tests/unittests/test_cudnn_grucell.py index 39c25c909fdc6d..6a1ed39157d529 100644 --- a/python/paddle/fluid/tests/unittests/test_cudnn_grucell.py +++ b/python/paddle/fluid/tests/unittests/test_cudnn_grucell.py @@ -13,12 +13,13 @@ # limitations under the License. import unittest + +import numpy as np + import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.dygraph import GRUCell -import numpy as np - np.random.seed = 123 diff --git a/python/paddle/fluid/tests/unittests/test_cudnn_lstmcell.py b/python/paddle/fluid/tests/unittests/test_cudnn_lstmcell.py index 4eacff27d14324..913dc958117342 100644 --- a/python/paddle/fluid/tests/unittests/test_cudnn_lstmcell.py +++ b/python/paddle/fluid/tests/unittests/test_cudnn_lstmcell.py @@ -13,12 +13,13 @@ # limitations under the License. import unittest + +import numpy as np + import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.dygraph import LSTMCell -import numpy as np - np.random.seed = 123 diff --git a/python/paddle/fluid/tests/unittests/test_cumprod_op.py b/python/paddle/fluid/tests/unittests/test_cumprod_op.py index a6aea5e33fd339..ee4b319c23db7f 100644 --- a/python/paddle/fluid/tests/unittests/test_cumprod_op.py +++ b/python/paddle/fluid/tests/unittests/test_cumprod_op.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random import unittest -import numpy as np +import numpy as np from op_test import OpTest -import random -import paddle +import paddle import paddle.fluid.core as core np.random.seed(0) diff --git a/python/paddle/fluid/tests/unittests/test_cumsum_op.py b/python/paddle/fluid/tests/unittests/test_cumsum_op.py index 1621b6c5432173..4675ac9f52c9cd 100644 --- a/python/paddle/fluid/tests/unittests/test_cumsum_op.py +++ b/python/paddle/fluid/tests/unittests/test_cumsum_op.py @@ -13,17 +13,19 @@ # limitations under the License. import os -import unittest import tempfile +import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope from op_test import OpTest + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid -import paddle.inference as paddle_infer -import gradient_checker -from decorator_helper import prog_scope +import paddle.fluid.core as core import paddle.fluid.layers as layers +import paddle.inference as paddle_infer class TestCumsumOp(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py index 74f63f03f451f3..4a750a6c0752ab 100644 --- a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py +++ b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py @@ -13,6 +13,7 @@ # limitations under the License. 
import unittest + import numpy as np import paddle diff --git a/python/paddle/fluid/tests/unittests/test_cvm_op.py b/python/paddle/fluid/tests/unittests/test_cvm_op.py index a6197e7d726110..282167e8050cfc 100644 --- a/python/paddle/fluid/tests/unittests/test_cvm_op.py +++ b/python/paddle/fluid/tests/unittests/test_cvm_op.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np +import unittest from math import log + +import numpy as np from op_test import OpTest -import unittest def cvm_compute(X, item_width, use_cvm): diff --git a/python/paddle/fluid/tests/unittests/test_cyclic_cifar_dataset.py b/python/paddle/fluid/tests/unittests/test_cyclic_cifar_dataset.py index 01a588c4058a4c..35a40e79726c32 100644 --- a/python/paddle/fluid/tests/unittests/test_cyclic_cifar_dataset.py +++ b/python/paddle/fluid/tests/unittests/test_cyclic_cifar_dataset.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest +import paddle + class TestCifar10(unittest.TestCase): def test_main(self): diff --git a/python/paddle/fluid/tests/unittests/test_data.py b/python/paddle/fluid/tests/unittests/test_data.py index bcb956e25c38a3..25b2372e817c4b 100644 --- a/python/paddle/fluid/tests/unittests/test_data.py +++ b/python/paddle/fluid/tests/unittests/test_data.py @@ -16,9 +16,9 @@ import paddle import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.fluid.layers as layers from paddle.fluid import Program, program_guard -import paddle.fluid.core as core class TestApiDataError(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_data_generator.py b/python/paddle/fluid/tests/unittests/test_data_generator.py index b804e6a96025ee..02677d501e78c8 100644 --- a/python/paddle/fluid/tests/unittests/test_data_generator.py +++ b/python/paddle/fluid/tests/unittests/test_data_generator.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and import unittest + import paddle.distributed.fleet as fleet diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_autotune.py b/python/paddle/fluid/tests/unittests/test_dataloader_autotune.py index 3f35a74440a439..c94c5f54781f28 100755 --- a/python/paddle/fluid/tests/unittests/test_dataloader_autotune.py +++ b/python/paddle/fluid/tests/unittests/test_dataloader_autotune.py @@ -12,16 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest -import numpy as np +import json +import os +import sys import tempfile +import unittest import warnings -import json + +import numpy as np + import paddle import paddle.nn as nn from paddle.io import DataLoader, Dataset -import sys -import os class RandomDataset(Dataset): diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_dataset.py b/python/paddle/fluid/tests/unittests/test_dataloader_dataset.py index 2c4bcfa444b0fc..b174fd67b58727 100644 --- a/python/paddle/fluid/tests/unittests/test_dataloader_dataset.py +++ b/python/paddle/fluid/tests/unittests/test_dataloader_dataset.py @@ -17,8 +17,8 @@ import paddle import paddle.vision.transforms as transforms -from paddle.io import Dataset from paddle.fluid.framework import _test_eager_guard +from paddle.io import Dataset class TestDatasetAbstract(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py b/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py index fa0bac6c5bda90..50f412fca0e66b 100644 --- a/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py +++ b/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid -import numpy as np import unittest +import numpy as np + +import paddle.fluid as fluid + def infinite_reader(): num = 0 diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py b/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py index f2dd18c008082e..8263edd7469eb7 100644 --- a/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py +++ b/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid +import os import unittest + import numpy as np -import os + +import paddle.fluid as fluid def create_reader(shape, batch_number): diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py b/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py index 8e121ba401ee22..c8cf808526b5d3 100644 --- a/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py +++ b/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid +import os import unittest + import numpy as np -import os + +import paddle.fluid as fluid from paddle.fluid.reader import keep_data_loader_order keep_data_loader_order(False) diff --git a/python/paddle/fluid/tests/unittests/test_dataset.py b/python/paddle/fluid/tests/unittests/test_dataset.py index 5c0dc9db6a2444..95e252c12f41b5 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset.py +++ b/python/paddle/fluid/tests/unittests/test_dataset.py @@ -16,13 +16,14 @@ including create, config, run, etc. 
""" -import paddle -import paddle.fluid as fluid -import paddle.fluid.core as core import os import tempfile import unittest +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core + class TestDataset(unittest.TestCase): """TestCases for Dataset.""" diff --git a/python/paddle/fluid/tests/unittests/test_dataset_consistency_inspection.py b/python/paddle/fluid/tests/unittests/test_dataset_consistency_inspection.py index 51463e7ff6287b..63814b468245ed 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset_consistency_inspection.py +++ b/python/paddle/fluid/tests/unittests/test_dataset_consistency_inspection.py @@ -15,12 +15,13 @@ TestCases for Dataset consistency insepection of use_var_list and data_generator. """ -import paddle -import paddle.fluid as fluid import math import os import tempfile import unittest + +import paddle +import paddle.fluid as fluid import paddle.fluid.incubate.data_generator as dg # paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py b/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py index 5c7c97991d63a9..3e4fc60f3acfa3 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py +++ b/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.fluid as fluid -import numpy as np import os -import unittest import tempfile +import unittest + +import numpy as np from simple_nets import simple_fc_net_with_inputs +import paddle +import paddle.fluid as fluid + BATCH_SIZE = 32 BATCH_NUM = 10 EPOCH_NUM = 4 diff --git a/python/paddle/fluid/tests/unittests/test_dataset_download.py b/python/paddle/fluid/tests/unittests/test_dataset_download.py index b009a2fe58dca3..dc8c865c83ea74 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset_download.py +++ b/python/paddle/fluid/tests/unittests/test_dataset_download.py @@ -14,7 +14,8 @@ import os import unittest -from paddle.dataset.common import download, DATA_HOME, md5file + +from paddle.dataset.common import DATA_HOME, download, md5file class TestDataSetDownload(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_debugger.py b/python/paddle/fluid/tests/unittests/test_debugger.py index 23beb7f1a638b8..37824478ca905d 100644 --- a/python/paddle/fluid/tests/unittests/test_debugger.py +++ b/python/paddle/fluid/tests/unittests/test_debugger.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle.fluid.core as core from paddle.fluid import debugger from paddle.fluid.framework import Program diff --git a/python/paddle/fluid/tests/unittests/test_decayed_adagrad_op.py b/python/paddle/fluid/tests/unittests/test_decayed_adagrad_op.py index dae5fd7b7b7874..b647c428a347eb 100644 --- a/python/paddle/fluid/tests/unittests/test_decayed_adagrad_op.py +++ b/python/paddle/fluid/tests/unittests/test_decayed_adagrad_op.py @@ -13,6 +13,7 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py index d7a5d3714317cd..72ec58aa2d1ea6 100644 --- a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py +++ b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.fluid as fluid -import numpy as np import time import unittest +import numpy as np + +import paddle +import paddle.fluid as fluid + EPOCH_NUM = 5 BATCH_SIZE = 16 BATCH_NUM = 10 diff --git a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py index 90c2e84d0eef66..1a7118cfb8279c 100644 --- a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py +++ b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid -import paddle -import numpy as np import unittest +import numpy as np + +import paddle +import paddle.fluid as fluid + class TestClass(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_default_dtype.py b/python/paddle/fluid/tests/unittests/test_default_dtype.py index ca95c820a718e5..ceaa9447cfcf5e 100644 --- a/python/paddle/fluid/tests/unittests/test_default_dtype.py +++ b/python/paddle/fluid/tests/unittests/test_default_dtype.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np -from paddle.framework import set_default_dtype, get_default_dtype + +from paddle.framework import get_default_dtype, set_default_dtype class TestDefaultType(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_default_scope_funcs.py b/python/paddle/fluid/tests/unittests/test_default_scope_funcs.py index 88686585853f5f..c6c0e73c5d2ee6 100644 --- a/python/paddle/fluid/tests/unittests/test_default_scope_funcs.py +++ b/python/paddle/fluid/tests/unittests/test_default_scope_funcs.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + from paddle.fluid.default_scope_funcs import ( enter_local_scope, find_var, @@ -20,7 +22,6 @@ scoped_function, var, ) -import unittest class TestDefaultScopeFuncs(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_deform_conv2d.py b/python/paddle/fluid/tests/unittests/test_deform_conv2d.py index 0bca1d051a4248..304fbcf80a4f5a 100644 --- a/python/paddle/fluid/tests/unittests/test_deform_conv2d.py +++ b/python/paddle/fluid/tests/unittests/test_deform_conv2d.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest +from unittest import TestCase + +import numpy as np + import paddle import paddle.nn.initializer as I -import numpy as np -import unittest from paddle.fluid.framework import _test_eager_guard -from unittest import TestCase class TestDeformConv2D(TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_deformable_conv_op.py b/python/paddle/fluid/tests/unittests/test_deformable_conv_op.py index dec6d9d8dba5ee..273eeeda28c23f 100644 --- a/python/paddle/fluid/tests/unittests/test_deformable_conv_op.py +++ b/python/paddle/fluid/tests/unittests/test_deformable_conv_op.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest + import numpy as np from op_test import OpTest + +import paddle from paddle.fluid.framework import _test_eager_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py b/python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py index 00556f7e5a7224..80903fa5a6b715 100644 --- a/python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py +++ b/python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest + import numpy as np from op_test import OpTest +import paddle + def dmc_bilinear(data_im, height, width, h, w): h_low = int(np.floor(h)) diff --git a/python/paddle/fluid/tests/unittests/test_deformable_psroi_pooling.py b/python/paddle/fluid/tests/unittests/test_deformable_psroi_pooling.py index 8dbe2dfa32252b..d73ef732da8146 100644 --- a/python/paddle/fluid/tests/unittests/test_deformable_psroi_pooling.py +++ b/python/paddle/fluid/tests/unittests/test_deformable_psroi_pooling.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_deg2rad.py b/python/paddle/fluid/tests/unittests/test_deg2rad.py index 79e4541b635e14..5d6d9ac646bfc8 100644 --- a/python/paddle/fluid/tests/unittests/test_deg2rad.py +++ b/python/paddle/fluid/tests/unittests/test_deg2rad.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_density_prior_box_op.py b/python/paddle/fluid/tests/unittests/test_density_prior_box_op.py index 7254f9cc2f8bce..9d621dc551111f 100644 --- a/python/paddle/fluid/tests/unittests/test_density_prior_box_op.py +++ b/python/paddle/fluid/tests/unittests/test_density_prior_box_op.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import math import unittest + import numpy as np -import math from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py b/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py index ae24334fea5721..a41ff3e3b362f7 100755 --- a/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py +++ b/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle +import sys +import unittest +import warnings + import numpy as np + import paddle import paddle.fluid as fluid -import unittest -import sys -import warnings import paddle.utils.deprecated as deprecated from paddle import _legacy_C_ops diff --git a/python/paddle/fluid/tests/unittests/test_deprecated_memory_optimize_interfaces.py b/python/paddle/fluid/tests/unittests/test_deprecated_memory_optimize_interfaces.py index c3a21ba0bcbb65..22b682a0a4eb32 100644 --- a/python/paddle/fluid/tests/unittests/test_deprecated_memory_optimize_interfaces.py +++ b/python/paddle/fluid/tests/unittests/test_deprecated_memory_optimize_interfaces.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest + from simple_nets import simple_fc_net +import paddle.fluid as fluid + class DeprecatedMemoryOptimizationInterfaceTest(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_dequantize_abs_max_op.py b/python/paddle/fluid/tests/unittests/test_dequantize_abs_max_op.py index c4806866eff6e4..0df5a3fda11c2f 100644 --- a/python/paddle/fluid/tests/unittests/test_dequantize_abs_max_op.py +++ b/python/paddle/fluid/tests/unittests/test_dequantize_abs_max_op.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import math import unittest + import numpy as np -import math from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_dequantize_log_op.py b/python/paddle/fluid/tests/unittests/test_dequantize_log_op.py index 5fb9d07d77a42e..9db2aa6b918d15 100644 --- a/python/paddle/fluid/tests/unittests/test_dequantize_log_op.py +++ b/python/paddle/fluid/tests/unittests/test_dequantize_log_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_desc_clone.py b/python/paddle/fluid/tests/unittests/test_desc_clone.py index 6a2e831df94108..5028a3e251e546 100644 --- a/python/paddle/fluid/tests/unittests/test_desc_clone.py +++ b/python/paddle/fluid/tests/unittests/test_desc_clone.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections +import functools +import unittest + import paddle import paddle.fluid as fluid from paddle.fluid import core -import unittest -import functools -import collections SEED = 1 DTYPE = "float32" diff --git a/python/paddle/fluid/tests/unittests/test_detach.py b/python/paddle/fluid/tests/unittests/test_detach.py index 48abc41e3c11c1..bd976cf3943477 100644 --- a/python/paddle/fluid/tests/unittests/test_detach.py +++ b/python/paddle/fluid/tests/unittests/test_detach.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + import numpy as np import paddle @@ -19,8 +21,6 @@ from paddle.fluid.dygraph import Linear from paddle.fluid.dygraph.base import to_variable -import unittest - class Test_Detach(unittest.TestCase): def generate_Data(self): diff --git a/python/paddle/fluid/tests/unittests/test_detection_map_op.py b/python/paddle/fluid/tests/unittests/test_detection_map_op.py index 9cf7abf8fda082..5578d15aab1d70 100644 --- a/python/paddle/fluid/tests/unittests/test_detection_map_op.py +++ b/python/paddle/fluid/tests/unittests/test_detection_map_op.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest -import numpy as np import collections import math +import unittest + +import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_determinant_op.py b/python/paddle/fluid/tests/unittests/test_determinant_op.py index 730864eb23097d..7eff966e60d217 100644 --- a/python/paddle/fluid/tests/unittests/test_determinant_op.py +++ b/python/paddle/fluid/tests/unittests/test_determinant_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_device_guard.py b/python/paddle/fluid/tests/unittests/test_device_guard.py index d1e0c383ac76b0..34a029e7bac57b 100644 --- a/python/paddle/fluid/tests/unittests/test_device_guard.py +++ b/python/paddle/fluid/tests/unittests/test_device_guard.py @@ -13,11 +13,11 @@ # limitations under the License. import unittest +import warnings import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import warnings def execute(main_program, startup_program): diff --git a/python/paddle/fluid/tests/unittests/test_diag.py b/python/paddle/fluid/tests/unittests/test_diag.py index 4135db34218ba7..06969aac8f349c 100644 --- a/python/paddle/fluid/tests/unittests/test_diag.py +++ b/python/paddle/fluid/tests/unittests/test_diag.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_diag_embed.py b/python/paddle/fluid/tests/unittests/test_diag_embed.py index c91f5d9b86c9c7..066ce23c5eaf57 100644 --- a/python/paddle/fluid/tests/unittests/test_diag_embed.py +++ b/python/paddle/fluid/tests/unittests/test_diag_embed.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -import paddle.nn.functional as F + import paddle.fluid as fluid import paddle.fluid.core as core +import paddle.nn.functional as F class TestDiagEmbedOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_diag_v2.py b/python/paddle/fluid/tests/unittests/test_diag_v2.py index f1ff2b77d40411..9c96661f6c081f 100644 --- a/python/paddle/fluid/tests/unittests/test_diag_v2.py +++ b/python/paddle/fluid/tests/unittests/test_diag_v2.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_diagflat.py b/python/paddle/fluid/tests/unittests/test_diagflat.py index f23c5051ddbbc9..899589e9fd6d94 100644 --- a/python/paddle/fluid/tests/unittests/test_diagflat.py +++ b/python/paddle/fluid/tests/unittests/test_diagflat.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle from paddle.static import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_diagonal_op.py b/python/paddle/fluid/tests/unittests/test_diagonal_op.py index bdc64827fd2a82..5b3c3830c57ca0 100644 --- a/python/paddle/fluid/tests/unittests/test_diagonal_op.py +++ b/python/paddle/fluid/tests/unittests/test_diagonal_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_diff_op.py b/python/paddle/fluid/tests/unittests/test_diff_op.py index 4b3981075a18d2..7df3f779ffd847 100644 --- a/python/paddle/fluid/tests/unittests/test_diff_op.py +++ b/python/paddle/fluid/tests/unittests/test_diff_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_digamma_op.py b/python/paddle/fluid/tests/unittests/test_digamma_op.py index ff9e2b182759a9..3f876085dfdfcc 100644 --- a/python/paddle/fluid/tests/unittests/test_digamma_op.py +++ b/python/paddle/fluid/tests/unittests/test_digamma_op.py @@ -13,12 +13,14 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest from scipy.special import psi + import paddle import paddle.fluid as fluid import paddle.static as static -from op_test import OpTest from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_directory_migration.py b/python/paddle/fluid/tests/unittests/test_directory_migration.py index 408ad42379e620..4212b73f214e8b 100644 --- a/python/paddle/fluid/tests/unittests/test_directory_migration.py +++ b/python/paddle/fluid/tests/unittests/test_directory_migration.py @@ -13,9 +13,9 @@ # limitations under the License. import os +import subprocess import sys import tempfile -import subprocess import unittest diff --git a/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py b/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py index f0de021eaadf0e..a563c53f756223 100644 --- a/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py +++ b/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import signal import subprocess +import unittest SignalsToTest = { signal.SIGTERM, diff --git a/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py b/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py index c2469ab92b7e5e..62f598ee27f943 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py @@ -13,7 +13,9 @@ # limitations under the License. 
import unittest + from test_dist_base import TestDistBase + import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py index c0f992c010bf37..57071583f1ed0d 100755 --- a/python/paddle/fluid/tests/unittests/test_dist_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_base.py @@ -12,26 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -import tempfile - +import argparse import ast -import unittest import os -import sys -import subprocess -import argparse import pickle import random -import numpy as np +import subprocess +import sys +import tempfile import time +import unittest + +import numpy as np import paddle import paddle.fluid as fluid -from paddle.fluid import compiler import paddle.fluid.dygraph as dygraph -from paddle.fluid.framework import _test_eager_guard -from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy import paddle.fluid.incubate.fleet.base.role_maker as role_maker +from paddle.fluid import compiler +from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.incubate.fleet.collective import DistributedStrategy, fleet RUN_STEP = 5 DEFAULT_BATCH_SIZE = 2 diff --git a/python/paddle/fluid/tests/unittests/test_dist_dygraph_apis.py b/python/paddle/fluid/tests/unittests/test_dist_dygraph_apis.py index aa5e1e93064fb9..073cae79eb8de9 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_dygraph_apis.py +++ b/python/paddle/fluid/tests/unittests/test_dist_dygraph_apis.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + from test_parallel_dygraph_dataparallel import TestMultipleGpus diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py index 523170367fb3ce..e2f08591d705e7 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os import unittest + import paddle -import os import paddle.distributed.fleet.base.role_maker as role_maker paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py index a51b01209b1d7a..aa46502b4d234a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py @@ -16,6 +16,7 @@ os.environ["WITH_DISTRIBUTE"] = "ON" import unittest + import paddle import paddle.distributed.fleet.base.role_maker as role_maker diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py index e6f618b63a6582..88f3769c83efd4 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py @@ -15,6 +15,7 @@ os.environ["WITH_DISTRIBUTE"] = "ON" import unittest + import paddle import paddle.distributed.fleet.base.role_maker as role_maker diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py index 05abc40a1d0418..789f7ee8637855 100755 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. import os - import unittest + import paddle import paddle.distributed.fleet.base.role_maker as role_maker diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py index 837ea65d54f48b..19c0f48e7c1a2c 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + import paddle -import os import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py index eb21f925fa751e..c84bd2b7ca6bc8 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py @@ -12,27 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.distributed.fleet.utils.ps_util import DistributedInfer +import paddle import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker import paddle.fluid as fluid -import paddle +from paddle.distributed.fleet.utils.ps_util import DistributedInfer """ high level unit test for distribute fleet. 
""" +import argparse import os -import sys -import subprocess - import shutil -import argparse -from contextlib import closing import socket -import time +import subprocess +import sys import tempfile +import time import unittest +from contextlib import closing import paddle diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py index 33006b9d9e82c9..ac3a422dffa4b3 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py @@ -14,6 +14,7 @@ import os import unittest + from test_dist_fleet_base import TestFleetBase diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr2.py index d9c725c3a89c48..623b6ed4d67855 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr2.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr2.py @@ -13,8 +13,9 @@ # limitations under the License. import os -import unittest import tempfile +import unittest + from test_dist_fleet_base import TestFleetBase diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_decay.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_decay.py index 9d5ac645b6136a..21284d69353476 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_decay.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_decay.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + +import paddle import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker import paddle.fluid as fluid -import unittest -import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py index 2e63a28cd24784..da24daebd88c0b 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py @@ -16,13 +16,14 @@ os.environ["WITH_DISTRIBUTE"] = "ON" import unittest + +from dist_fleet_simnet_bow import train_network +from test_dist_fleet_base import TestFleetBase + import paddle -import paddle.fluid as fluid import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker - -from test_dist_fleet_base import TestFleetBase -from dist_fleet_simnet_bow import train_network +import paddle.fluid as fluid paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_gloo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_gloo.py index dd31dc8ef0db86..7b35ec169f5a22 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_gloo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_gloo.py @@ -14,10 +14,10 @@ import os import shutil -import tempfile -import unittest import subprocess +import tempfile import time +import unittest # import paddle.fluid.incubate.fleet.base.role_maker as role_maker from test_dist_fleet_base import TestFleetBase diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py index 6871580b27021b..61f2ea0d6b9a32 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py @@ -15,22 +15,21 @@ high level unit test for distribute fleet. 
""" +import argparse import os -import sys -import subprocess - import shutil -import argparse -from contextlib import closing import socket -import time +import subprocess +import sys import tempfile +import time import unittest +from contextlib import closing import paddle -import paddle.fluid as fluid -import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid __all__ = ['FleetDistHeterRunnerBase', 'TestFleetHeterBase', 'runtime_main'] diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py index 199594f24a88f8..477d9091d7a51b 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import math +import os import unittest + import paddle -import os -import math -import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid from paddle.distributed.fleet import fleet -import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_infer.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_infer.py index c0c7070cf803bf..184209f25e7c62 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_infer.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_infer.py @@ -14,8 +14,9 @@ import os import shutil -import unittest import tempfile +import unittest + from test_dist_fleet_base import TestFleetBase diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py index a63d0912c3ec10..685f8dd0e4c10c 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py @@ -18,9 +18,9 @@ paddle.enable_static() -import paddle.fluid as fluid -import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid # For Net base_lr = 0.2 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps10.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps10.py index db553b990343b2..833c9f8e162138 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps10.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps10.py @@ -15,11 +15,12 @@ import os os.environ["WITH_DISTRIBUTE"] = "ON" -import paddle.fluid as fluid -import paddle.distributed.fleet.base.role_maker as role_maker -import paddle.distributed.fleet as fleet import unittest + import paddle +import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py index 8828dd2cfafdfa..cea69f92e69213 100755 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py @@ -16,9 +16,9 @@ import unittest import paddle -import paddle.fluid as fluid -import paddle.distributed.fleet.base.role_maker as role_maker import 
paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py index 9c752cd3d0daca..f56335454640a3 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py @@ -19,9 +19,9 @@ import unittest import paddle -import paddle.fluid as fluid -import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py index 5d6c3bee7f27da..107488e771f3c4 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py @@ -19,9 +19,9 @@ import unittest import paddle -import paddle.fluid as fluid -import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py index 30f3f8134889c0..2afaa2bf472570 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py @@ -19,9 +19,9 @@ import unittest import paddle -import paddle.fluid as fluid -import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py index 39634ebc4f31a6..9cc5b947f67268 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py @@ -18,9 +18,9 @@ paddle.enable_static() -import paddle.fluid as fluid -import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid # For Net base_lr = 0.2 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py index b7ac497721c7e4..751f0c70b8da2b 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py @@ -18,9 +18,9 @@ paddle.enable_static() -import paddle.fluid as fluid -import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid # For Net base_lr = 0.2 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py index 668d31ee854ba3..0b00a97a344921 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py @@ -18,9 +18,9 @@ paddle.enable_static() -import paddle.fluid as fluid 
-import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid # For Net base_lr = 0.2 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py index b85540f2d92796..f90b08c3f3387f 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py @@ -18,9 +18,9 @@ paddle.enable_static() -import paddle.fluid as fluid -import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid # For Net base_lr = 0.2 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps7.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps7.py index 9351791260b2af..838a9a10719318 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps7.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps7.py @@ -16,11 +16,12 @@ os.environ["WITH_DISTRIBUTE"] = "ON" +import unittest + +import paddle import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker import paddle.fluid as fluid -import unittest -import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps8.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps8.py index cbf12f19714ff5..e203fc34803c29 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps8.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps8.py @@ -15,11 +15,12 @@ import os os.environ["WITH_DISTRIBUTE"] = "ON" +import unittest + +import paddle import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker import paddle.fluid as fluid -import unittest -import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps9.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps9.py index bebc6bbb96536a..67149d458fcb1a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps9.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps9.py @@ -15,11 +15,12 @@ import os os.environ["WITH_DISTRIBUTE"] = "ON" +import unittest + +import paddle import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker import paddle.fluid as fluid -import unittest -import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer.py index ca3c5badc0922e..ef90a541cc9783 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os import unittest + from test_dist_base import TestDistBase + import paddle -import os paddle.enable_static() flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer_fuse_allreduce.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer_fuse_allreduce.py index a6b994dde6d785..2a401beb4ac55e 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer_fuse_allreduce.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_raw_program_optimizer_fuse_allreduce.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + from test_dist_base import TestDistBase + import paddle -import os paddle.enable_static() flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py index 27c76bee6ed9c2..3d31b391de982a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py @@ -14,7 +14,9 @@ import os import unittest + from test_dist_fleet_base import TestFleetBase + import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py index 13eaa703ddc0c7..87d1b12a3a5b5b 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py @@ -16,14 +16,15 @@ import shutil import tempfile import unittest + import paddle paddle.enable_static() -import paddle.fluid as fluid - -from test_dist_fleet_base import TestFleetBase from dist_fleet_sparse_embedding_ctr import fake_ctr_reader +from test_dist_fleet_base import TestFleetBase + +import paddle.fluid as fluid @unittest.skip(reason="Skip unstable ut, need paddle sync mode fix") diff --git a/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py b/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py index f64a7e6882e12f..023d5c3a5cf8b2 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py +++ b/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py @@ -13,13 +13,13 @@ # limitations under the License. import unittest + import numpy as np +import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle - paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py index ba1037b05730c4..55379b2ab54d25 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py @@ -13,7 +13,9 @@ # limitations under the License. 
import unittest + from test_dist_base import TestDistBase + import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py index a480b987317557..c24e550f49f1be 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + from test_dist_base import TestDistBase -import os + import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py index 9a1afd250d8ef8..ffe327c1c61a97 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import shutil import os +import shutil import unittest + from test_dist_base import TestDistBase + import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py index fbfdccfeff41cb..00774eff964fa4 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + from test_dist_base import TestDistBase + import paddle paddle.enable_static() @@ -45,8 +47,8 @@ def test_open_sync_batch_norm(self): import paddle.fluid as fluid import paddle.fluid.incubate.fleet.base.role_maker as role_maker from paddle.fluid.incubate.fleet.collective import ( - fleet, DistributedStrategy, + fleet, ) if not fluid.core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_fp16_allreduce.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_fp16_allreduce.py index ce74d92a7b81af..37d0130d2f5aef 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_fp16_allreduce.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_fp16_allreduce.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + from test_dist_base import TestDistBase diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py index ca1b7299b8c63d..46da80eb49cbb1 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + from test_dist_base import TestDistBase -import os import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_lars.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_lars.py index 81b96e7c8991cd..f72d9108598fab 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_lars.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_lars.py @@ -13,6 +13,7 @@ # limitations under the License. 
import unittest + from test_dist_base import TestDistBase diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py index 82f09bcfae1d01..46a047806d3762 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + from test_dist_base import TestDistBase -import os import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py index e517e38978e48f..f7ae4755d689b8 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + from test_dist_base import TestDistBase + import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py index 40ae6686ed98ce..de913897b70fa0 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + from test_dist_base import TestDistBase + import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_train.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_train.py index f6dfa739846605..357aada7def535 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_train.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_train.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest -from test_dist_base import TestDistBase -import os +from test_dist_base import TestDistBase flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py index 83c9a340e32cb0..754efe54730bbc 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + from test_dist_base import TestDistBase + import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_op.py b/python/paddle/fluid/tests/unittests/test_dist_op.py index e627567b1afd9a..f99a416a6630e1 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_op.py +++ b/python/paddle/fluid/tests/unittests/test_dist_op.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_dist_save_load.py b/python/paddle/fluid/tests/unittests/test_dist_save_load.py index d4de87866227fc..9eeaf376a5e0cf 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_dist_save_load.py @@ -14,15 +14,12 @@ import os import shutil -import unittest import tempfile +import unittest import numpy as np - from test_dist_base import TestDistBase -import os - flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py index 35ffbbb80ca043..e68cbf34b4fe9f 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py +++ b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + from test_dist_base import TestDistBase -import os -import os import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_sync.py b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_sync.py index cc5905506e6d07..6a00d41a6c2cf5 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_sync.py +++ b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_sync.py @@ -12,11 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest -from test_dist_base import TestDistBase import os +import unittest -import os +from test_dist_base import TestDistBase flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_sharding_save.py b/python/paddle/fluid/tests/unittests/test_dist_sharding_save.py index 29442a9fb68389..0d06777c8b6ede 100755 --- a/python/paddle/fluid/tests/unittests/test_dist_sharding_save.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sharding_save.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import shutil import os +import shutil import unittest + from test_dist_base import TestDistBase + import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py index f79afcca3de888..75f076ae7ce7af 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py @@ -13,13 +13,15 @@ # limitations under the License. import os +import shutil +import tempfile import unittest + import numpy as np -import tempfile -import shutil + import paddle -import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid from paddle.distributed.fleet import fleet diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps1.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps1.py index 1152ff3e82b207..2b9f9ee54434ad 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps1.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps1.py @@ -13,14 +13,16 @@ # limitations under the License. 
import os +import shutil import unittest + import numpy as np -import shutil +from test_dist_sparse_load_ps0 import SparseLoadOp + import paddle -import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid from paddle.distributed.fleet import fleet -from test_dist_sparse_load_ps0 import SparseLoadOp @unittest.skip(reason="Skip unstable ut, need rewrite with new implement") diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_adagrad.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_adagrad.py index 5fca7c3afa7b70..e1fae30ddab28e 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_adagrad.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_adagrad.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + +from test_dist_sparse_tensor_load_sgd import TestSparseLoadProgram + import paddle import paddle.fluid as fluid from paddle.distributed.fleet import fleet -from test_dist_sparse_tensor_load_sgd import TestSparseLoadProgram class TestSparseLoadProgramAdagrad(TestSparseLoadProgram): diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_adam.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_adam.py index 8305ade7b3e981..bebe9dab7604cd 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_adam.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_adam.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + +from test_dist_sparse_tensor_load_sgd import TestSparseLoadProgram + import paddle import paddle.fluid as fluid from paddle.distributed.fleet import fleet -from test_dist_sparse_tensor_load_sgd import TestSparseLoadProgram class TestSparseLoadProgramAdam(TestSparseLoadProgram): diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_ftrl.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_ftrl.py index 8193e20037a387..c267e84d279731 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_ftrl.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_ftrl.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + +from test_dist_sparse_tensor_load_sgd import TestSparseLoadProgram + import paddle import paddle.fluid as fluid from paddle.distributed.fleet import fleet -from test_dist_sparse_tensor_load_sgd import TestSparseLoadProgram class TestSparseLoadProgramFtrl(TestSparseLoadProgram): diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_momentum.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_momentum.py index 83fc785bddd9f8..02108196f08f19 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_momentum.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_momentum.py @@ -13,10 +13,12 @@ # limitations under the License. 
import unittest + +from test_dist_sparse_tensor_load_sgd import TestSparseLoadProgram + import paddle import paddle.fluid as fluid from paddle.distributed.fleet import fleet -from test_dist_sparse_tensor_load_sgd import TestSparseLoadProgram class TestSparseLoadProgramMomentum(TestSparseLoadProgram): diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_rmsprop.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_rmsprop.py index 049a9043cf1a23..f0005e894ee6d0 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_rmsprop.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_rmsprop.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + +from test_dist_sparse_tensor_load_sgd import TestSparseLoadProgram + import paddle import paddle.fluid as fluid from paddle.distributed.fleet import fleet -from test_dist_sparse_tensor_load_sgd import TestSparseLoadProgram class TestSparseLoadProgramRmsprop(TestSparseLoadProgram): diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py index 42737d3d69b4f1..0c2073e3b72b07 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py @@ -14,9 +14,10 @@ import os import unittest + import paddle -import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid as fluid from paddle.distributed.fleet import fleet diff --git a/python/paddle/fluid/tests/unittests/test_dist_text_classification.py b/python/paddle/fluid/tests/unittests/test_dist_text_classification.py index 36215f4e26c487..f777775724aee0 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_text_classification.py +++ b/python/paddle/fluid/tests/unittests/test_dist_text_classification.py @@ -14,9 +14,8 @@ import os import unittest -from test_dist_base import TestDistBase -import os +from test_dist_base import TestDistBase flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_train.py b/python/paddle/fluid/tests/unittests/test_dist_train.py index 3067321289b9cf..5fa4f87f95a324 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_train.py +++ b/python/paddle/fluid/tests/unittests/test_dist_train.py @@ -13,23 +13,20 @@ # limitations under the License. 
import os +import signal import time import unittest from multiprocessing import Process -import signal import numpy as np +from dist_test_utils import remove_ps_flag +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers -from paddle.fluid.layers.io import ListenAndServ -from paddle.fluid.layers.io import Recv -from paddle.fluid.layers.io import Send import paddle.fluid.layers.ops as ops -from dist_test_utils import remove_ps_flag - from paddle.fluid import core -import paddle +from paddle.fluid.layers.io import ListenAndServ, Recv, Send RPC_OP_ROLE_ATTR_NAME = ( op_role_attr_name diff --git a/python/paddle/fluid/tests/unittests/test_dist_transformer.py b/python/paddle/fluid/tests/unittests/test_dist_transformer.py index 470468f7fe3a15..6ccebe309df209 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transformer.py @@ -14,9 +14,11 @@ import os import unittest -import paddle + from test_dist_base import TestDistBase +import paddle + def download_files(): url_prefix = 'http://paddle-unittest-data.bj.bcebos.com/dist_transformer/' diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index 2c53d27efa0171..cdeb7619e7b9d8 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -12,13 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import math - import functools +import gc +import math import unittest -import numpy as np -import gc +import numpy as np gc.set_debug(gc.DEBUG_COLLECTABLE) diff --git a/python/paddle/fluid/tests/unittests/test_dist_tree_index.py b/python/paddle/fluid/tests/unittests/test_dist_tree_index.py index d3d3d249c41254..f08080018ea845 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_tree_index.py +++ b/python/paddle/fluid/tests/unittests/test_dist_tree_index.py @@ -15,10 +15,11 @@ import os import tempfile import unittest + +import paddle +import paddle.fluid as fluid from paddle.dataset.common import download from paddle.distributed.fleet.dataset import TreeIndex -import paddle.fluid as fluid -import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_dist_word2vec.py b/python/paddle/fluid/tests/unittests/test_dist_word2vec.py index be100d7699f0ff..bfdfb8c071b02c 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_word2vec.py +++ b/python/paddle/fluid/tests/unittests/test_dist_word2vec.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest -from test_dist_base import TestDistBase -import os +from test_dist_base import TestDistBase flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py b/python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py index 354a630bc3847a..afb053c3be5381 100644 --- a/python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py +++ b/python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py @@ -13,11 +13,12 @@ # limitations under the License. 
import unittest -import numpy as np -import paddle +import numpy as np from op_test import OpTest +import paddle + def distribute_fpn_proposals_wrapper( fpn_rois, diff --git a/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_clip.py b/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_clip.py index 7638f7cf2436fc..431f9632441571 100644 --- a/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_clip.py +++ b/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_clip.py @@ -14,11 +14,12 @@ import os import shlex -import sys import shutil +import sys +import tempfile import unittest + import paddle -import tempfile def get_test_file(): diff --git a/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_gradient_merge.py b/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_gradient_merge.py index 37b315e61a808a..0c7096f5dae1a0 100644 --- a/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_gradient_merge.py +++ b/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_gradient_merge.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from test_distributed_fused_lamb_op_with_clip import run_test import unittest +from test_distributed_fused_lamb_op_with_clip import run_test + class TestDistributedFusedLambGradientMerge(unittest.TestCase): def test_gm(self): diff --git a/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_without_clip.py b/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_without_clip.py index 6e9d8ddea17287..69589835579828 100644 --- a/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_without_clip.py +++ b/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_without_clip.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from test_distributed_fused_lamb_op_with_clip import run_test import unittest +from test_distributed_fused_lamb_op_with_clip import run_test + class TestDistributedFusedLambWithoutClip(unittest.TestCase): def test_1(self): diff --git a/python/paddle/fluid/tests/unittests/test_distributions.py b/python/paddle/fluid/tests/unittests/test_distributions.py index a032b74f796bd1..df8fe980e88433 100644 --- a/python/paddle/fluid/tests/unittests/test_distributions.py +++ b/python/paddle/fluid/tests/unittests/test_distributions.py @@ -12,8 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np +import math import unittest + +import numpy as np + from paddle import fluid from paddle.fluid import layers from paddle.fluid.layers.distributions import ( @@ -22,7 +25,6 @@ Normal, Uniform, ) -import math class DistributionNumpy: diff --git a/python/paddle/fluid/tests/unittests/test_dot_op.py b/python/paddle/fluid/tests/unittests/test_dot_op.py index 006c91d60e64b1..55460c2f14b9d6 100644 --- a/python/paddle/fluid/tests/unittests/test_dot_op.py +++ b/python/paddle/fluid/tests/unittests/test_dot_op.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -import paddle.fluid as fluid -import paddle.fluid.core as core import unittest + import numpy as np from op_test import OpTest + +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_downpoursgd.py b/python/paddle/fluid/tests/unittests/test_downpoursgd.py index 6626f0ebfa0478..556bdda232a2bf 100644 --- a/python/paddle/fluid/tests/unittests/test_downpoursgd.py +++ b/python/paddle/fluid/tests/unittests/test_downpoursgd.py @@ -13,17 +13,19 @@ # limitations under the License. """Test cases for Downpour.""" -import paddle -import paddle.fluid as fluid import os -import unittest import sys +import unittest + +from google.protobuf import text_format + +import paddle +import paddle.fluid as fluid +import paddle.fluid.incubate.fleet.parameter_server.pslib.ps_pb2 as pslib from paddle.fluid.incubate.fleet.parameter_server.pslib.node import ( - DownpourWorker, DownpourServer, + DownpourWorker, ) -from google.protobuf import text_format -import paddle.fluid.incubate.fleet.parameter_server.pslib.ps_pb2 as pslib from paddle.fluid.trainer_factory import TrainerFactory cache_path = os.path.expanduser('~/.cache/paddle/dataset') diff --git a/python/paddle/fluid/tests/unittests/test_dpsgd_op.py b/python/paddle/fluid/tests/unittests/test_dpsgd_op.py index 2e505c05a891f4..6a18b7a8913b01 100644 --- a/python/paddle/fluid/tests/unittests/test_dpsgd_op.py +++ b/python/paddle/fluid/tests/unittests/test_dpsgd_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_dropout_nd_op.py b/python/paddle/fluid/tests/unittests/test_dropout_nd_op.py index e19a0104de38d9..c4e01fb6e4c048 100644 --- a/python/paddle/fluid/tests/unittests/test_dropout_nd_op.py +++ b/python/paddle/fluid/tests/unittests/test_dropout_nd_op.py @@ -13,16 +13,18 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _non_static_mode from paddle import _legacy_C_ops -from paddle.static import default_main_program from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle.fluid.framework import _non_static_mode +from paddle.fluid.layer_helper import LayerHelper +from paddle.static import default_main_program def dropout_nd( diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py index 862ca0c1f9135a..6c886acea0d8b2 100644 --- a/python/paddle/fluid/tests/unittests/test_dropout_op.py +++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py @@ -13,16 +13,17 @@ # limitations under the License. 
import unittest + import numpy as np -import paddle.fluid.core as core -from op_test import OpTest, skip_check_grad_ci, convert_float_to_uint16 +from op_test import OpTest, convert_float_to_uint16, skip_check_grad_ci + import paddle -import paddle.static as static import paddle.fluid as fluid -from paddle.fluid import Program, program_guard -from paddle.fluid.framework import _test_eager_guard, _enable_legacy_dygraph - +import paddle.fluid.core as core +import paddle.static as static from paddle import _C_ops +from paddle.fluid import Program, program_guard +from paddle.fluid.framework import _enable_legacy_dygraph, _test_eager_guard class TestDropoutOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py b/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py index f96595588c3dbc..204361fc92d8b8 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py @@ -13,11 +13,12 @@ # limitations under the License. import unittest + import numpy as np import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph.nn import Pool2D, Linear +from paddle.fluid.dygraph.nn import Linear, Pool2D from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_mode_of_unittest.py b/python/paddle/fluid/tests/unittests/test_dygraph_mode_of_unittest.py index 8da813c00ef447..82eb7256b7cef2 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_mode_of_unittest.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_mode_of_unittest.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py index 08bb8fceb4fa40..3c099642553035 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py @@ -13,15 +13,16 @@ # limitations under the License. import unittest + import numpy as np +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid.optimizer import SGDOptimizer -from paddle.fluid.dygraph.nn import Pool2D, Linear from paddle.fluid.dygraph.base import to_variable -from test_imperative_base import new_program_scope +from paddle.fluid.dygraph.nn import Linear, Pool2D +from paddle.fluid.optimizer import SGDOptimizer SEED = 123123111 diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_spectral_norm.py b/python/paddle/fluid/tests/unittests/test_dygraph_spectral_norm.py index b8968d3fdd01c0..86acc30b47715a 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_spectral_norm.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_spectral_norm.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import collections import unittest + import numpy as np -import collections + import paddle import paddle.nn as nn from paddle.nn.utils import spectral_norm diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_weight_norm.py b/python/paddle/fluid/tests/unittests/test_dygraph_weight_norm.py index 1a3e4a2e7b5103..f277a7fb7c5c3b 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_weight_norm.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_weight_norm.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest -import numpy as np import collections +import unittest from functools import reduce + +import numpy as np + import paddle import paddle.fluid as fluid -from paddle.nn.utils import weight_norm, remove_weight_norm +from paddle.nn.utils import remove_weight_norm, weight_norm class TestDygraphWeightNorm(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py b/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py index fff63bcb0057ee..bea1473ac78fe4 100644 --- a/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers -import unittest -import paddle def build_and_run_program(place, batch_size, beam_size, stop_gradient=False): diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_conditional_block.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_conditional_block.py index aa28b5886c094e..4f89a3691d6099 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_conditional_block.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_conditional_block.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest +import paddle.fluid as fluid + fluid.core._set_eager_deletion_mode(0.0, 1.0, True) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py index 684322c529265e..d3ea6fd4f4f5f6 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py @@ -13,17 +13,18 @@ # limitations under the License. 
import os + import numpy as np os.environ['FLAGS_use_mkldnn'] = '0' os.environ['CPU_NUM'] = '4' -import paddle.fluid as fluid -import unittest import multiprocessing +import unittest from functools import reduce import paddle +import paddle.fluid as fluid paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py index 86952d74e397b4..f9294f152dcded 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py @@ -18,11 +18,12 @@ import unittest +from fake_reader import fake_imdb_reader + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import compiler -from fake_reader import fake_imdb_reader def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2): diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py index c28ece138fb41c..b5dff40cb3d268 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_gru_net.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + from test_eager_deletion_dynamic_rnn_base import TestBase + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py index 998872f0acf21a..3cd7c681adc72c 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_lstm_net.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + from test_eager_deletion_dynamic_rnn_base import TestBase + import paddle import paddle.fluid as fluid -import unittest fluid.core._set_eager_deletion_mode(0.0, 1.0, True) diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py index 89d1d769be3324..f9344e9f823cd1 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_mnist.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle.fluid as fluid fluid.core._set_eager_deletion_mode(0.0, 1.0, True) diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py index c0a6ad983b1c73..66bf53c8a902d5 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers -import os - from paddle.fluid import ParamAttr from paddle.fluid.contrib.layers import basic_lstm from paddle.fluid.executor import Executor diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py index d9fef7d73642fe..e044071e43d88e 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py @@ -13,17 +13,18 @@ # limitations under the License. import os +import unittest + import numpy as np + +import paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.layers as layers -import unittest - from paddle.fluid import ParamAttr -from paddle.fluid.framework import Program, grad_var_name -from paddle.fluid.executor import Executor from paddle.fluid.backward import append_backward -import paddle +from paddle.fluid.executor import Executor +from paddle.fluid.framework import Program, grad_var_name paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py index 8c359a39195e37..d61e6a6f078b38 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py @@ -16,16 +16,17 @@ os.environ['CPU_NUM'] = '2' +import multiprocessing import unittest -import paddle.fluid as fluid -import paddle.fluid.layers as layers -from paddle.fluid.executor import Executor -import paddle.fluid.core as core -import paddle.fluid.compiler as compiler + import numpy -import multiprocessing import paddle +import paddle.fluid as fluid +import paddle.fluid.compiler as compiler +import paddle.fluid.core as core +import paddle.fluid.layers as layers +from paddle.fluid.executor import Executor paddle.enable_static() fluid.core._set_eager_deletion_mode(0.0, 1.0, True) diff --git a/python/paddle/fluid/tests/unittests/test_eager_run_program.py b/python/paddle/fluid/tests/unittests/test_eager_run_program.py index 847bdc3d1a8fc3..c52599b87d6ae7 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_run_program.py +++ b/python/paddle/fluid/tests/unittests/test_eager_run_program.py @@ -12,23 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle +import unittest + import numpy as np + +import paddle from paddle import _legacy_C_ops -from paddle.fluid.framework import ( - _test_eager_guard, - Variable, - _in_legacy_dygraph, -) from paddle.fluid import core -from paddle.fluid.layers.utils import _hash_with_id from paddle.fluid.dygraph.base import switch_to_static_graph from paddle.fluid.executor import ( - _is_enable_standalone_executor, _is_dy2st_enable_standalone_executor, + _is_enable_standalone_executor, ) - -import unittest +from paddle.fluid.framework import ( + Variable, + _in_legacy_dygraph, + _test_eager_guard, +) +from paddle.fluid.layers.utils import _hash_with_id def _append_backward_desc(main_program, outs): diff --git a/python/paddle/fluid/tests/unittests/test_eager_trace_op.py b/python/paddle/fluid/tests/unittests/test_eager_trace_op.py index 6f93c4bd19dc16..b463cbb051a226 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_trace_op.py +++ b/python/paddle/fluid/tests/unittests/test_eager_trace_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_edit_distance_op.py b/python/paddle/fluid/tests/unittests/test_edit_distance_op.py index c737ed2e41ca35..38c304f579f5a0 100644 --- a/python/paddle/fluid/tests/unittests/test_edit_distance_op.py +++ b/python/paddle/fluid/tests/unittests/test_edit_distance_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py b/python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py index aea56537654da5..20ce26acb9b732 100644 --- a/python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle +import unittest + import numpy as np + +import paddle from paddle.fluid.framework import _test_eager_guard -import unittest class EagerOpAPIGenerateTestCase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py index 4471d78936ab82..25cdfe82af39b6 100644 --- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py @@ -12,9 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid.core as core -import paddle +import copy +import unittest + import numpy as np + +import paddle +import paddle.fluid.core as core from paddle.fluid.framework import ( EagerParamBase, _current_expected_place, @@ -22,8 +26,6 @@ _test_eager_guard, in_dygraph_mode, ) -import unittest -import copy class EagerScaleTestCase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_egr_string_tensor_api.py b/python/paddle/fluid/tests/unittests/test_egr_string_tensor_api.py index b25e40149bac70..82ab0ee8ffbe9b 100644 --- a/python/paddle/fluid/tests/unittests/test_egr_string_tensor_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_string_tensor_api.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.fluid.core as core +import unittest + import numpy as np + +import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard -import unittest class EagerStringTensorTestCase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_eig_op.py b/python/paddle/fluid/tests/unittests/test_eig_op.py index 338eb3512e57a3..1696941b1ec548 100644 --- a/python/paddle/fluid/tests/unittests/test_eig_op.py +++ b/python/paddle/fluid/tests/unittests/test_eig_op.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np +from op_test import OpTest, skip_check_grad_ci + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from op_test import OpTest, skip_check_grad_ci -import unittest # cast output to complex for numpy.linalg.eig diff --git a/python/paddle/fluid/tests/unittests/test_eigh_op.py b/python/paddle/fluid/tests/unittests/test_eigh_op.py index 928234a138f13b..33278050ef882c 100644 --- a/python/paddle/fluid/tests/unittests/test_eigh_op.py +++ b/python/paddle/fluid/tests/unittests/test_eigh_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np -import paddle from op_test import OpTest +import paddle + def valid_eigh_result(A, eigh_value, eigh_vector, uplo): assert A.ndim == 2 or A.ndim == 3 diff --git a/python/paddle/fluid/tests/unittests/test_eigvals_op.py b/python/paddle/fluid/tests/unittests/test_eigvals_op.py index df8d6b001cffbf..ac989f24755df1 100644 --- a/python/paddle/fluid/tests/unittests/test_eigvals_op.py +++ b/python/paddle/fluid/tests/unittests/test_eigvals_op.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest -import paddle.fluid.core as core + import numpy as np from op_test import OpTest +import paddle +import paddle.fluid.core as core + np.set_printoptions(threshold=np.inf) diff --git a/python/paddle/fluid/tests/unittests/test_eigvalsh_op.py b/python/paddle/fluid/tests/unittests/test_eigvalsh_op.py index 2dc378a0810dc5..d00431531cf6b2 100644 --- a/python/paddle/fluid/tests/unittests/test_eigvalsh_op.py +++ b/python/paddle/fluid/tests/unittests/test_eigvalsh_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np -import paddle from op_test import OpTest +import paddle + def compare_result(actual, expected): assert actual.ndim == 1 or actual.ndim == 2 diff --git a/python/paddle/fluid/tests/unittests/test_einsum.py b/python/paddle/fluid/tests/unittests/test_einsum.py index 2e8a0a6c7cb5e5..a689c5f1e42fe7 100644 --- a/python/paddle/fluid/tests/unittests/test_einsum.py +++ b/python/paddle/fluid/tests/unittests/test_einsum.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np +import os import unittest + +import numpy as np + import paddle from paddle.fluid import core -import os - os.environ['FLAGS_new_einsum'] = "0" diff --git a/python/paddle/fluid/tests/unittests/test_einsum_op.py b/python/paddle/fluid/tests/unittests/test_einsum_op.py index bb48cd31dd481f..adb531b77868f6 100644 --- a/python/paddle/fluid/tests/unittests/test_einsum_op.py +++ b/python/paddle/fluid/tests/unittests/test_einsum_op.py @@ -13,10 +13,12 @@ # limitations under the License. 
import unittest + import numpy as np -import paddle from op_test import OpTest +import paddle + class TestEinsumBinary(OpTest): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_einsum_v2.py b/python/paddle/fluid/tests/unittests/test_einsum_v2.py index f45f9ace1cc649..1d76b4dea802d8 100644 --- a/python/paddle/fluid/tests/unittests/test_einsum_v2.py +++ b/python/paddle/fluid/tests/unittests/test_einsum_v2.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np +import os import unittest + +import numpy as np + import paddle from paddle.fluid import core -import os - os.environ['FLAGS_new_einsum'] = "1" diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py index 79c5bdda4337b5..3ddf25bcb09d78 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py @@ -13,17 +13,19 @@ # limitations under the License. import unittest + import numpy as np + import paddle +import paddle.fluid as fluid import paddle.fluid.core as core +from paddle.fluid import Program, program_guard +from paddle.fluid.framework import _test_eager_guard from paddle.fluid.tests.unittests.op_test import ( OpTest, - skip_check_grad_ci, convert_float_to_uint16, + skip_check_grad_ci, ) -import paddle.fluid as fluid -from paddle.fluid import Program, program_guard -from paddle.fluid.framework import _test_eager_guard class TestElementwiseAddOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py index 9f37a456b7441e..3c01e3fd7b39a2 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np -from op_test import OpTest, skip_check_grad_ci, convert_float_to_uint16 +from op_test import OpTest, convert_float_to_uint16, skip_check_grad_ci + import paddle from paddle import fluid from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_floordiv_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_floordiv_op.py index 022d5929f1bab4..0f2c68487be295 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_floordiv_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_floordiv_op.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random import unittest + import numpy as np -import paddle -import paddle.fluid as fluid from op_test import OpTest -import random +import paddle +import paddle.fluid as fluid class TestElementwiseModOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_gradient_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_gradient_op.py index 91401ff574f6a2..fff002d40d2ca8 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_gradient_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_gradient_op.py @@ -13,10 +13,11 @@ # limitations under the License. 
import unittest + import numpy as np -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core class TestElementWiseAddOp(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_heaviside_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_heaviside_op.py index aebf1c3f4fe82e..d917a6bc33a661 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_heaviside_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_heaviside_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_max_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_max_op.py index 671b5a942b8a84..f6d17740687382 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_max_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_max_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np -from op_test import OpTest, skip_check_grad_ci, convert_float_to_uint16 -import paddle.fluid.core as core +from op_test import OpTest, convert_float_to_uint16, skip_check_grad_ci + import paddle +import paddle.fluid.core as core class TestElementwiseOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py index 8df9f9842b8854..c9835b5cb1566f 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest, skip_check_grad_ci + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py index 9c9d2d91209f35..d953fbbad85d97 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import random import unittest + import numpy as np -import paddle -import paddle.fluid as fluid from op_test import OpTest -import random +import paddle +import paddle.fluid as fluid class TestElementwiseModOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py index c72728cfe951b3..0d3b829e9159d0 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py @@ -15,16 +15,16 @@ import unittest import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid import Program, program_guard from paddle.fluid.framework import _test_eager_guard - from paddle.fluid.tests.unittests.op_test import ( OpTest, - skip_check_grad_ci, convert_float_to_uint16, + skip_check_grad_ci, ) diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py b/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py index 3f816b61650134..115d5e947f6a73 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py @@ -13,15 +13,15 @@ # limitations under the License. import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers import paddle.fluid.core as core -import gradient_checker - -from decorator_helper import prog_scope +import paddle.fluid.layers as layers class TestElementwiseMulDoubleGradCheck(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py index 1d53dbdb2fa6a5..6ff9c7961cd78b 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_pow_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest, skip_check_grad_ci -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid def pow_grad(x, y, dout): diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py index d2ad1d90f0846c..99880d1e2aad7d 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest, convert_float_to_uint16, skip_check_grad_ci + import paddle import paddle.fluid as fluid -from op_test import OpTest, skip_check_grad_ci, convert_float_to_uint16 from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_ema.py b/python/paddle/fluid/tests/unittests/test_ema.py index fe4178c8259805..811e49cc902797 100644 --- a/python/paddle/fluid/tests/unittests/test_ema.py +++ b/python/paddle/fluid/tests/unittests/test_ema.py @@ -13,7 +13,9 @@ # limitations under the License. 
import unittest + import numpy as np + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_ema_fleet.py b/python/paddle/fluid/tests/unittests/test_ema_fleet.py index 8a3a6993d050d6..7bc95afeae3fbd 100644 --- a/python/paddle/fluid/tests/unittests/test_ema_fleet.py +++ b/python/paddle/fluid/tests/unittests/test_ema_fleet.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np + import paddle -import paddle.utils as utils import paddle.static as static +import paddle.utils as utils def gen_data(): diff --git a/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py b/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py index be5fdcba695575..a61b396273fba4 100644 --- a/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_embedding_id_stop_gradient.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np + import paddle import paddle.fluid as fluid -import unittest class TestEmbeddingIdStopGradientBase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_empty_like_op.py b/python/paddle/fluid/tests/unittests/test_empty_like_op.py index 82ad72e11e5f2b..96665f11130f41 100644 --- a/python/paddle/fluid/tests/unittests/test_empty_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_empty_like_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np + import paddle -from paddle.fluid.data_feeder import convert_dtype import paddle.fluid.core as core -from paddle.static import program_guard, Program +from paddle.fluid.data_feeder import convert_dtype +from paddle.static import Program, program_guard class TestEmptyLikeAPICommon(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_empty_op.py b/python/paddle/fluid/tests/unittests/test_empty_op.py index 7b488aa0c6dda1..718835f1a8494a 100644 --- a/python/paddle/fluid/tests/unittests/test_empty_op.py +++ b/python/paddle/fluid/tests/unittests/test_empty_op.py @@ -13,10 +13,12 @@ # limitations under the License. 
import unittest + import numpy as np +from op_test import OpTest + import paddle import paddle.fluid as fluid -from op_test import OpTest from paddle.fluid.framework import convert_np_dtype_to_dtype_ diff --git a/python/paddle/fluid/tests/unittests/test_entry_attr.py b/python/paddle/fluid/tests/unittests/test_entry_attr.py index cfbff2113c3046..1ae98ab7cd6f25 100644 --- a/python/paddle/fluid/tests/unittests/test_entry_attr.py +++ b/python/paddle/fluid/tests/unittests/test_entry_attr.py @@ -17,10 +17,11 @@ paddle.enable_static() import unittest + import paddle.fluid as fluid from paddle.distributed import ( - ProbabilityEntry, CountFilterEntry, + ProbabilityEntry, ShowClickEntry, ) diff --git a/python/paddle/fluid/tests/unittests/test_entry_attr2.py b/python/paddle/fluid/tests/unittests/test_entry_attr2.py index f4f098798ac01d..d1f546249b0f32 100644 --- a/python/paddle/fluid/tests/unittests/test_entry_attr2.py +++ b/python/paddle/fluid/tests/unittests/test_entry_attr2.py @@ -17,6 +17,7 @@ paddle.enable_static() import unittest + import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_erf_op.py b/python/paddle/fluid/tests/unittests/test_erf_op.py index f21ae86d1c2eac..06b7f55069fb94 100644 --- a/python/paddle/fluid/tests/unittests/test_erf_op.py +++ b/python/paddle/fluid/tests/unittests/test_erf_op.py @@ -13,9 +13,10 @@ # limitations under the License. import unittest + import numpy as np -from scipy.special import erf from op_test import OpTest +from scipy.special import erf import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_erfinv_op.py b/python/paddle/fluid/tests/unittests/test_erfinv_op.py index e605d8e0e62a4d..a4265b17895a60 100644 --- a/python/paddle/fluid/tests/unittests/test_erfinv_op.py +++ b/python/paddle/fluid/tests/unittests/test_erfinv_op.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest + import numpy as np -from scipy.special import erfinv from op_test import OpTest +from scipy.special import erfinv + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_exception.py b/python/paddle/fluid/tests/unittests/test_exception.py index bb53ae950287eb..a42c2f5bad0f0a 100644 --- a/python/paddle/fluid/tests/unittests/test_exception.py +++ b/python/paddle/fluid/tests/unittests/test_exception.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy import unittest +import numpy + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_executor_and_mul.py b/python/paddle/fluid/tests/unittests/test_executor_and_mul.py index f85f7b97ec8578..639f84295b24b7 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_and_mul.py +++ b/python/paddle/fluid/tests/unittests/test_executor_and_mul.py @@ -15,8 +15,9 @@ import unittest import numpy as np + from paddle.fluid.executor import Executor -from paddle.fluid.layers import mul, data, zeros, array_write, increment +from paddle.fluid.layers import array_write, data, increment, mul, zeros class TestExecutor(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py b/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py index 2412794929c055..a6b097e1f7ac71 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py +++ b/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py @@ -15,9 +15,10 @@ import unittest import numpy as np -import paddle.fluid.core as core +from test_eager_deletion_padding_rnn import PaddingRNNTestBase, RNNConfig + import paddle.fluid as fluid -from test_eager_deletion_padding_rnn import RNNConfig, PaddingRNNTestBase +import paddle.fluid.core as core class TestExecutor(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_executor_check_fetch_list.py b/python/paddle/fluid/tests/unittests/test_executor_check_fetch_list.py index d6711a612d6930..4756e5d861df17 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_check_fetch_list.py +++ b/python/paddle/fluid/tests/unittests/test_executor_check_fetch_list.py @@ -15,8 +15,8 @@ import unittest import numpy as np + import paddle -import unittest class TestCheckFetchList(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py b/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py index 41b4211a8bcc69..e2c52d99fd2d80 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py @@ -15,6 +15,7 @@ import unittest import numpy + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py b/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py index 7a3063025d5dc2..c9ddac80f0eb6a 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py +++ b/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid as fluid from op_test import OpTest, skip_check_grad_ci +import paddle.fluid as fluid + @skip_check_grad_ci(reason="Not op test but call the method of class OpTest.") class TestExecutorReturnTensorNotOverwritingWithOptest(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_expand_as_op.py b/python/paddle/fluid/tests/unittests/test_expand_as_op.py index 868f0d269ee441..b5b8013a2c9c63 100755 --- a/python/paddle/fluid/tests/unittests/test_expand_as_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_as_op.py @@ -13,6 +13,7 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_expand_as_v2_op.py b/python/paddle/fluid/tests/unittests/test_expand_as_v2_op.py index 5f4c04470ff937..fccfab3e4c3fd3 100755 --- a/python/paddle/fluid/tests/unittests/test_expand_as_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_as_v2_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_expand_op.py b/python/paddle/fluid/tests/unittests/test_expand_op.py index 9fb4c7c804e017..71ba5483c0e4df 100644 --- a/python/paddle/fluid/tests/unittests/test_expand_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py index da27fb397cc595..289f27d9c474d7 100644 --- a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py @@ -13,15 +13,17 @@ # limitations under the License. import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope from op_test import OpTest + +import paddle import paddle.fluid as fluid +import paddle.fluid.layers as layers from paddle.fluid import Program, core, program_guard -import paddle from paddle.fluid.framework import _test_eager_guard -import gradient_checker -from decorator_helper import prog_scope -import paddle.fluid.layers as layers # Situation 1: shape is a list(without tensor) diff --git a/python/paddle/fluid/tests/unittests/test_exponential_op.py b/python/paddle/fluid/tests/unittests/test_exponential_op.py index 458be9f3635b2f..cc0144a56a1a72 100644 --- a/python/paddle/fluid/tests/unittests/test_exponential_op.py +++ b/python/paddle/fluid/tests/unittests/test_exponential_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest -import paddle + import numpy as np from op_test import OpTest +import paddle + paddle.seed(100) diff --git a/python/paddle/fluid/tests/unittests/test_eye_op.py b/python/paddle/fluid/tests/unittests/test_eye_op.py index fb93aee9b30cd9..172fef4d89780a 100644 --- a/python/paddle/fluid/tests/unittests/test_eye_op.py +++ b/python/paddle/fluid/tests/unittests/test_eye_op.py @@ -14,15 +14,15 @@ import os import unittest + import numpy as np from op_test import OpTest +from test_attribute_var import UnittestBase import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework - -from paddle.fluid.framework import program_guard, Program -from test_attribute_var import UnittestBase +from paddle.fluid.framework import Program, program_guard class TestEyeOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py b/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py index b56ff6edbfe975..47ab4754f2a3f0 100644 --- a/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py +++ b/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import math import unittest + import numpy as np -import math from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py index 5bcb0eec733f5f..95df904b235f10 100644 --- a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py +++ b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import itertools -import numpy as np import math +import unittest + +import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py b/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py index c231bffc9d300f..6f3c2328008046 100755 --- a/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py +++ b/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py @@ -13,17 +13,17 @@ # limitations under the License. import os +import sys +import tempfile import unittest import numpy as np + import paddle import paddle.nn as nn -from paddle.fluid.framework import core, _non_static_mode, _test_eager_guard -from paddle.fluid.layer_helper import LayerHelper from paddle import _legacy_C_ops - -import sys -import tempfile +from paddle.fluid.framework import _non_static_mode, _test_eager_guard, core +from paddle.fluid.layer_helper import LayerHelper sys.path.append("./tokenizer") from tokenizer.bert_tokenizer import BertTokenizer diff --git a/python/paddle/fluid/tests/unittests/test_fc_op.py b/python/paddle/fluid/tests/unittests/test_fc_op.py index 8003c0f99e84ba..bc339a516ffacd 100644 --- a/python/paddle/fluid/tests/unittests/test_fc_op.py +++ b/python/paddle/fluid/tests/unittests/test_fc_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest -import paddle + import numpy as np from op_test import OpTest + +import paddle import paddle.fluid as fluid -from paddle.fluid import Program, program_guard, core +from paddle.fluid import Program, core, program_guard SEED = 2020 diff --git a/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py b/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py index 764e5e75067480..f1a1fb5f3b5130 100644 --- a/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py +++ b/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py @@ -13,13 +13,15 @@ # limitations under the License. import multiprocessing -import numpy as np import os +import unittest + +import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.compiler as compiler import paddle.fluid.core as core -import unittest os.environ['CPU_NUM'] = str(4) np.random.seed(123) diff --git a/python/paddle/fluid/tests/unittests/test_feed_fetch_method.py b/python/paddle/fluid/tests/unittests/test_feed_fetch_method.py index 4054af8fb51925..aa7d2ea4b020a6 100644 --- a/python/paddle/fluid/tests/unittests/test_feed_fetch_method.py +++ b/python/paddle/fluid/tests/unittests/test_feed_fetch_method.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.fluid.core as core import unittest + import numpy as np +import paddle.fluid.core as core + class TestFeedFetch(unittest.TestCase): def test_feed_fetch(self): diff --git a/python/paddle/fluid/tests/unittests/test_fetch_handler.py b/python/paddle/fluid/tests/unittests/test_fetch_handler.py index 6a8b1f689fd797..deb2d051ff1f66 100644 --- a/python/paddle/fluid/tests/unittests/test_fetch_handler.py +++ b/python/paddle/fluid/tests/unittests/test_fetch_handler.py @@ -14,11 +14,12 @@ import time import unittest + import numpy as np -from paddle.fluid.framework import Program -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.framework import Program class TestFetchHandler(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py b/python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py index c096a4c8a6e027..b2869d88882375 100644 --- a/python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py +++ b/python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py @@ -13,11 +13,13 @@ # limitations under the License. import os -import numpy as np import unittest + +import numpy as np +from simple_nets import simple_fc_net, simple_fc_net_with_inputs + import paddle.fluid as fluid import paddle.fluid.layers as layers -from simple_nets import simple_fc_net_with_inputs, simple_fc_net class TestFetchLoDTensorArray(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py b/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py index 0da628db92c112..978298f8f859d9 100644 --- a/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py +++ b/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py @@ -14,9 +14,11 @@ import os import unittest + import numpy as np -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid os.environ["CPU_NUM"] = "2" diff --git a/python/paddle/fluid/tests/unittests/test_fetch_var.py b/python/paddle/fluid/tests/unittests/test_fetch_var.py index 98b06b31368d97..4339813584a909 100644 --- a/python/paddle/fluid/tests/unittests/test_fetch_var.py +++ b/python/paddle/fluid/tests/unittests/test_fetch_var.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + +import numpy as np + import paddle.fluid as fluid import paddle.fluid.layers as layers -import numpy as np -import unittest class TestFetchVar(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_fill_any_like_op.py b/python/paddle/fluid/tests/unittests/test_fill_any_like_op.py index 1739361080d3b7..9fa333d623bf91 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_any_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_any_like_op.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -import paddle.fluid.core as core import unittest + import numpy as np from op_test import OpTest, convert_float_to_uint16 +import paddle +import paddle.fluid.core as core + class TestFillAnyLikeOp(OpTest): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_fill_any_op.py b/python/paddle/fluid/tests/unittests/test_fill_any_op.py index 76daed5a5d84a2..36d6d5a1566b54 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_any_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_any_op.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest + import numpy as np from op_test import OpTest +import paddle + class TestFillAnyOp(OpTest): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like.py b/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like.py index 339fafbd0b4e2d..d6114ec8dc6dd1 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like.py +++ b/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest + import numpy as np from op_test import OpTest + +import paddle from paddle.fluid.framework import convert_np_dtype_to_dtype_ paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py index 838efff490ab81..5e1af99259db1a 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py @@ -13,15 +13,15 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest, convert_float_to_uint16 import paddle -import paddle.fluid.core as core -from paddle.fluid.op import Operator import paddle.fluid as fluid -import numpy as np +import paddle.fluid.core as core from paddle.fluid import Program, program_guard +from paddle.fluid.op import Operator # Situation 1: Attr(shape) is a list(without tensor) diff --git a/python/paddle/fluid/tests/unittests/test_fill_diagonal_tensor_op.py b/python/paddle/fluid/tests/unittests/test_fill_diagonal_tensor_op.py index f412a161eb63c0..4550ccc4a1826f 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_diagonal_tensor_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_diagonal_tensor_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np -import paddle from op_test import OpTest +import paddle + def fill_diagonal_ndarray(x, value, offset=0, dim1=0, dim2=1): """Fill value into the diagonal of x that offset is ${offset} and the coordinate system is (dim1, dim2).""" diff --git a/python/paddle/fluid/tests/unittests/test_fill_op.py b/python/paddle/fluid/tests/unittests/test_fill_op.py index 95a432136caca7..499377146676a2 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_op.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + import paddle.fluid.core as core from paddle.fluid.op import Operator diff --git a/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py b/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py index f1a1039ea016cc..2cb86460190936 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest + import paddle.fluid as fluid from paddle.fluid.framework import convert_np_dtype_to_dtype_ -from op_test import OpTest class TestFillZerosLike2Op(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py b/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py index 7317973e774ab5..805d9c88d6cb23 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_filter_by_instag_op.py b/python/paddle/fluid/tests/unittests/test_filter_by_instag_op.py index e1136d677a13d9..153d22bfce15fe 100644 --- a/python/paddle/fluid/tests/unittests/test_filter_by_instag_op.py +++ b/python/paddle/fluid/tests/unittests/test_filter_by_instag_op.py @@ -14,6 +14,7 @@ """This is unit test of Test filter_instag Op.""" import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_flatten2_op.py b/python/paddle/fluid/tests/unittests/test_flatten2_op.py index d67b873084d5a0..115699025d83f8 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten2_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten2_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py index 8d1bcc6254d010..06cef1d48cef05 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np -import paddle from op_test import OpTest +import paddle + class TestFlattenOp(OpTest): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_flatten_op.py b/python/paddle/fluid/tests/unittests/test_flatten_op.py index a1108d3392ca58..7753f2d90ee66e 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten_op.py @@ -13,8 +13,8 @@ # limitations under the License. 
import unittest -import numpy as np +import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_fleet.py b/python/paddle/fluid/tests/unittests/test_fleet.py index 75d6ab31754831..d0445c2c5e09b8 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet.py +++ b/python/paddle/fluid/tests/unittests/test_fleet.py @@ -34,8 +34,8 @@ def test_pslib_1(self): """Test cases for pslib.""" import paddle import paddle.fluid as fluid - from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker + from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet os.environ["POD_IP"] = "127.0.0.1" os.environ["PADDLE_PORT"] = "36001" diff --git a/python/paddle/fluid/tests/unittests/test_fleet_api_input.py b/python/paddle/fluid/tests/unittests/test_fleet_api_input.py index ac77fc42efdfbf..b3817573352f15 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_api_input.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_api_input.py @@ -13,23 +13,25 @@ # limitations under the License. import unittest + +from dist_simnet_bow import train_network + import paddle import paddle.fluid as fluid -from paddle.fluid.transpiler.distribute_transpiler import ( - DistributeTranspilerConfig, -) -from paddle.fluid.incubate.fleet.base.role_maker import UserDefinedRoleMaker +import paddle.fluid.incubate.fleet.base.role_maker as role_maker from paddle.fluid.incubate.fleet.base.role_maker import ( + Role, UserDefinedCollectiveRoleMaker, + UserDefinedRoleMaker, ) -from paddle.fluid.incubate.fleet.base.role_maker import Role -import paddle.fluid.incubate.fleet.base.role_maker as role_maker +from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer +from paddle.fluid.incubate.fleet.parameter_server import TranspilerOptimizer from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import ( fleet, ) -from paddle.fluid.incubate.fleet.parameter_server import TranspilerOptimizer -from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer -from dist_simnet_bow import train_network +from paddle.fluid.transpiler.distribute_transpiler import ( + DistributeTranspilerConfig, +) class DistributeTranspilerConfigTest(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_fleet_auto.py b/python/paddle/fluid/tests/unittests/test_fleet_auto.py index 608b948d35f06a..22abaaf4d18c36 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_auto.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_auto.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + import paddle -import os import paddle.distributed.fleet as fleet paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base.py b/python/paddle/fluid/tests/unittests/test_fleet_base.py index 686c7fa1ef75a2..74886d9a9c1230 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os import unittest + +import numpy as np + import paddle import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker -import os import paddle.fluid as fluid -import numpy as np class TestFleetBase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_2.py b/python/paddle/fluid/tests/unittests/test_fleet_base_2.py index 084718c5407a33..64cd2df9037290 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base_2.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base_2.py @@ -14,11 +14,13 @@ import tempfile import unittest + import paddle paddle.enable_static() import os + import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_3.py b/python/paddle/fluid/tests/unittests/test_fleet_base_3.py index d9014b2c6f858a..7d1672064d1156 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base_3.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base_3.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import os +import unittest + import paddle import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_4.py b/python/paddle/fluid/tests/unittests/test_fleet_base_4.py index 986a48162b28c3..ac0bbea11fde5d 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base_4.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base_4.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + import paddle -import os import paddle.distributed.fleet as fleet paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_single.py b/python/paddle/fluid/tests/unittests/test_fleet_base_single.py index 6b574b84b92d24..39825686b37113 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base_single.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base_single.py @@ -12,18 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import os +import numpy as np + cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES') if cuda_visible_devices is None or cuda_visible_devices == "": os.environ['CUDA_VISIBLE_DEVICES'] = '0' else: os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices.split(',')[0] +import unittest + import paddle import paddle.distributed.fleet as fleet import paddle.fluid as fluid -import unittest import paddle.nn as nn diff --git a/python/paddle/fluid/tests/unittests/test_fleet_elastic_collective.py b/python/paddle/fluid/tests/unittests/test_fleet_elastic_collective.py index 2d9a9268ee3fa1..31f45aa09af0f8 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_elastic_collective.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_elastic_collective.py @@ -13,8 +13,8 @@ # limitations under the License. 
import os -import unittest import tempfile +import unittest from paddle.distributed.fleet.elastic.collective import CollectiveLauncher from paddle.distributed.fleet.launch import launch_collective diff --git a/python/paddle/fluid/tests/unittests/test_fleet_elastic_manager.py b/python/paddle/fluid/tests/unittests/test_fleet_elastic_manager.py index 0570ae1928c124..7c42a59000ee44 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_elastic_manager.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_elastic_manager.py @@ -15,10 +15,10 @@ import os import unittest -from paddle.distributed.fleet.elastic.manager import ElasticManager -from paddle.distributed.fleet.elastic.manager import LauncherInterface from paddle.distributed.fleet.elastic.manager import ( ELASTIC_AUTO_PARALLEL_EXIT_CODE, + ElasticManager, + LauncherInterface, ) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_run.py b/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_run.py index 4e37988f7673ee..ebc790c4034630 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_run.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_run.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest -import paddle -import numpy as np import os import tempfile +import unittest + +import numpy as np + +import paddle from paddle.fluid import core paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_tensor.py b/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_tensor.py index 85da732ab5bd6d..b5c4af9fc763f9 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_exe_dist_model_tensor.py @@ -13,10 +13,11 @@ # limitations under the License. import unittest -import paddle + import numpy as np -from paddle.fluid.core import DistModelTensor -from paddle.fluid.core import DistModelDataType + +import paddle +from paddle.fluid.core import DistModelDataType, DistModelTensor paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor.py b/python/paddle/fluid/tests/unittests/test_fleet_executor.py index 502f03784cbcc3..e81fc34ea2ca0f 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor_multi_devices.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_multi_devices.py index 5c635e4ce48076..b2a1c488dcec66 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor_multi_devices.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_multi_devices.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import os +import unittest + import paddle -import paddle.fluid as fluid import paddle.distributed.fleet as fleet +import paddle.fluid as fluid paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py index cda6be8f0a6be0..0de28e9839efa3 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor_task_node.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_task_node.py index 03d6be10111f3d..6d74fdc075cdf0 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor_task_node.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_task_node.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle import paddle.fluid.core as core from paddle.distributed.fleet.fleet_executor_utils import TaskNode diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor_utils.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_utils.py index fb003f450f6e18..63befae625379a 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor_utils.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_utils.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle from paddle.distributed.fleet.fleet_executor_utils import FleetExecutorUtils diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py index 802ce0cbfe3c97..36a85e2d74fc7a 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid from paddle.distributed.fleet.fleet_executor_utils import TaskNode diff --git a/python/paddle/fluid/tests/unittests/test_fleet_gradient_scale.py b/python/paddle/fluid/tests/unittests/test_fleet_gradient_scale.py index 36e823a2bc92a3..bda452e0889972 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_gradient_scale.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_gradient_scale.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest +import numpy as np + import paddle -import paddle.fluid as fluid import paddle.distributed.fleet as fleet -import numpy as np -import os +import paddle.fluid as fluid class TestGradientScale(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_fleet_metric.py b/python/paddle/fluid/tests/unittests/test_fleet_metric.py index 76aff57125b967..d5435d3941f6f3 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_metric.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_metric.py @@ -13,13 +13,14 @@ # limitations under the License. 
"""Test fleet metric.""" -import numpy as np -import paddle -import paddle.fluid as fluid import unittest + import numpy as np -import paddle.distributed.fleet.metrics.metric as metric + +import paddle import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.metrics.metric as metric +import paddle.fluid as fluid from paddle.distributed.fleet.base.util_factory import UtilBase paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py b/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py index 8c92efe323650e..577652037e5386 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py @@ -32,8 +32,8 @@ def setUp(self): def test_pslib_1(self): """Test cases for pslib.""" import paddle.fluid as fluid - from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker + from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet os.environ["POD_IP"] = "127.0.0.1" os.environ["PADDLE_PORT"] = "36001" diff --git a/python/paddle/fluid/tests/unittests/test_fleet_ps.py b/python/paddle/fluid/tests/unittests/test_fleet_ps.py index f5f7f81311cfc2..1d4c319d5c85a2 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_ps.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_ps.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + from paddle.fluid.framework import default_main_program from paddle.fluid.incubate.fleet.parameter_server.ir.pserver_pass import ( _get_optimizer_input_shape, diff --git a/python/paddle/fluid/tests/unittests/test_fleet_pyramid_hash.py b/python/paddle/fluid/tests/unittests/test_fleet_pyramid_hash.py index c558bc0f0abad9..10a7a7037e2f6d 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_pyramid_hash.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_pyramid_hash.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle import paddle.fluid as fluid import paddle.fluid.incubate.fleet.base.role_maker as role_maker diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py index 05732d3cc86199..f64d8cb1692b20 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py @@ -15,6 +15,7 @@ import os import unittest + import paddle.fluid.incubate.fleet.base.role_maker as role_maker @@ -61,8 +62,8 @@ def test_training_role(self): def test_pslib_1(self): """Test cases for pslib.""" import paddle.fluid as fluid - from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker + from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet os.environ["POD_IP"] = "127.0.0.1" os.environ["PADDLE_PORT"] = "36001" diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py index 2c343a19c95923..a657d3deb51a02 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py @@ -13,10 +13,11 @@ # limitations under the License. 
"""Test cases for role makers.""" -import paddle import os -import unittest import tempfile +import unittest + +import paddle class TestCloudRoleMaker2(unittest.TestCase): @@ -34,11 +35,13 @@ def tearDown(self): def test_pslib_2(self): """Test cases for pslib.""" import paddle.fluid as fluid + from paddle.fluid.incubate.fleet.base.role_maker import ( + GeneralRoleMaker, + RoleMakerBase, + ) from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import ( fleet, ) - from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker - from paddle.fluid.incubate.fleet.base.role_maker import RoleMakerBase paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py index 0ce1b0743aa92d..79b5e136f189a9 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py @@ -32,8 +32,8 @@ def setUp(self): def test_pslib_1(self): """Test cases for pslib.""" import paddle.fluid as fluid - from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker + from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet os.environ["POD_IP"] = "127.0.0.1" os.environ["PADDLE_PORT"] = "36001" diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_4.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_4.py index 2825a1232fc055..7f82c904645621 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_4.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_4.py @@ -34,9 +34,11 @@ def test_pslib_1(self): import threading try: - from paddle.distributed.fleet.utils.http_server import KVHandler - from paddle.distributed.fleet.utils.http_server import KVServer - from paddle.distributed.fleet.utils.http_server import KVHTTPServer + from paddle.distributed.fleet.utils.http_server import ( + KVHandler, + KVHTTPServer, + KVServer, + ) except: print("warning: no fleet, skip test_pslib_4") return diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_init.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_init.py index f7f2b11acdcb2d..5c3f77a3f6cba7 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_init.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_init.py @@ -15,6 +15,7 @@ import os import unittest + import paddle.distributed.fleet.base.role_maker as role_maker diff --git a/python/paddle/fluid/tests/unittests/test_fleet_runtime.py b/python/paddle/fluid/tests/unittests/test_fleet_runtime.py index 4a415f92ca5a16..fb60166f887be3 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_runtime.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_runtime.py @@ -13,6 +13,7 @@ # limitations under the License. 
import unittest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py b/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py index d1f4df65328002..9c7736a39384f1 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py @@ -32,8 +32,8 @@ def setUp(self): def test_pslib_1(self): """Test cases for pslib.""" import paddle.fluid as fluid - from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker + from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet os.environ["POD_IP"] = "127.0.0.1" os.environ["PADDLE_PORT"] = "36001" diff --git a/python/paddle/fluid/tests/unittests/test_fleet_util.py b/python/paddle/fluid/tests/unittests/test_fleet_util.py index 383b8ac2ebe463..01d7d6bedeae4c 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_util.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_util.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest -import numpy as np -import tarfile -import tempfile import os import sys -from paddle.dataset.common import download +import tarfile +import tempfile +import unittest + +import numpy as np + import paddle.distributed.fleet.base.role_maker as role_maker +from paddle.dataset.common import download class TestFleetUtil(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_flip.py b/python/paddle/fluid/tests/unittests/test_flip.py index f5943db87e0c7f..f7ebd3d1d54f95 100644 --- a/python/paddle/fluid/tests/unittests/test_flip.py +++ b/python/paddle/fluid/tests/unittests/test_flip.py @@ -13,13 +13,15 @@ # limitations under the License. import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope +from op_test import OpTest + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from op_test import OpTest -import gradient_checker -from decorator_helper import prog_scope import paddle.fluid.layers as layers diff --git a/python/paddle/fluid/tests/unittests/test_fmax_op.py b/python/paddle/fluid/tests/unittests/test_fmax_op.py index 986417fede6264..3a84570ffcecec 100644 --- a/python/paddle/fluid/tests/unittests/test_fmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_fmax_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest + import paddle import paddle.fluid.core as core -from op_test import OpTest class ApiFMaxTest(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_fmin_op.py b/python/paddle/fluid/tests/unittests/test_fmin_op.py index e1a9cf7a912d7a..32b203c57418b2 100644 --- a/python/paddle/fluid/tests/unittests/test_fmin_op.py +++ b/python/paddle/fluid/tests/unittests/test_fmin_op.py @@ -13,10 +13,12 @@ # limitations under the License. 
import unittest + import numpy as np +from op_test import OpTest + import paddle import paddle.fluid.core as core -from op_test import OpTest paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_fold_op.py b/python/paddle/fluid/tests/unittests/test_fold_op.py index 1b2cc9d149c7aa..71f69aa6d6745f 100644 --- a/python/paddle/fluid/tests/unittests/test_fold_op.py +++ b/python/paddle/fluid/tests/unittests/test_fold_op.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest + +import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid from paddle.fluid import core @@ -157,8 +159,8 @@ def test_info(self): class TestFoldOpError(unittest.TestCase): def test_errors(self): - from paddle.nn.functional import fold from paddle.fluid.framework import Program, program_guard + from paddle.nn.functional import fold with program_guard(Program(), Program()): diff --git a/python/paddle/fluid/tests/unittests/test_frac_api.py b/python/paddle/fluid/tests/unittests/test_frac_api.py index b455dbbdb4ddce..2e705bb2d7fad1 100644 --- a/python/paddle/fluid/tests/unittests/test_frac_api.py +++ b/python/paddle/fluid/tests/unittests/test_frac_api.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_frame_op.py b/python/paddle/fluid/tests/unittests/test_frame_op.py index fc0c092e91e639..89551b2ccc121e 100644 --- a/python/paddle/fluid/tests/unittests/test_frame_op.py +++ b/python/paddle/fluid/tests/unittests/test_frame_op.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -from numpy.lib.stride_tricks import as_strided -import paddle import unittest +import numpy as np +from numpy.lib.stride_tricks import as_strided from op_test import OpTest +import paddle + def frame_from_librosa(x, frame_length, hop_length, axis=-1): if axis == -1 and not x.flags["C_CONTIGUOUS"]: diff --git a/python/paddle/fluid/tests/unittests/test_framework_debug_str.py b/python/paddle/fluid/tests/unittests/test_framework_debug_str.py index 6a1e72c072ac2d..ef9f294055b46a 100644 --- a/python/paddle/fluid/tests/unittests/test_framework_debug_str.py +++ b/python/paddle/fluid/tests/unittests/test_framework_debug_str.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + from paddle.fluid.framework import Program diff --git a/python/paddle/fluid/tests/unittests/test_frexp_api.py b/python/paddle/fluid/tests/unittests/test_frexp_api.py index 9604080ffd41ef..230afc993ae7f2 100644 --- a/python/paddle/fluid/tests/unittests/test_frexp_api.py +++ b/python/paddle/fluid/tests/unittests/test_frexp_api.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid diff --git a/python/paddle/fluid/tests/unittests/test_fs_interface.py b/python/paddle/fluid/tests/unittests/test_fs_interface.py index 4705e44eba62b9..dc2a5f1b8e91e7 100644 --- a/python/paddle/fluid/tests/unittests/test_fs_interface.py +++ b/python/paddle/fluid/tests/unittests/test_fs_interface.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import inspect +import unittest from paddle.distributed.fleet.utils.fs import FS diff --git a/python/paddle/fluid/tests/unittests/test_fsp_op.py b/python/paddle/fluid/tests/unittests/test_fsp_op.py index 3a1de0833a6ff0..3e325d189148bd 100644 --- a/python/paddle/fluid/tests/unittests/test_fsp_op.py +++ b/python/paddle/fluid/tests/unittests/test_fsp_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_ftrl_op.py b/python/paddle/fluid/tests/unittests/test_ftrl_op.py index e346b4f2d118ed..23081125491a0e 100644 --- a/python/paddle/fluid/tests/unittests/test_ftrl_op.py +++ b/python/paddle/fluid/tests/unittests/test_ftrl_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest + import paddle.fluid.core as core from paddle.fluid.op import Operator -from op_test import OpTest def ftrl_step(param, grad, rows, sq_accum, lin_accum, lr, l1, l2, lr_power): diff --git a/python/paddle/fluid/tests/unittests/test_full_like_op.py b/python/paddle/fluid/tests/unittests/test_full_like_op.py index c24ee6791b14a9..5a4b1235d8dd28 100644 --- a/python/paddle/fluid/tests/unittests/test_full_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_full_like_op.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.fluid.core as core -from paddle.static import program_guard, Program import unittest + import numpy as np from op_test import OpTest -from paddle.fluid.framework import convert_np_dtype_to_dtype_ -from paddle.fluid.framework import _test_eager_guard + +import paddle +import paddle.fluid.core as core +from paddle.fluid.framework import _test_eager_guard, convert_np_dtype_to_dtype_ +from paddle.static import Program, program_guard class TestFullOp(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_full_op.py b/python/paddle/fluid/tests/unittests/test_full_op.py index a8c54188e48bbd..35474810da77bc 100644 --- a/python/paddle/fluid/tests/unittests/test_full_op.py +++ b/python/paddle/fluid/tests/unittests/test_full_op.py @@ -13,10 +13,11 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid as fluid import paddle +import paddle.fluid as fluid from paddle.fluid import Program, program_guard from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_function_hook.py b/python/paddle/fluid/tests/unittests/test_function_hook.py index 7ba230233a4152..b6adc83a748959 100644 --- a/python/paddle/fluid/tests/unittests/test_function_hook.py +++ b/python/paddle/fluid/tests/unittests/test_function_hook.py @@ -13,9 +13,10 @@ # limitations under the License. import unittest -import paddle + import numpy as np +import paddle from paddle import _legacy_C_ops from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv1d.py b/python/paddle/fluid/tests/unittests/test_functional_conv1d.py index 6869c511b9fbef..0bd7fa1878d414 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv1d.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv1d.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -import paddle.nn.functional as F -import paddle.fluid.dygraph as dg -import numpy as np import unittest from unittest import TestCase +import numpy as np + +import paddle +import paddle.fluid.dygraph as dg +import paddle.nn.functional as F + class TestFunctionalConv1DError(TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv1d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv1d_transpose.py index 431c940f0b4a57..a1e144da146a57 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv1d_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv1d_transpose.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.nn.functional as F -import paddle.fluid.dygraph as dg -import numpy as np import unittest from unittest import TestCase +import numpy as np + +import paddle +import paddle.fluid.dygraph as dg +import paddle.nn.functional as F + class TestFunctionalConv1DError(TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv2d.py b/python/paddle/fluid/tests/unittests/test_functional_conv2d.py index 1a92a9babb7275..2e2dcd5baf52c9 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv2d.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv2d.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest +from unittest import TestCase + +import numpy as np + import paddle -import paddle.nn.functional as F -from paddle import fluid import paddle.fluid.dygraph as dg import paddle.fluid.initializer as I -import numpy as np -import unittest -from unittest import TestCase +import paddle.nn.functional as F +from paddle import fluid class TestFunctionalConv2D(TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py index d45f13e4c95760..a66713eaee5830 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py @@ -12,15 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest +from unittest import TestCase + import numpy as np + +import paddle import paddle.fluid.dygraph as dg import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid from paddle.fluid.framework import _test_eager_guard -from unittest import TestCase class TestFunctionalConv2D(TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py index 71123cb51e74b8..bdd8360f971745 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv3d.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest +from unittest import TestCase + +import numpy as np + import paddle -import paddle.nn.functional as F -from paddle import fluid import paddle.fluid.dygraph as dg import paddle.fluid.initializer as I -import numpy as np -import unittest -from unittest import TestCase +import paddle.nn.functional as F +from paddle import fluid class TestFunctionalConv3D(TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py index 3c013fe873396d..4c944d4fa3d1f4 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py @@ -12,15 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle +import unittest +from unittest import TestCase + import numpy as np + +import paddle import paddle.fluid.dygraph as dg import paddle.fluid.initializer as I import paddle.nn.functional as F -import unittest from paddle import fluid from paddle.fluid.framework import _test_eager_guard -from unittest import TestCase class TestFunctionalConv3DTranspose(TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_fuse_all_reduce_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_all_reduce_pass.py index 9431bcac4e34ac..4566c9e97a9ad4 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_all_reduce_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_all_reduce_pass.py @@ -12,15 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -from simple_nets import simple_fc_net, fc_with_batchnorm, init_data, bow_net +import os +import unittest +from functools import partial + from fake_reader import fake_imdb_reader -from parallel_executor_test_base import TestParallelExecutorBase, DeviceType +from parallel_executor_test_base import DeviceType, TestParallelExecutorBase +from simple_nets import bow_net, fc_with_batchnorm, init_data, simple_fc_net + +import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from functools import partial -import paddle -import unittest -import os paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py index 5018b7bd5e43b6..a04e845db0af43 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import paddle import paddle.fluid as fluid -import unittest class TestFuseBatchNormActPass(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py index 47c2a1a5f16cdb..68d11d0897279a 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py @@ -13,7 +13,9 @@ # limitations under the License. 
import unittest + import numpy as np + import paddle import paddle.fluid as fluid from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py index bc052ec1cf7225..9a33552f31af8e 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from simple_nets import simple_fc_net, fc_with_batchnorm, init_data -from parallel_executor_test_base import TestParallelExecutorBase, DeviceType +import os +import unittest + +from parallel_executor_test_base import DeviceType, TestParallelExecutorBase +from simple_nets import fc_with_batchnorm, init_data, simple_fc_net + import paddle.fluid as fluid import paddle.fluid.core as core -import unittest -import os class TestMNIST(TestParallelExecutorBase): diff --git a/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py index 6595d709d26e13..a3141128a54d31 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py @@ -14,9 +14,11 @@ # limitations under the License. """Test cases for role makers.""" -import paddle import unittest + import numpy as np + +import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py index 9fd6fcc276ecfd..fe4baed3271bb4 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py @@ -12,15 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -from simple_nets import bow_net, fc_with_batchnorm, init_data -from fake_reader import fake_imdb_reader -from parallel_executor_test_base import TestParallelExecutorBase, DeviceType +import os +import unittest from functools import partial + +from fake_reader import fake_imdb_reader +from parallel_executor_test_base import DeviceType, TestParallelExecutorBase +from simple_nets import bow_net, fc_with_batchnorm, init_data + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import unittest -import os class TestFuseOptimizationOps(TestParallelExecutorBase): diff --git a/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py index 54eacb5ec0d8f7..ed7cb760f05389 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from parallel_executor_test_base import TestParallelExecutorBase, DeviceType -import paddle.fluid as fluid -import paddle.fluid.core as core +import unittest + import numpy as np +from parallel_executor_test_base import DeviceType, TestParallelExecutorBase + import paddle -import unittest +import paddle.fluid as fluid +import paddle.fluid.core as core def norm(*args, **kargs): diff --git a/python/paddle/fluid/tests/unittests/test_fused_attention_op.py b/python/paddle/fluid/tests/unittests/test_fused_attention_op.py index 6e3c0014422670..a73dae327117c8 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_attention_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_attention_op.py @@ -12,19 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np +from op_test import OpTest import paddle -import paddle.nn.functional as F import paddle.incubate.nn.functional as incubate_f -from paddle.nn.layer.norm import LayerNorm -from paddle.nn.layer.common import Linear, Dropout -from paddle.nn.layer.transformer import _convert_attention_mask +import paddle.nn.functional as F from paddle import tensor from paddle.fluid import layers -import unittest -from op_test import OpTest from paddle.fluid.framework import default_main_program +from paddle.nn.layer.common import Dropout, Linear +from paddle.nn.layer.norm import LayerNorm +from paddle.nn.layer.transformer import _convert_attention_mask default_main_program().random_seed = 42 diff --git a/python/paddle/fluid/tests/unittests/test_fused_attention_op_api.py b/python/paddle/fluid/tests/unittests/test_fused_attention_op_api.py index 3a3872bb6726f4..0917e8b96a91dc 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_attention_op_api.py +++ b/python/paddle/fluid/tests/unittests/test_fused_attention_op_api.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np import paddle from paddle.incubate.nn.layer.fused_transformer import FusedMultiHeadAttention from paddle.static import Program -import unittest def fc(x, weight): diff --git a/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op.py b/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op.py index 8ae92fe194f35b..3cd611931fadde 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + import numpy as np +from op_test import OpTest import paddle import paddle.incubate.nn.functional as incubate_f -from paddle.nn.layer.norm import LayerNorm +from paddle.fluid.framework import _enable_legacy_dygraph, default_main_program from paddle.nn.layer.common import Dropout -import unittest -from op_test import OpTest -from paddle.fluid.framework import default_main_program, _enable_legacy_dygraph +from paddle.nn.layer.norm import LayerNorm _enable_legacy_dygraph() diff --git a/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op_api.py b/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op_api.py index 261a2686b271cd..dae2f2ba61c886 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op_api.py +++ b/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op_api.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np import paddle @@ -19,7 +21,6 @@ FusedBiasDropoutResidualLayerNorm, ) from paddle.static import Program -import unittest def layer_norm(x, has_scale, has_bias, weight, bias, epsilon=1e-05): diff --git a/python/paddle/fluid/tests/unittests/test_fused_elemwise_activation_op.py b/python/paddle/fluid/tests/unittests/test_fused_elemwise_activation_op.py index cc672f5a16f02f..dfa2bf6fc3452f 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_elemwise_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_elemwise_activation_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest -import numpy as np from functools import partial -import paddle.fluid.core as core + +import numpy as np from op_test import OpTest +import paddle.fluid.core as core + # TestFusedElementwiseActivationOp # TestFusedElementwiseActivationOp_scalar # TestFusedElementwiseActivationOp_scalar2 diff --git a/python/paddle/fluid/tests/unittests/test_fused_emb_seq_pool_op.py b/python/paddle/fluid/tests/unittests/test_fused_emb_seq_pool_op.py index 392c04f730a3f7..3e3f0ca05a62f2 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_emb_seq_pool_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_emb_seq_pool_op.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import platform +import unittest + import numpy as np from op_test import OpTest, skip_check_grad_ci + import paddle.version as ver diff --git a/python/paddle/fluid/tests/unittests/test_fused_embedding_fc_lstm_op.py b/python/paddle/fluid/tests/unittests/test_fused_embedding_fc_lstm_op.py index 54fc4d77b79215..cc9dd6a17565de 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_embedding_fc_lstm_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_embedding_fc_lstm_op.py @@ -13,9 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest -from test_lstm_op import lstm, ACTIVATION +from test_lstm_op import ACTIVATION, lstm def fc(x, w, b): diff --git a/python/paddle/fluid/tests/unittests/test_fused_fc_elementwise_layernorm_op.py b/python/paddle/fluid/tests/unittests/test_fused_fc_elementwise_layernorm_op.py index 09797b03849198..7b07e559c8cd2c 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_fc_elementwise_layernorm_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_fc_elementwise_layernorm_op.py @@ -13,12 +13,14 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -from paddle.fluid import core -from test_fc_op import fc_refer, MatrixGenerate +from test_fc_op import MatrixGenerate, fc_refer from test_layer_norm_op import _reference_layer_norm_naive +from paddle.fluid import core + np.random.random(123) diff --git a/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py b/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py index a745ec57c15493..5153425b18aa73 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py @@ -11,17 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np +from op_test import OpTest import paddle -from paddle.nn.layer import transformer -import paddle.nn.functional as F import paddle.incubate.nn.functional as incubate_f -from paddle.nn.layer.norm import LayerNorm -from paddle.nn.layer.common import Linear, Dropout -import unittest -from op_test import OpTest +import paddle.nn.functional as F from paddle.fluid.framework import default_main_program +from paddle.nn.layer import transformer +from paddle.nn.layer.common import Dropout, Linear +from paddle.nn.layer.norm import LayerNorm class TestFusedFFNOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_fused_gate_attention_op.py b/python/paddle/fluid/tests/unittests/test_fused_gate_attention_op.py index 1eed59690d88dd..0a7ca81f216d80 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_gate_attention_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_gate_attention_op.py @@ -17,13 +17,14 @@ os.environ['NVIDIA_TF32_OVERRIDE'] = "0" os.environ['FLAGS_new_einsum'] = "0" +import unittest + import numpy as np +from op_test import OpTest, convert_float_to_uint16, convert_uint16_to_float +from test_sparse_attention_op import get_cuda_version import paddle import paddle.nn as nn -import unittest -from op_test import OpTest, convert_float_to_uint16, convert_uint16_to_float -from test_sparse_attention_op import get_cuda_version from paddle import _legacy_C_ops from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_grad_op.py b/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_grad_op.py index 322e63dd4fd45a..21b6d7e29cfea9 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_grad_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_grad_op.py @@ -14,10 +14,12 @@ # limitations under the License. 
import unittest + import numpy as np +from op_test import OpTest, skip_check_grad_ci + import paddle import paddle.fluid.core as core -from op_test import OpTest, skip_check_grad_ci def get_outputs(DOut, X, Y): diff --git a/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_op.py b/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_op.py index a3af9cc194f73b..eb03509ee9e055 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_op.py @@ -14,10 +14,12 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest, skip_check_grad_ci, skip_check_inplace_ci + import paddle import paddle.fluid.core as core -from op_test import OpTest, skip_check_grad_ci, skip_check_inplace_ci def is_fused_gemm_epilogue_supported(): diff --git a/python/paddle/fluid/tests/unittests/test_fused_matmul_bias.py b/python/paddle/fluid/tests/unittests/test_fused_matmul_bias.py index 53ef3610d6fde6..5cbbabbee24959 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_matmul_bias.py +++ b/python/paddle/fluid/tests/unittests/test_fused_matmul_bias.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.fluid.core as core import unittest + import numpy as np -from paddle.incubate.nn.functional import fused_matmul_bias, fused_linear + +import paddle +import paddle.fluid.core as core from paddle.incubate.nn import FusedLinear +from paddle.incubate.nn.functional import fused_linear, fused_matmul_bias def is_fused_matmul_bias_supported(): diff --git a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_int8_op.py b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_int8_op.py index e3fb41412fd390..3f91f9b6e6d90d 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_int8_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_int8_op.py @@ -12,19 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np import paddle import paddle.nn.functional as F -from paddle.nn.layer.norm import LayerNorm -from paddle.nn.layer.common import Dropout -from paddle.nn.layer.transformer import _convert_attention_mask -from paddle import tensor +from paddle import _legacy_C_ops, tensor from paddle.fluid import layers -import unittest -from paddle.fluid.framework import default_main_program from paddle.fluid.framework import default_main_program -from paddle import _legacy_C_ops +from paddle.nn.layer.common import Dropout +from paddle.nn.layer.norm import LayerNorm +from paddle.nn.layer.transformer import _convert_attention_mask default_main_program().random_seed = 42 np.random.seed(0) diff --git a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py index 0daf8342294f4f..199c1e48bb324e 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py @@ -12,21 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + import numpy as np +from op_test import OpTest import paddle import paddle.nn.functional as F -from paddle.nn.layer.norm import LayerNorm -from paddle.nn.layer.common import Linear, Dropout -from paddle.nn.layer.transformer import _convert_attention_mask from paddle import tensor from paddle.fluid import layers -import unittest -from op_test import OpTest -from paddle.fluid.framework import default_main_program from paddle.fluid.framework import default_main_program -from paddle.incubate.nn.functional import fused_multi_transformer from paddle.incubate.nn import FusedMultiTransformer +from paddle.incubate.nn.functional import fused_multi_transformer +from paddle.nn.layer.common import Dropout, Linear +from paddle.nn.layer.norm import LayerNorm +from paddle.nn.layer.transformer import _convert_attention_mask default_main_program().random_seed = 42 diff --git a/python/paddle/fluid/tests/unittests/test_fused_multihead_matmul_op.py b/python/paddle/fluid/tests/unittests/test_fused_multihead_matmul_op.py index 55c2a563c8cdf9..06f3debb3872bc 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_multihead_matmul_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_multihead_matmul_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + from paddle.fluid import core np.random.random(123) diff --git a/python/paddle/fluid/tests/unittests/test_fused_token_prune_op.py b/python/paddle/fluid/tests/unittests/test_fused_token_prune_op.py index 22d0fe4ba761df..c29766123d15e9 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_token_prune_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_token_prune_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + from paddle.framework import core diff --git a/python/paddle/fluid/tests/unittests/test_fused_transformer_encoder_layer.py b/python/paddle/fluid/tests/unittests/test_fused_transformer_encoder_layer.py index a8a349265b46ce..c4b0a47420a1a8 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_transformer_encoder_layer.py +++ b/python/paddle/fluid/tests/unittests/test_fused_transformer_encoder_layer.py @@ -11,13 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np import paddle +from paddle.fluid.framework import default_main_program, in_dygraph_mode from paddle.incubate.nn import FusedTransformerEncoderLayer from paddle.nn import TransformerEncoderLayer -from paddle.fluid.framework import default_main_program, in_dygraph_mode -import unittest class TestFusedTransformerEncoderLayer(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_fused_transformer_with_amp_decorator.py b/python/paddle/fluid/tests/unittests/test_fused_transformer_with_amp_decorator.py index 199ebc15f86269..a2a6a958761047 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_transformer_with_amp_decorator.py +++ b/python/paddle/fluid/tests/unittests/test_fused_transformer_with_amp_decorator.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + import paddle import paddle.nn as nn from paddle.incubate.nn.layer.fused_transformer import ( - FusedMultiHeadAttention, FusedFeedForward, + FusedMultiHeadAttention, ) -import unittest class PreModel(nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py b/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py index 68777ca256757f..36f9e1d02001ac 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + +from paddle.fluid.tests.unittests.test_fusion_lstm_op import ACTIVATION, fc from paddle.fluid.tests.unittests.test_gru_op import gru -from paddle.fluid.tests.unittests.test_fusion_lstm_op import fc, ACTIVATION def fusion_gru( diff --git a/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py b/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py index fcde3d2f5db664..9dc930c21a6f85 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -from paddle.fluid.tests.unittests.test_lstm_op import lstm, ACTIVATION + +from paddle.fluid.tests.unittests.test_lstm_op import ACTIVATION, lstm def fc(x, w, b): diff --git a/python/paddle/fluid/tests/unittests/test_fusion_repeated_fc_relu_op.py b/python/paddle/fluid/tests/unittests/test_fusion_repeated_fc_relu_op.py index c58570d9de9927..431d67e1d00975 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_repeated_fc_relu_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_repeated_fc_relu_op.py @@ -13,9 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -from test_fc_op import fc_refer, MatrixGenerate +from test_fc_op import MatrixGenerate, fc_refer class TestFusionRepeatedFCReluOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_fusion_seqconv_eltadd_relu_op.py b/python/paddle/fluid/tests/unittests/test_fusion_seqconv_eltadd_relu_op.py index 91ed3f564429e1..4ebea360ecc968 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_seqconv_eltadd_relu_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_seqconv_eltadd_relu_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest from sequence.test_sequence_conv import seqconv diff --git a/python/paddle/fluid/tests/unittests/test_fusion_seqexpand_concat_fc_op.py b/python/paddle/fluid/tests/unittests/test_fusion_seqexpand_concat_fc_op.py index 10a8623ec9c1d3..d3cf473320a3fc 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_seqexpand_concat_fc_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_seqexpand_concat_fc_op.py @@ -13,9 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest -from test_fusion_lstm_op import fc, ACTIVATION +from test_fusion_lstm_op import ACTIVATION, fc def fusion_seqexpand_concat_fc(xs, lod, w, b, fc_act): diff --git a/python/paddle/fluid/tests/unittests/test_fusion_seqpool_concat_op.py b/python/paddle/fluid/tests/unittests/test_fusion_seqpool_concat_op.py index c1c815ae146ef3..ce0eaff4e06bbb 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_seqpool_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_seqpool_concat_op.py @@ -13,14 +13,15 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -from test_reorder_lod_tensor import convert_to_offset from sequence.test_sequence_pool import ( - compute_seqpool_sum, compute_seqpool_avg, compute_seqpool_sqrt, + compute_seqpool_sum, ) +from test_reorder_lod_tensor import convert_to_offset class TestFusionSeqPoolConcatOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_fusion_seqpool_cvm_concat_op.py b/python/paddle/fluid/tests/unittests/test_fusion_seqpool_cvm_concat_op.py index 524ed99aa5a7fa..02e3a48e12a854 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_seqpool_cvm_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_seqpool_cvm_concat_op.py @@ -13,15 +13,16 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -from test_reorder_lod_tensor import convert_to_offset from sequence.test_sequence_pool import ( - compute_seqpool_sum, compute_seqpool_avg, compute_seqpool_sqrt, + compute_seqpool_sum, ) from test_cvm_op import cvm_compute +from test_reorder_lod_tensor import convert_to_offset class TestFusionSeqPoolCVMConcatOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_fusion_squared_mat_sub_op.py b/python/paddle/fluid/tests/unittests/test_fusion_squared_mat_sub_op.py index 2a6098a8a6a066..a9c692e5382617 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_squared_mat_sub_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_squared_mat_sub_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_fusion_transpose_flatten_concat_op.py b/python/paddle/fluid/tests/unittests/test_fusion_transpose_flatten_concat_op.py index 768bbec552dbfe..9dbdc979f5fe24 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_transpose_flatten_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_transpose_flatten_concat_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py b/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py index b098dcf4e16486..aac53d3875c325 100644 --- a/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py +++ b/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py @@ -13,11 +13,12 @@ # limitations under the License. 
import ast -from paddle.utils import gast import sys import textwrap import unittest +from paddle.utils import gast + class GastNodeTransformer(gast.NodeTransformer): def __init__(self, root): diff --git a/python/paddle/fluid/tests/unittests/test_gather_nd_op.py b/python/paddle/fluid/tests/unittests/test_gather_nd_op.py index 20fc57ff023685..6a3f5cf8556809 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_nd_op.py +++ b/python/paddle/fluid/tests/unittests/test_gather_nd_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid class TestGatherNdOpWithEmptyIndex(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_gather_op.py b/python/paddle/fluid/tests/unittests/test_gather_op.py index 4f722c8bdc2131..44ab250c7652c2 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_op.py +++ b/python/paddle/fluid/tests/unittests/test_gather_op.py @@ -13,12 +13,14 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest, convert_float_to_uint16 + import paddle import paddle.fluid as fluid -from paddle.framework import core from paddle.fluid.dygraph.base import switch_to_static_graph +from paddle.framework import core def gather_numpy(x, index, axis): diff --git a/python/paddle/fluid/tests/unittests/test_gather_tree_op.py b/python/paddle/fluid/tests/unittests/test_gather_tree_op.py index 18fb61c69f1ebb..1af5776cd60e58 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_tree_op.py +++ b/python/paddle/fluid/tests/unittests/test_gather_tree_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid -from paddle.fluid.framework import program_guard, Program +from paddle.fluid.framework import Program, program_guard class TestGatherTreeOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py b/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py index c3ecf7303c1758..b0fb623502ed39 100644 --- a/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py @@ -13,13 +13,14 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.tests.unittests.op_test import OpTest, convert_uint16_to_float from paddle.fluid.framework import _test_eager_guard -import paddle +from paddle.fluid.tests.unittests.op_test import OpTest, convert_uint16_to_float class TestGaussianRandomOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_gcd.py b/python/paddle/fluid/tests/unittests/test_gcd.py index 7a2d02dcb81128..6b600d9c6f12e0 100644 --- a/python/paddle/fluid/tests/unittests/test_gcd.py +++ b/python/paddle/fluid/tests/unittests/test_gcd.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_gelu_op.py b/python/paddle/fluid/tests/unittests/test_gelu_op.py index e8999b7ba96797..63cf1c0c212812 100644 --- a/python/paddle/fluid/tests/unittests/test_gelu_op.py +++ b/python/paddle/fluid/tests/unittests/test_gelu_op.py @@ -13,11 +13,13 @@ # limitations under the License. 
import unittest + import numpy as np from scipy.special import erf + +import paddle import paddle.fluid as fluid import paddle.fluid.dygraph as dg -import paddle import paddle.nn.functional as F from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_generate_mask_labels_op.py b/python/paddle/fluid/tests/unittests/test_generate_mask_labels_op.py index a2121ee3ef50b2..12a1fa082e4bc7 100644 --- a/python/paddle/fluid/tests/unittests/test_generate_mask_labels_op.py +++ b/python/paddle/fluid/tests/unittests/test_generate_mask_labels_op.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import math import unittest + import numpy as np -import math from op_test import OpTest ''' diff --git a/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py index 8027234ce5bbc7..493f9d6168b8d3 100644 --- a/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py +++ b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py b/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py index 8a23973ede38ac..a2438b347949bd 100644 --- a/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py +++ b/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy +import math import unittest + import numpy as np -import math -import paddle from op_test import OpTest from test_anchor_generator_op import anchor_generator_in_python -import copy + +import paddle def generate_proposals_in_python( diff --git a/python/paddle/fluid/tests/unittests/test_generate_proposals_v2_op.py b/python/paddle/fluid/tests/unittests/test_generate_proposals_v2_op.py index f0b5d56114167a..a63acb8c9c6f14 100644 --- a/python/paddle/fluid/tests/unittests/test_generate_proposals_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_generate_proposals_v2_op.py @@ -13,11 +13,13 @@ # limitations under the License. 
import unittest + import numpy as np -import paddle from op_test import OpTest from test_anchor_generator_op import anchor_generator_in_python -from test_generate_proposals_op import clip_tiled_boxes, box_coder, nms +from test_generate_proposals_op import box_coder, clip_tiled_boxes, nms + +import paddle def python_generate_proposals_v2( diff --git a/python/paddle/fluid/tests/unittests/test_generator.py b/python/paddle/fluid/tests/unittests/test_generator.py index 5adef46fc5de26..d8c03d4286f2a4 100644 --- a/python/paddle/fluid/tests/unittests/test_generator.py +++ b/python/paddle/fluid/tests/unittests/test_generator.py @@ -14,6 +14,7 @@ """Test cloud role maker.""" import unittest + import paddle import paddle.fluid.generator as generator diff --git a/python/paddle/fluid/tests/unittests/test_generator_dataloader.py b/python/paddle/fluid/tests/unittests/test_generator_dataloader.py index 67b2c37accb703..22114853e9e86c 100644 --- a/python/paddle/fluid/tests/unittests/test_generator_dataloader.py +++ b/python/paddle/fluid/tests/unittests/test_generator_dataloader.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.fluid as fluid -import numpy as np import time import unittest + +import numpy as np + +import paddle +import paddle.fluid as fluid from paddle.fluid.reader import DataLoaderBase EPOCH_NUM = 20 diff --git a/python/paddle/fluid/tests/unittests/test_get_all_op_or_kernel_names.py b/python/paddle/fluid/tests/unittests/test_get_all_op_or_kernel_names.py index 47af7355cb7e2a..8ee2d2cb0f5690 100644 --- a/python/paddle/fluid/tests/unittests/test_get_all_op_or_kernel_names.py +++ b/python/paddle/fluid/tests/unittests/test_get_all_op_or_kernel_names.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/test_get_device_properties.py b/python/paddle/fluid/tests/unittests/test_get_device_properties.py index 3fea41dbadd67f..aa3cac5d9f34aa 100644 --- a/python/paddle/fluid/tests/unittests/test_get_device_properties.py +++ b/python/paddle/fluid/tests/unittests/test_get_device_properties.py @@ -13,8 +13,9 @@ # limitations under the License. import unittest -from paddle.fluid import core + from paddle.device.cuda import device_count, get_device_properties +from paddle.fluid import core class TestGetDeviceProperties(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_get_inputs_outputs_in_block.py b/python/paddle/fluid/tests/unittests/test_get_inputs_outputs_in_block.py index 49c05c420cc12f..c6a1c1a3482e92 100644 --- a/python/paddle/fluid/tests/unittests/test_get_inputs_outputs_in_block.py +++ b/python/paddle/fluid/tests/unittests/test_get_inputs_outputs_in_block.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest -import paddle + import numpy as np + +import paddle from paddle.fluid.layers import utils paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_get_places_op.py b/python/paddle/fluid/tests/unittests/test_get_places_op.py index d51e36f2514cd5..03f32c78b59b50 100644 --- a/python/paddle/fluid/tests/unittests/test_get_places_op.py +++ b/python/paddle/fluid/tests/unittests/test_get_places_op.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + +from decorator_helper import prog_scope + import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.layers.device import get_places -from decorator_helper import prog_scope -import unittest class TestGetPlaces(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_get_set_flags.py b/python/paddle/fluid/tests/unittests/test_get_set_flags.py index dad58ae08ba96f..aef5f2016f8af4 100644 --- a/python/paddle/fluid/tests/unittests/test_get_set_flags.py +++ b/python/paddle/fluid/tests/unittests/test_get_set_flags.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest as unittest +import paddle.fluid as fluid + class TestGetAndSetFlags(unittest.TestCase): def test_api(self): diff --git a/python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py b/python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py index 17bf702b886073..ab5b9096dcc8ad 100644 --- a/python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py +++ b/python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest -import paddle.fluid.core as core + import numpy as np + import paddle.fluid as fluid -from paddle.fluid.op import Operator +import paddle.fluid.core as core from paddle.fluid import Program, program_guard +from paddle.fluid.op import Operator class TestGetTensorFromSelectedRowsError(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_global_var_getter_setter.py b/python/paddle/fluid/tests/unittests/test_global_var_getter_setter.py index 19c66daade9909..d3f24dcb93ac4b 100644 --- a/python/paddle/fluid/tests/unittests/test_global_var_getter_setter.py +++ b/python/paddle/fluid/tests/unittests/test_global_var_getter_setter.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest as unittest +import paddle.fluid as fluid + class VarInfo: def __init__(self, var_name, var_type, writable): diff --git a/python/paddle/fluid/tests/unittests/test_glu.py b/python/paddle/fluid/tests/unittests/test_glu.py index 25f1975db0c529..5ec54567ec1c8c 100644 --- a/python/paddle/fluid/tests/unittests/test_glu.py +++ b/python/paddle/fluid/tests/unittests/test_glu.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -from paddle import fluid -import paddle.fluid.dygraph as dg import unittest +import numpy as np + import paddle +import paddle.fluid.dygraph as dg +from paddle import fluid from paddle.nn import functional as F diff --git a/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py b/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py index ac786dea529f40..6cf6eec52cdb37 100644 --- a/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py +++ b/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py @@ -13,10 +13,11 @@ # limitations under the License. 
import os -import sys import subprocess -import unittest +import sys import tempfile +import unittest + from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/test_grad_clip_minimize.py b/python/paddle/fluid/tests/unittests/test_grad_clip_minimize.py index ad1fc3ad183698..db34123d3bdd8f 100644 --- a/python/paddle/fluid/tests/unittests/test_grad_clip_minimize.py +++ b/python/paddle/fluid/tests/unittests/test_grad_clip_minimize.py @@ -13,17 +13,16 @@ # limitations under the License. import unittest + import numpy as np import paddle.fluid as fluid - -from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.clip import ( - GradientClipByValue, - GradientClipByNorm, GradientClipByGlobalNorm, + GradientClipByNorm, + GradientClipByValue, ) +from paddle.fluid.dygraph.base import to_variable class TestGradClipByGlobalNorm(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_gradient_clip.py b/python/paddle/fluid/tests/unittests/test_gradient_clip.py index d8eaaef49b215b..ec2812e4ff32b7 100644 --- a/python/paddle/fluid/tests/unittests/test_gradient_clip.py +++ b/python/paddle/fluid/tests/unittests/test_gradient_clip.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np +from fake_reader import fake_imdb_reader + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid -from fake_reader import fake_imdb_reader +import paddle.fluid.core as core from paddle.fluid.clip import _allow_pure_fp16_global_norm_clip paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_graph_khop_sampler.py b/python/paddle/fluid/tests/unittests/test_graph_khop_sampler.py index b2f57af35de90a..31ec4a9b03d9e9 100644 --- a/python/paddle/fluid/tests/unittests/test_graph_khop_sampler.py +++ b/python/paddle/fluid/tests/unittests/test_graph_khop_sampler.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_graph_reindex.py b/python/paddle/fluid/tests/unittests/test_graph_reindex.py index 318deda61f2a7b..db767504559d89 100644 --- a/python/paddle/fluid/tests/unittests/test_graph_reindex.py +++ b/python/paddle/fluid/tests/unittests/test_graph_reindex.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_graph_sample_neighbors.py b/python/paddle/fluid/tests/unittests/test_graph_sample_neighbors.py index 20366b80e2ad73..18c917fa1031e6 100644 --- a/python/paddle/fluid/tests/unittests/test_graph_sample_neighbors.py +++ b/python/paddle/fluid/tests/unittests/test_graph_sample_neighbors.py @@ -13,7 +13,9 @@ # limitations under the License. 
import unittest + import numpy as np + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_graph_send_recv_op.py b/python/paddle/fluid/tests/unittests/test_graph_send_recv_op.py index a5344a96a319e7..c0a76eaa17b16e 100644 --- a/python/paddle/fluid/tests/unittests/test_graph_send_recv_op.py +++ b/python/paddle/fluid/tests/unittests/test_graph_send_recv_op.py @@ -15,11 +15,11 @@ import unittest import numpy as np +from op_test import OpTest + import paddle from paddle.fluid.framework import _test_eager_guard -from op_test import OpTest - def graph_send_recv_wrapper( x, src_index, dst_index, reduce_op="sum", out_size=None, name=None diff --git a/python/paddle/fluid/tests/unittests/test_graph_send_ue_recv_op.py b/python/paddle/fluid/tests/unittests/test_graph_send_ue_recv_op.py index 9d4d7dc25cab7f..e38c9ad109c1d5 100644 --- a/python/paddle/fluid/tests/unittests/test_graph_send_ue_recv_op.py +++ b/python/paddle/fluid/tests/unittests/test_graph_send_ue_recv_op.py @@ -14,13 +14,14 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest + import paddle import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard -from op_test import OpTest - def get_broadcast_shape(shp1, shp2): pad_shp1, pad_shp2 = shp1, shp2 diff --git a/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py b/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py index 9dd252b17c5319..64663398c38c80 100644 --- a/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py +++ b/python/paddle/fluid/tests/unittests/test_graph_send_uv_op.py @@ -13,12 +13,13 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest + import paddle from paddle.fluid.framework import _test_eager_guard -from op_test import OpTest - def compute_graph_send_uv(inputs, attributes): x = inputs['x'] diff --git a/python/paddle/fluid/tests/unittests/test_grid_sample_function.py b/python/paddle/fluid/tests/unittests/test_grid_sample_function.py index 323ddc80ed63d1..3189fd6e11f766 100644 --- a/python/paddle/fluid/tests/unittests/test_grid_sample_function.py +++ b/python/paddle/fluid/tests/unittests/test_grid_sample_function.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np + import paddle -from paddle import fluid import paddle.fluid.dygraph as dg import paddle.nn.functional as F -import unittest +from paddle import fluid class GridSampleTestCase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py b/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py index 0245383045b855..2b4f39c39aa141 100644 --- a/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py +++ b/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle import unittest + import numpy as np -import paddle.fluid.core as core from op_test import OpTest, skip_check_grad_ci +import paddle +import paddle.fluid.core as core + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py index d457357996e1ee..1082cd4b98dc58 100644 --- a/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py @@ -13,13 +13,14 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid.core as core -import paddle.fluid as fluid + +import paddle import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard from paddle.fluid.framework import _test_eager_guard -import paddle def group_norm_naive_for_general_dimension(x, scale, bias, epsilon, groups): diff --git a/python/paddle/fluid/tests/unittests/test_gru_op.py b/python/paddle/fluid/tests/unittests/test_gru_op.py index ab3d9b516da1bb..2fbc60b45465a3 100644 --- a/python/paddle/fluid/tests/unittests/test_gru_op.py +++ b/python/paddle/fluid/tests/unittests/test_gru_op.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import functools import unittest + import numpy as np -import functools from op_test import OpTest -from paddle.fluid.tests.unittests.test_lstm_op import ACTIVATION + from paddle import fluid from paddle.fluid import Program, program_guard +from paddle.fluid.tests.unittests.test_lstm_op import ACTIVATION def gru( diff --git a/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py b/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py index 89379ac87f3493..9c60131b52a8d9 100644 --- a/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py @@ -12,18 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random +import sys import unittest -import numpy as np +import numpy as np from op_test import OpTest + import paddle import paddle.fluid.core as core -import random -import sys sys.path.append("./rnn") -from rnn_numpy import GRU from convert import get_params_for_net +from rnn_numpy import GRU random.seed(2) np.set_printoptions(threshold=np.inf) diff --git a/python/paddle/fluid/tests/unittests/test_gru_unit_op.py b/python/paddle/fluid/tests/unittests/test_gru_unit_op.py index 9d1fcc238ea282..2f7c4a42e3362d 100644 --- a/python/paddle/fluid/tests/unittests/test_gru_unit_op.py +++ b/python/paddle/fluid/tests/unittests/test_gru_unit_op.py @@ -14,12 +14,14 @@ import math import unittest + import numpy as np -import paddle.fluid as fluid from op_test import OpTest + +import paddle.fluid as fluid from paddle import fluid +from paddle.fluid.framework import Program, program_guard from paddle.fluid.layers import gru_unit -from paddle.fluid.framework import program_guard, Program class TestGRUUnitAPIError(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_gumbel_softmax_op.py b/python/paddle/fluid/tests/unittests/test_gumbel_softmax_op.py index 3494ccb5d16c31..fa62013a75cda4 100644 --- a/python/paddle/fluid/tests/unittests/test_gumbel_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_gumbel_softmax_op.py @@ -11,8 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_hash_op.py b/python/paddle/fluid/tests/unittests/test_hash_op.py index 0070a3fadc75ad..53b1551c7b8446 100644 --- a/python/paddle/fluid/tests/unittests/test_hash_op.py +++ b/python/paddle/fluid/tests/unittests/test_hash_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_hinge_embedding_loss.py b/python/paddle/fluid/tests/unittests/test_hinge_embedding_loss.py index 68e7b4210a6926..1ce6c1bc5f0253 100644 --- a/python/paddle/fluid/tests/unittests/test_hinge_embedding_loss.py +++ b/python/paddle/fluid/tests/unittests/test_hinge_embedding_loss.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import numpy as np import unittest + +import numpy as np + +import paddle from paddle.static import Program, program_guard np.random.seed(42) diff --git a/python/paddle/fluid/tests/unittests/test_hinge_loss_op.py b/python/paddle/fluid/tests/unittests/test_hinge_loss_op.py index 1b320f864491ab..d87d9e43aa2d37 100644 --- a/python/paddle/fluid/tests/unittests/test_hinge_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_hinge_loss_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_histogram_op.py b/python/paddle/fluid/tests/unittests/test_histogram_op.py index 669431f8f6ff4b..e9ebf10bc68753 100644 --- a/python/paddle/fluid/tests/unittests/test_histogram_op.py +++ b/python/paddle/fluid/tests/unittests/test_histogram_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest + import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard -from op_test import OpTest from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py index 733ce0e6ce140a..180e9abe1b2f9b 100644 --- a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py +++ b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import math import unittest + import numpy as np +from op_test import OpTest, skip_check_grad_ci + import paddle import paddle.fluid as fluid -import paddle.nn.functional as F import paddle.fluid.initializer as I -import math -from op_test import OpTest, skip_check_grad_ci +import paddle.nn.functional as F paddle.enable_static() np.random.seed(100) diff --git a/python/paddle/fluid/tests/unittests/test_huber_loss_op.py b/python/paddle/fluid/tests/unittests/test_huber_loss_op.py index afb1170a9db4ec..a783e9f61930ab 100644 --- a/python/paddle/fluid/tests/unittests/test_huber_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_huber_loss_op.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_hybrid_parallel_topology.py b/python/paddle/fluid/tests/unittests/test_hybrid_parallel_topology.py index d9210c128dc5c7..33d7bca3f4dcb8 100644 --- a/python/paddle/fluid/tests/unittests/test_hybrid_parallel_topology.py +++ b/python/paddle/fluid/tests/unittests/test_hybrid_parallel_topology.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest -from paddle.distributed import fleet + import numpy as np +from paddle.distributed import fleet + class TestCommunicateTopology(unittest.TestCase): def test_topology(self): diff --git a/python/paddle/fluid/tests/unittests/test_identity_loss_op.py b/python/paddle/fluid/tests/unittests/test_identity_loss_op.py index 0d808e7be3440d..1a0ff98b176e55 100644 --- a/python/paddle/fluid/tests/unittests/test_identity_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_identity_loss_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest + import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard -from op_test import OpTest class TestIdentityLossOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_identity_op.py b/python/paddle/fluid/tests/unittests/test_identity_op.py index f798b421cfb02a..311a609dd5146d 100644 --- a/python/paddle/fluid/tests/unittests/test_identity_op.py +++ b/python/paddle/fluid/tests/unittests/test_identity_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_iinfo_and_finfo.py b/python/paddle/fluid/tests/unittests/test_iinfo_and_finfo.py index bacb13a0981b7a..ba6a74779b2799 100644 --- a/python/paddle/fluid/tests/unittests/test_iinfo_and_finfo.py +++ b/python/paddle/fluid/tests/unittests/test_iinfo_and_finfo.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest + import numpy as np +import paddle + class TestIInfoAndFInfoAPI(unittest.TestCase): def test_invalid_input(self): diff --git a/python/paddle/fluid/tests/unittests/test_im2sequence_op.py b/python/paddle/fluid/tests/unittests/test_im2sequence_op.py index 24a5e696a10c88..025701c6bff6ff 100644 --- a/python/paddle/fluid/tests/unittests/test_im2sequence_op.py +++ b/python/paddle/fluid/tests/unittests/test_im2sequence_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest, skip_check_grad_ci diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py index 218ff0f0d77676..5889d8299dc389 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py @@ -13,9 +13,11 @@ # limitations under the License. 
import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -import numpy as np from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index 25c7eed19b87da..595ec4fe3e60a0 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -13,17 +13,17 @@ # limitations under the License. import unittest + import numpy as np +from test_imperative_base import new_program_scope +import paddle import paddle.fluid as fluid -from paddle.fluid import core -from paddle.fluid import Linear -from paddle.fluid.layer_helper import LayerHelper -from test_imperative_base import new_program_scope import paddle.fluid.dygraph_utils as dygraph_utils +from paddle.fluid import Linear, core from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper -import paddle from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard +from paddle.fluid.layer_helper import LayerHelper class MyLayer(fluid.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_layerdict.py b/python/paddle/fluid/tests/unittests/test_imperative_container_layerdict.py index d81cef00f7bcae..0163ffcdaac5ef 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_layerdict.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_layerdict.py @@ -13,8 +13,9 @@ # limitations under the License. import unittest -import paddle from collections import OrderedDict + +import paddle from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_layerlist.py b/python/paddle/fluid/tests/unittests/test_imperative_container_layerlist.py index db86295a5aa917..e90a16def8f5bc 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_layerlist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_layerlist.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest -import paddle.fluid as fluid + import numpy as np + import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py b/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py index a87392741223b7..57335a88319e87 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest -import paddle.fluid as fluid + import numpy as np + import paddle +import paddle.fluid as fluid from paddle import _legacy_C_ops from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py index c1b0a42675f6af..7ed45d58703c22 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest -import paddle.fluid as fluid + import numpy as np + +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_base.py b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_base.py index da15dcf29b5700..5a8b8ca532cec7 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_base.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_base.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np + import paddle.fluid as fluid -from paddle.fluid.reader import use_pinned_memory from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.reader import use_pinned_memory def get_random_images_and_labels(image_shape, label_shape): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exception.py b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exception.py index eac20ea03c788c..d7f6240c872c1b 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exception.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exception.py @@ -14,7 +14,9 @@ import time import unittest + import numpy as np + import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exit_func.py b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exit_func.py index ff9ae7343b38b6..6d250676871b86 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exit_func.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_exit_func.py @@ -12,19 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import signal -import unittest import multiprocessing +import queue +import signal import time +import unittest from paddle.fluid.framework import _test_eager_guard - -import queue - from paddle.fluid.reader import ( - multiprocess_queue_set, - _cleanup, CleanupFuncRegistrar, + _cleanup, + multiprocess_queue_set, ) # NOTE: These special functions cannot be detected by the existing coverage mechanism, diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_fds_clear.py b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_fds_clear.py index 355f35ce5e9e62..2bff5c9e3384b2 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_fds_clear.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_fds_clear.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np + import paddle.fluid as fluid -from paddle.io import Dataset, DataLoader from paddle.fluid.framework import _test_eager_guard +from paddle.io import DataLoader, Dataset def get_random_images_and_labels(image_shape, label_shape): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_process.py b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_process.py index c03b9b3bbf16b6..5cbb84c2f2c8b4 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_data_loader_process.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_data_loader_process.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import multiprocessing +import queue +import unittest + import numpy as np + import paddle.fluid as fluid -from paddle.fluid.reader import _reader_process_loop from paddle.fluid.framework import _test_eager_guard - -import queue +from paddle.fluid.reader import _reader_process_loop def get_random_images_and_labels(image_shape, label_shape): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py index 97d74e342410f2..b02b5105000163 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py @@ -13,13 +13,13 @@ # limitations under the License. import unittest + import numpy as np -import unittest import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.fluid.dygraph as dygraph from paddle.fluid.dygraph.nn import Linear -import paddle.fluid.core as core class MLP(fluid.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_decorator.py b/python/paddle/fluid/tests/unittests/test_imperative_decorator.py index 42407c0b36f3fc..2f237a10c8d700 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_decorator.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_decorator.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.fluid as fluid -import paddle.fluid.framework as framework -import unittest import inspect +import unittest from test_imperative_base import new_program_scope + +import paddle +import paddle.fluid as fluid +import paddle.fluid.framework as framework from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py index 0b8576994be1ca..38c19677f79c1c 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py @@ -12,18 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest -import numpy as np -import random import os +import random import sys +import unittest + +import numpy as np +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from test_imperative_base import new_program_scope -from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph import Linear +from paddle.fluid.dygraph.base import to_variable from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py index 4b5e008cb74828..ec879d9cf0178c 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.fluid as fluid -import paddle -from paddle.fluid.wrapped_decorator import wrap_decorator -from paddle.vision.models import resnet50, resnet101 import unittest from unittest import TestCase + import numpy as np + +import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.wrapped_decorator import wrap_decorator +from paddle.vision.models import resnet50, resnet101 def _dygraph_guard_(func): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_framework.py b/python/paddle/fluid/tests/unittests/test_imperative_framework.py index 44c5ee01fe52ec..fd86679a77a203 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_framework.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_framework.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest -import paddle -import paddle.fluid as fluid + import numpy as np from test_imperative_base import new_program_scope + +import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gan.py b/python/paddle/fluid/tests/unittests/test_imperative_gan.py index d645dbe1f1e936..781253449d58a8 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gan.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gan.py @@ -13,16 +13,17 @@ # limitations under the License. import unittest + import numpy as np +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid import Linear -from test_imperative_base import new_program_scope from paddle.fluid.dygraph.base import to_variable from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.optimizer import SGDOptimizer class Discriminator(fluid.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py index 58bf0b9ad48d47..bff393f38d1332 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py @@ -12,17 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np -import sys +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.optimizer import AdamOptimizer -from test_imperative_base import new_program_scope from paddle.fluid.dygraph.base import to_variable from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.optimizer import AdamOptimizer def gen_data(): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_group.py b/python/paddle/fluid/tests/unittests/test_imperative_group.py index 672dee430f8896..8b037ce722caa8 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_group.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_group.py @@ -12,14 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import unittest import paddle import paddle.fluid.core as core from paddle.fluid.framework import ( - _test_eager_guard, _in_legacy_dygraph, + _test_eager_guard, in_dygraph_mode, ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_hook_for_layer.py b/python/paddle/fluid/tests/unittests/test_imperative_hook_for_layer.py index 462f614defaf27..6521c5509483ba 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_hook_for_layer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_hook_for_layer.py @@ -13,13 +13,13 @@ # limitations under the License. import unittest + import numpy as np +from test_imperative_lod_tensor_to_selected_rows import SimpleNet import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.dygraph.base as base - -from test_imperative_lod_tensor_to_selected_rows import SimpleNet from paddle.fluid.framework import _test_eager_guard call_forward_post_hook = False diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py b/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py index 156cac0b3a3243..57756945523364 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py @@ -14,11 +14,11 @@ import unittest +import numpy as np + import paddle -import paddle.nn as nn import paddle.fluid as fluid - -import numpy as np +import paddle.nn as nn from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py b/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py index 80f3d4cc7ca78d..59717d48949332 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py @@ -14,11 +14,11 @@ import unittest +import numpy as np + import paddle -import paddle.nn as nn import paddle.fluid as fluid - -import numpy as np +import paddle.nn as nn from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py b/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py index dc4ad0cea15dae..abcb811f671a6c 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py @@ -13,9 +13,10 @@ # limitations under the License. import unittest -import paddle.fluid as fluid + import numpy as np +import paddle.fluid as fluid import paddle.fluid.dygraph as dygraph from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layers.py b/python/paddle/fluid/tests/unittests/test_imperative_layers.py index e0cd3d4f88f64b..821543a7577c3e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_layers.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_layers.py @@ -13,6 +13,7 @@ # limitations under the License. 
import unittest + import paddle.nn as nn from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py index 90e757a5d4eae1..b3e23565799b18 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -12,23 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import tempfile import unittest + +import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework from paddle.fluid.dygraph.nn import ( + NCE, BatchNorm, Conv3D, Embedding, GroupNorm, LayerNorm, Linear, - NCE, PRelu, ) -import numpy as np -import os -import tempfile class TestDygraphLoadStatic(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py index d49552fcb9db88..f15205eb3f64dc 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py @@ -13,16 +13,18 @@ # limitations under the License. import unittest + +import numpy as np +from test_imperative_base import new_program_scope +from utils import DyGraphProgramDescTracerTestHelper + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.dygraph.nn import Embedding -from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.dygraph.base import to_variable -from test_imperative_base import new_program_scope -import numpy as np -from utils import DyGraphProgramDescTracerTestHelper +from paddle.fluid.dygraph.nn import Embedding from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.optimizer import SGDOptimizer class SimpleNet(fluid.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py index 67ad27a1ba8d45..2ca175390d5516 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py @@ -13,16 +13,17 @@ # limitations under the License. import unittest + import numpy as np +from test_imperative_base import new_program_scope +from utils import DyGraphProgramDescTracerTestHelper import paddle import paddle.fluid as fluid from paddle.fluid import core +from paddle.fluid.dygraph.nn import Linear, Pool2D +from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard from paddle.fluid.optimizer import SGDOptimizer -from paddle.fluid.dygraph.nn import Pool2D, Linear -from test_imperative_base import new_program_scope -from utils import DyGraphProgramDescTracerTestHelper -from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class SimpleImgConvPool(fluid.dygraph.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist_sorted_gradient.py index 1a077311e0541b..2ed75cb777a93e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_mnist_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist_sorted_gradient.py @@ -13,16 +13,17 @@ # limitations under the License. 
import unittest + import numpy as np +from test_imperative_base import new_program_scope +from test_imperative_mnist import MNIST import paddle import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.dygraph.base import to_variable -from test_imperative_base import new_program_scope -from test_imperative_mnist import MNIST from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.optimizer import SGDOptimizer class TestImperativeMnistSortGradient(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_named_members.py b/python/paddle/fluid/tests/unittests/test_imperative_named_members.py index 044661940cb114..622839253d2ab2 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_named_members.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_named_members.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py b/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py index 4e333f7ca4e0ff..15af13c2f57240 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_numpy_bridge.py @@ -16,6 +16,7 @@ import warnings import numpy as np + import paddle.fluid as fluid from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index 6205bb7f58b630..fcaafc72b8cce0 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -13,19 +13,21 @@ # limitations under the License. import unittest + import numpy as np +from test_imperative_base import new_program_scope + import paddle import paddle.fluid as fluid from paddle.fluid import core +from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.nn import ( - Pool2D, - Linear, BatchNorm, Embedding, GRUUnit, + Linear, + Pool2D, ) -from paddle.fluid.dygraph.base import to_variable -from test_imperative_base import new_program_scope from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py index 917876c9741a19..521ff77d58f358 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py @@ -12,39 +12,37 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import itertools import unittest + import numpy as np -import itertools +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid +from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer from paddle.fluid import core +from paddle.fluid.dygraph import Linear +from paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import ( - SGDOptimizer, - Adam, - MomentumOptimizer, - LarsMomentumOptimizer, + AdadeltaOptimizer, AdagradOptimizer, + Adam, AdamaxOptimizer, - DpsgdOptimizer, DecayedAdagradOptimizer, - AdadeltaOptimizer, - RMSPropOptimizer, + DpsgdOptimizer, + ExponentialMovingAverage, FtrlOptimizer, LambOptimizer, -) -from paddle.fluid.optimizer import ( + LarsMomentumOptimizer, + LookaheadOptimizer, ModelAverage, - ExponentialMovingAverage, + MomentumOptimizer, PipelineOptimizer, - LookaheadOptimizer, RecomputeOptimizer, + RMSPropOptimizer, + SGDOptimizer, ) -from paddle.fluid.dygraph import Linear -from test_imperative_base import new_program_scope -from paddle.fluid.framework import _test_eager_guard - -from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer # Note(wangzhongpu) # In dygraph, don't support ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer. diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py index 0c6853ce653689..8bc9a953aaf297 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py @@ -12,36 +12,34 @@ # See the License for the specific language governing permissions and # limitations under the License. +import itertools import unittest + import numpy as np -import itertools +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid +from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer from paddle.fluid import core +from paddle.fluid.dygraph import Linear +from paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import ( - MomentumOptimizer, - LarsMomentumOptimizer, + AdadeltaOptimizer, AdagradOptimizer, AdamaxOptimizer, - DpsgdOptimizer, DecayedAdagradOptimizer, - AdadeltaOptimizer, - RMSPropOptimizer, + DpsgdOptimizer, + ExponentialMovingAverage, FtrlOptimizer, -) -from paddle.fluid.optimizer import ( + LarsMomentumOptimizer, + LookaheadOptimizer, ModelAverage, - ExponentialMovingAverage, + MomentumOptimizer, PipelineOptimizer, - LookaheadOptimizer, RecomputeOptimizer, + RMSPropOptimizer, ) -from paddle.fluid.dygraph import Linear -from test_imperative_base import new_program_scope -from paddle.fluid.framework import _test_eager_guard - -from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer # Note(wangzhongpu) # In dygraph, don't support ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer. diff --git a/python/paddle/fluid/tests/unittests/test_imperative_parallel_coalesce_split.py b/python/paddle/fluid/tests/unittests/test_imperative_parallel_coalesce_split.py index aee8c08582c4c0..721feaf188ea7c 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_parallel_coalesce_split.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_parallel_coalesce_split.py @@ -13,18 +13,19 @@ # limitations under the License. 
import unittest -import numpy as np from collections import OrderedDict +import numpy as np + import paddle import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid.dygraph.parallel import DataParallel from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.parallel import ( + DataParallel, _coalesce_tensors, - _split_tensors, _reshape_inplace, + _split_tensors, ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py b/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py index 7f723a5d665d5c..042fb294ff9fd1 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest -import paddle.fluid as fluid + import numpy as np + +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index 2630e3ce689f53..3765a6676d0bbe 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -13,18 +13,20 @@ # limitations under the License. import unittest + +import numpy as np +from test_imperative_base import new_program_scope +from utils import DyGraphProgramDescTracerTestHelper, is_equal_program + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.dygraph.nn import Embedding import paddle.fluid.framework as framework -from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.dygraph.base import to_variable +from paddle.fluid.dygraph.nn import Embedding +from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard +from paddle.fluid.optimizer import SGDOptimizer from paddle.jit import TracedLayer -from test_imperative_base import new_program_scope -import numpy as np -from utils import DyGraphProgramDescTracerTestHelper, is_equal_program -from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class SimpleLSTMRNN(fluid.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py index 38c8b9ca73dd46..fd43a5e9177160 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py @@ -13,16 +13,18 @@ # limitations under the License. 
import unittest + +import numpy as np +from test_imperative_base import new_program_scope +from test_imperative_ptb_rnn import PtbModel + import paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.framework as framework -from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.dygraph.base import to_variable -from test_imperative_base import new_program_scope -from test_imperative_ptb_rnn import PtbModel -import numpy as np from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.optimizer import SGDOptimizer class TestDygraphPtbRnnSortGradient(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py b/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py index 3cafc512cba1eb..26364bf1c98410 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py @@ -13,13 +13,15 @@ # limitations under the License. import unittest -import paddle.fluid as fluid + +import numpy as np +from test_imperative_base import new_program_scope + import paddle +import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.framework import _test_eager_guard from paddle.fluid.dygraph.base import to_variable -from test_imperative_base import new_program_scope -import numpy as np +from paddle.fluid.framework import _test_eager_guard class RecurrentTest(fluid.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py index 56b210f22d1eeb..0181c7a431c369 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py @@ -13,15 +13,16 @@ # limitations under the License. import unittest + import numpy as np +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid -from paddle.fluid import core -from paddle.fluid.optimizer import SGDOptimizer import paddle.fluid.dygraph.nn as nn -from test_imperative_base import new_program_scope +from paddle.fluid import core from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.optimizer import SGDOptimizer class Policy(fluid.dygraph.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index 18e8b8d1cc321d..eca1e2d8cce263 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -13,18 +13,18 @@ # limitations under the License. 
import unittest + import numpy as np +from test_imperative_base import new_program_scope +from utils import DyGraphProgramDescTracerTestHelper, is_equal_program import paddle import paddle.fluid as fluid -from paddle.fluid import core -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid import Pool2D, BatchNorm, Linear +from paddle.fluid import BatchNorm, Linear, Pool2D, core from paddle.fluid.dygraph.base import to_variable -from test_imperative_base import new_program_scope -from utils import DyGraphProgramDescTracerTestHelper, is_equal_program +from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard +from paddle.fluid.layer_helper import LayerHelper from paddle.jit import TracedLayer -from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph # NOTE(zhiqiu): run with FLAGS_cudnn_deterministic=1 diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet_sorted_gradient.py index 9e203092dc77b4..50afad1b5cf0ed 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet_sorted_gradient.py @@ -13,14 +13,15 @@ # limitations under the License. import unittest + import numpy as np +from test_imperative_base import new_program_scope +from test_imperative_resnet import ResNet import paddle import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.dygraph.base import to_variable -from test_imperative_base import new_program_scope -from test_imperative_resnet import ResNet from paddle.fluid.framework import _test_eager_guard batch_size = 8 diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index 82084cfd27ba53..6f2645750f0d24 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -14,15 +14,17 @@ import os import unittest + +import numpy as np + +import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.dygraph.nn import Embedding -from paddle.fluid.optimizer import Adam from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay -import numpy as np -import paddle +from paddle.fluid.dygraph.nn import Embedding from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.optimizer import Adam class SimpleLSTMRNN(fluid.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index 2f81d59a60916d..65e389b3596101 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -13,19 +13,19 @@ # limitations under the License. 
import os +import tempfile import unittest + +import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.dygraph.nn import Embedding -from paddle.optimizer import Adam from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay -import numpy as np -import paddle +from paddle.fluid.dygraph.nn import Embedding from paddle.fluid.framework import _test_eager_guard - -import tempfile +from paddle.optimizer import Adam class SimpleLSTMRNN(fluid.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py index d977dadeeba84e..1970c63bace057 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py @@ -13,15 +13,16 @@ # limitations under the License. import unittest + import numpy as np +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.dygraph.nn import Pool2D, BatchNorm, Linear -from test_imperative_base import new_program_scope +from paddle.fluid.dygraph.nn import BatchNorm, Linear, Pool2D from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.layer_helper import LayerHelper if fluid.is_compiled_with_cuda(): fluid.set_flags({'FLAGS_cudnn_deterministic': True}) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py index dc64e7d0574915..72c77e753f54b6 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py @@ -13,13 +13,15 @@ # limitations under the License. import unittest -import paddle.fluid as fluid -from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.optimizer import SGDOptimizer + import numpy as np -import paddle.fluid.core as core + import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.dygraph.base import to_variable from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.optimizer import SGDOptimizer class SimpleNet(paddle.nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py index 1f27b0190a6aee..cc31e922b5efa2 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py @@ -13,16 +13,18 @@ # limitations under the License. 
import unittest + +import numpy as np +from test_imperative_base import new_program_scope + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.dygraph.nn import Embedding import paddle.fluid.framework as framework -from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.dygraph.base import to_variable -from test_imperative_base import new_program_scope -import numpy as np +from paddle.fluid.dygraph.nn import Embedding from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.optimizer import SGDOptimizer class SimpleNet(fluid.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py b/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py index 488500cb3802a5..f374d0b073c271 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import multiprocessing import os -import sys import signal -import unittest -import multiprocessing +import sys import time +import unittest from paddle.fluid import core from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py index f9034aa45f6f93..9b52dc0a04731b 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -from paddle.tensor import random -import numpy as np -import unittest from paddle import _legacy_C_ops from paddle.fluid.framework import _test_eager_guard +from paddle.tensor import random if fluid.is_compiled_with_cuda(): fluid.core.globals()['FLAGS_cudnn_deterministic'] = True diff --git a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py index 825144cd16ade1..d129a9270ab5d0 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py @@ -15,11 +15,11 @@ import unittest import numpy as np +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid from paddle.fluid import core -from test_imperative_base import new_program_scope LOADED_VAR_SUFFIX = ".load_0" diff --git a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py index d163350f136b7e..b0f6418e24c2c6 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py @@ -15,13 +15,12 @@ import unittest import numpy as np +from jit_load_rename_var import rename_var_with_generator +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid -from paddle.fluid import core -from paddle.fluid import unique_name -from test_imperative_base import new_program_scope -from jit_load_rename_var import rename_var_with_generator +from paddle.fluid import core, unique_name LOADED_VAR_SUFFIX = ".load_0" diff --git a/python/paddle/fluid/tests/unittests/test_imperative_tensor_clear_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_tensor_clear_gradient.py index 0445e09232f2c2..946e55c1e9ea33 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_tensor_clear_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_tensor_clear_gradient.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid -import paddle -from paddle.fluid.wrapped_decorator import wrap_decorator import unittest from unittest import TestCase + import numpy as np +import paddle +import paddle.fluid as fluid +from paddle.fluid.wrapped_decorator import wrap_decorator + def _dygraph_guard_(func): def __impl__(*args, **kwargs): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_thread_local_has_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_thread_local_has_grad.py index 93a7469b55fdf6..8e7418b2f70754 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_thread_local_has_grad.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_thread_local_has_grad.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import threading +import time import unittest + +import numpy as np + import paddle -import time import paddle.nn as nn -import numpy as np -import threading from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py b/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py index 43cad7ed996848..72d987b0d4c5fd 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest -import paddle.fluid as fluid + import numpy as np -import os + +import paddle.fluid as fluid class SimpleFCLayer(fluid.dygraph.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py index 8b34780ea7cc63..ccdf99f0f783dd 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py @@ -13,16 +13,17 @@ # limitations under the License. import unittest + +import numpy as np +from test_imperative_base import new_program_scope + import paddle import paddle.fluid as fluid -from paddle.fluid import Embedding, LayerNorm, Linear, Layer -from paddle.fluid.dygraph import to_variable, guard -from paddle.jit import TracedLayer -from test_imperative_base import new_program_scope -from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard -from paddle.fluid import core -import numpy as np import paddle.nn.functional as F +from paddle.fluid import Embedding, Layer, LayerNorm, Linear, core +from paddle.fluid.dygraph import guard, to_variable +from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard +from paddle.jit import TracedLayer np.set_printoptions(suppress=True) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py index 231a3157104c81..175e42fb60017b 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid -import paddle -from paddle.fluid.wrapped_decorator import wrap_decorator import unittest from unittest import TestCase + import numpy as np + +import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.wrapped_decorator import wrap_decorator def _dygraph_guard_(func): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_using_non_zero_gpu.py b/python/paddle/fluid/tests/unittests/test_imperative_using_non_zero_gpu.py index 9d7ece633604d1..f9b3abf1b07e1b 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_using_non_zero_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_using_non_zero_gpu.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -import paddle.fluid as fluid import unittest -from paddle.fluid.dygraph import to_variable, guard + import numpy as np + +import paddle +import paddle.fluid as fluid +from paddle.fluid.dygraph import guard, to_variable from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_increment.py b/python/paddle/fluid/tests/unittests/test_increment.py index 4e435e55794e6a..18901aab4ccdaf 100755 --- a/python/paddle/fluid/tests/unittests/test_increment.py +++ b/python/paddle/fluid/tests/unittests/test_increment.py @@ -15,6 +15,7 @@ import unittest import numpy as np + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_index_add_op.py b/python/paddle/fluid/tests/unittests/test_index_add_op.py index c54479b3529683..0e2e9dbd1a4544 100644 --- a/python/paddle/fluid/tests/unittests/test_index_add_op.py +++ b/python/paddle/fluid/tests/unittests/test_index_add_op.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest -import paddle + import numpy as np from op_test import OpTest + +import paddle from paddle.fluid import Program diff --git a/python/paddle/fluid/tests/unittests/test_index_sample_op.py b/python/paddle/fluid/tests/unittests/test_index_sample_op.py index 550ddfe344f3eb..84defacc099874 100755 --- a/python/paddle/fluid/tests/unittests/test_index_sample_op.py +++ b/python/paddle/fluid/tests/unittests/test_index_sample_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest -import paddle -import paddle.fluid as fluid + import numpy as np from op_test import OpTest +import paddle +import paddle.fluid as fluid + class TestIndexSampleOp(OpTest): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_index_select_op.py b/python/paddle/fluid/tests/unittests/test_index_select_op.py index edb2dc711822a5..39895f2691ca3f 100644 --- a/python/paddle/fluid/tests/unittests/test_index_select_op.py +++ b/python/paddle/fluid/tests/unittests/test_index_select_op.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest -import paddle + import numpy as np from op_test import OpTest + +import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_infer_no_need_buffer_slots.py b/python/paddle/fluid/tests/unittests/test_infer_no_need_buffer_slots.py index 659aa36c334d37..ed64e80e65872e 100644 --- a/python/paddle/fluid/tests/unittests/test_infer_no_need_buffer_slots.py +++ b/python/paddle/fluid/tests/unittests/test_infer_no_need_buffer_slots.py @@ -15,8 +15,8 @@ import unittest import paddle.fluid as fluid -import paddle.fluid.framework as framework import paddle.fluid.core as core +import paddle.fluid.framework as framework class TestInferNoNeedBufferSlots(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_inference_api.py b/python/paddle/fluid/tests/unittests/test_inference_api.py index e62b258fefefde..dff2c83623736d 100644 --- a/python/paddle/fluid/tests/unittests/test_inference_api.py +++ b/python/paddle/fluid/tests/unittests/test_inference_api.py @@ -13,15 +13,20 @@ # limitations under the License. 
import unittest + import paddle paddle.enable_static() import numpy as np + import paddle.fluid as fluid -from paddle.fluid.core import PaddleTensor -from paddle.fluid.core import PaddleDType -from paddle.inference import Config, create_predictor -from paddle.inference import get_trt_compile_version, get_trt_runtime_version +from paddle.fluid.core import PaddleDType, PaddleTensor +from paddle.inference import ( + Config, + create_predictor, + get_trt_compile_version, + get_trt_runtime_version, +) class TestInferenceApi(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_inference_model_io.py b/python/paddle/fluid/tests/unittests/test_inference_model_io.py index 1e98b7dbb258cc..b7aa6e7ba0ca58 100644 --- a/python/paddle/fluid/tests/unittests/test_inference_model_io.py +++ b/python/paddle/fluid/tests/unittests/test_inference_model_io.py @@ -12,25 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest - -import os import importlib +import os import tempfile -import numpy as np -import paddle.fluid.core as core -import paddle.fluid as fluid +import unittest import warnings +import numpy as np + import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.fluid.executor as executor import paddle.fluid.layers as layers import paddle.fluid.optimizer as optimizer from paddle.fluid.compiler import CompiledProgram from paddle.fluid.framework import Program, program_guard from paddle.fluid.io import ( - save_inference_model, load_inference_model, + save_inference_model, save_persistables, ) diff --git a/python/paddle/fluid/tests/unittests/test_initializer.py b/python/paddle/fluid/tests/unittests/test_initializer.py index 4454d7a9bb8d5d..11e871e26da438 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer.py +++ b/python/paddle/fluid/tests/unittests/test_initializer.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import math import unittest +import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework diff --git a/python/paddle/fluid/tests/unittests/test_initializer_nn.py b/python/paddle/fluid/tests/unittests/test_initializer_nn.py index c9b81720ba94b1..75dc9e224e0a84 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer_nn.py +++ b/python/paddle/fluid/tests/unittests/test_initializer_nn.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np import unittest +import numpy as np + import paddle -import paddle.nn as nn import paddle.fluid as fluid import paddle.fluid.framework as framework +import paddle.nn as nn import paddle.nn.initializer as initializer from paddle.fluid.core import VarDesc diff --git a/python/paddle/fluid/tests/unittests/test_inner.py b/python/paddle/fluid/tests/unittests/test_inner.py index 3e30e9fa2284b7..7fe097ba22b55c 100644 --- a/python/paddle/fluid/tests/unittests/test_inner.py +++ b/python/paddle/fluid/tests/unittests/test_inner.py @@ -17,8 +17,8 @@ import numpy as np import paddle -from paddle.static import Program, program_guard from paddle.fluid.framework import _test_eager_guard +from paddle.static import Program, program_guard class TestMultiplyApi(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_inplace.py b/python/paddle/fluid/tests/unittests/test_inplace.py index 2496ee37a05e4b..8912cad2c3b9d2 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace.py +++ b/python/paddle/fluid/tests/unittests/test_inplace.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np import paddle diff --git a/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py b/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py index 6aa644e3ac6254..fd9d7a26b1abc6 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + import numpy as np -import os -import paddle.fluid.core as core -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core class TestInplaceANBOpTraining(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py index 252fdf68699b6c..9f448e7f07a47b 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py @@ -14,9 +14,10 @@ import unittest +import numpy as np + import paddle import paddle.fluid as fluid -import numpy as np class ConvBNLayer(fluid.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_inplace_and_clear_gradient.py b/python/paddle/fluid/tests/unittests/test_inplace_and_clear_gradient.py index aa7e837b6cd280..087be59cd32109 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_and_clear_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_and_clear_gradient.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np + import paddle from paddle import _legacy_C_ops -import unittest paddle.disable_static() diff --git a/python/paddle/fluid/tests/unittests/test_inplace_auto_generated_apis.py b/python/paddle/fluid/tests/unittests/test_inplace_auto_generated_apis.py index d8f8b5dbc72ce1..bbf06f74c24d0f 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_auto_generated_apis.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_auto_generated_apis.py @@ -13,6 +13,7 @@ # limitations under the License. 
import unittest + import numpy as np import paddle diff --git a/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py b/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py index 8e9b154a41cce0..fb6f04d48f04a6 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid -import numpy as np import unittest +import numpy as np + +import paddle.fluid as fluid + class TestSoftmaxWithXe(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_input_spec.py b/python/paddle/fluid/tests/unittests/test_input_spec.py index 88ffc212696454..3a623f463c1813 100644 --- a/python/paddle/fluid/tests/unittests/test_input_spec.py +++ b/python/paddle/fluid/tests/unittests/test_input_spec.py @@ -13,17 +13,18 @@ # limitations under the License. import os -import unittest import tempfile +import unittest + import numpy as np import paddle import paddle.fluid as fluid -from paddle.static import InputSpec -from paddle.fluid.framework import convert_np_dtype_to_dtype_ from paddle.fluid.dygraph.dygraph_to_static.utils import ( _compatible_non_tensor_spec, ) +from paddle.fluid.framework import convert_np_dtype_to_dtype_ +from paddle.static import InputSpec class TestInputSpec(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_install_check.py b/python/paddle/fluid/tests/unittests/test_install_check.py index 15f2b5f3b7eedb..4fe8cd6203852b 100644 --- a/python/paddle/fluid/tests/unittests/test_install_check.py +++ b/python/paddle/fluid/tests/unittests/test_install_check.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + import paddle -import os class TestInstallCheck(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_instance_norm_op.py b/python/paddle/fluid/tests/unittests/test_instance_norm_op.py index c5cf210f340b07..406fe75fc388d2 100644 --- a/python/paddle/fluid/tests/unittests/test_instance_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_instance_norm_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard from paddle.fluid.dygraph import to_variable from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py index 31719eecfa9dca..705b93ba9c620b 100644 --- a/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py @@ -13,13 +13,14 @@ # limitations under the License. 
import unittest + import numpy as np -import paddle.fluid.core as core -import paddle.fluid as fluid + +import paddle import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard from paddle.fluid.framework import _test_eager_guard -import paddle class TestInstanceNorm(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_inverse_op.py b/python/paddle/fluid/tests/unittests/test_inverse_op.py index f06d32170ba9b8..656b51bce3b987 100644 --- a/python/paddle/fluid/tests/unittests/test_inverse_op.py +++ b/python/paddle/fluid/tests/unittests/test_inverse_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest + +import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle -from op_test import OpTest class TestInverseOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_io_save_load.py b/python/paddle/fluid/tests/unittests/test_io_save_load.py index 80c2ae1f92ff56..c24e48d7407811 100644 --- a/python/paddle/fluid/tests/unittests/test_io_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_io_save_load.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import tempfile import unittest + import paddle import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.framework import _test_eager_guard -import tempfile -import os class TestSaveLoadAPIError(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_iou_similarity_op.py b/python/paddle/fluid/tests/unittests/test_iou_similarity_op.py index 3d131339794832..614766b6609ba1 100644 --- a/python/paddle/fluid/tests/unittests/test_iou_similarity_op.py +++ b/python/paddle/fluid/tests/unittests/test_iou_similarity_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy.random as random from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_ir_graph.py b/python/paddle/fluid/tests/unittests/test_ir_graph.py index a8effa96920a84..0aabb05be7d449 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_graph.py +++ b/python/paddle/fluid/tests/unittests/test_ir_graph.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py b/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py index 62ea08307ce157..b7a0ab0d450426 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py +++ b/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py @@ -14,11 +14,13 @@ import os import unittest + import numpy as np +from parallel_executor_test_base import DeviceType, TestParallelExecutorBase + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid -from parallel_executor_test_base import TestParallelExecutorBase, DeviceType +import paddle.fluid.core as core def fc_with_batchnorm(use_feed): diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py index 657c267767dfa4..595dd02e432bda 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py @@ -14,16 +14,15 @@ # nlp model stack of op operate on lod. It's a classical test case in optimize pass. 
+import unittest + import numpy as np import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers - -import unittest import paddle.fluid.core as core - -from paddle.fluid import compiler, Program, program_guard +import paddle.fluid.layers as layers +from paddle.fluid import Program, compiler, program_guard from paddle.fluid.executor import Executor from paddle.fluid.optimizer import MomentumOptimizer diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py index 85f78d9aef4a1b..53d01d9df69335 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py @@ -14,11 +14,13 @@ # nlp model stack of op operate on lod. It's a classical test case in optimize pass. -import paddle -import paddle.fluid as fluid import unittest + from ir_memory_optimize_net_base import TestIrMemOptBase +import paddle +import paddle.fluid as fluid + def lstm_net( data, diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py index a15079021ed561..6cf56915832d99 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from parallel_executor_test_base import TestParallelExecutorBase, DeviceType -import paddle.fluid as fluid -import paddle.fluid.core as core +import unittest + import numpy as np +from parallel_executor_test_base import DeviceType, TestParallelExecutorBase + import paddle -import unittest +import paddle.fluid as fluid +import paddle.fluid.core as core def _feed_data_helper(): diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py index 8d1191fddd5b3e..175cab191e6a77 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py @@ -14,11 +14,12 @@ import os import unittest + import paddle.fluid.core as core os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" -from parallel_executor_test_base import TestParallelExecutorBase, DeviceType +from parallel_executor_test_base import DeviceType, TestParallelExecutorBase from test_parallel_executor_transformer import get_feed_data_reader, transformer diff --git a/python/paddle/fluid/tests/unittests/test_is_complex.py b/python/paddle/fluid/tests/unittests/test_is_complex.py index fd2e58eff6399b..f65546da552f05 100644 --- a/python/paddle/fluid/tests/unittests/test_is_complex.py +++ b/python/paddle/fluid/tests/unittests/test_is_complex.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -import numpy as np import unittest +import numpy as np + +import paddle + class TestIsComplex(unittest.TestCase): def test_for_integer(self): diff --git a/python/paddle/fluid/tests/unittests/test_is_empty_op.py b/python/paddle/fluid/tests/unittests/test_is_empty_op.py index d6235f7abc4bb9..6cf410eaede847 100644 --- a/python/paddle/fluid/tests/unittests/test_is_empty_op.py +++ b/python/paddle/fluid/tests/unittests/test_is_empty_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_is_integer.py b/python/paddle/fluid/tests/unittests/test_is_integer.py index 9e0eae91292966..2ef57ca1a7aefa 100644 --- a/python/paddle/fluid/tests/unittests/test_is_integer.py +++ b/python/paddle/fluid/tests/unittests/test_is_integer.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import numpy as np import unittest +import numpy as np + +import paddle + class TestIsInteger(unittest.TestCase): def test_for_integer(self): diff --git a/python/paddle/fluid/tests/unittests/test_is_tensor.py b/python/paddle/fluid/tests/unittests/test_is_tensor.py index b91b5aaf3e7022..3ba3013fac1097 100644 --- a/python/paddle/fluid/tests/unittests/test_is_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_is_tensor.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle DELTA = 0.00001 diff --git a/python/paddle/fluid/tests/unittests/test_isclose_op.py b/python/paddle/fluid/tests/unittests/test_isclose_op.py index 5d53c337ce18ce..fc2a5cd5ebef2d 100644 --- a/python/paddle/fluid/tests/unittests/test_isclose_op.py +++ b/python/paddle/fluid/tests/unittests/test_isclose_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_isfinite_op.py b/python/paddle/fluid/tests/unittests/test_isfinite_op.py index cbe12d1cb3f4ba..24f6ab3dea2e2a 100644 --- a/python/paddle/fluid/tests/unittests/test_isfinite_op.py +++ b/python/paddle/fluid/tests/unittests/test_isfinite_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from op_test import OpTest class TestInf(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py b/python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py index e3d094890ac7c6..ef9da360863139 100644 --- a/python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.fluid as fluid import unittest + import numpy as np + +import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_jit_layer.py b/python/paddle/fluid/tests/unittests/test_jit_layer.py index e6718fafeab92b..c670ac00aed6c3 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_layer.py +++ b/python/paddle/fluid/tests/unittests/test_jit_layer.py @@ -13,16 +13,16 @@ # limitations under the License. 
import os -import paddle -import unittest import tempfile +import unittest + import numpy as np -from paddle.static import InputSpec + +import paddle from paddle.fluid.framework import _dygraph_place_guard +from paddle.jit.dy2static.program_translator import ProgramTranslator from paddle.jit.layer import Layer -from paddle.jit.dy2static.program_translator import ( - ProgramTranslator, -) +from paddle.static import InputSpec paddle.seed(1) diff --git a/python/paddle/fluid/tests/unittests/test_jit_pre_save_hooks.py b/python/paddle/fluid/tests/unittests/test_jit_pre_save_hooks.py index 69203733c2cb24..795c7f9b43c7a9 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_pre_save_hooks.py +++ b/python/paddle/fluid/tests/unittests/test_jit_pre_save_hooks.py @@ -17,9 +17,9 @@ import paddle from paddle.jit.api import ( - _run_save_pre_hooks, _clear_save_pre_hooks, _register_save_pre_hook, + _run_save_pre_hooks, ) _counter = 0 diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py index 7632c958464a4f..faf6a61df3f390 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py @@ -16,17 +16,19 @@ import os import pickle import shutil -import unittest import tempfile +import unittest + import numpy as np + import paddle -from paddle.static import InputSpec import paddle.fluid as fluid -from paddle.fluid.layers.utils import flatten +from paddle.fluid import unique_name from paddle.fluid.dygraph import Linear -from paddle.jit.api import declarative from paddle.fluid.dygraph.io import INFER_PARAMS_INFO_SUFFIX -from paddle.fluid import unique_name +from paddle.fluid.layers.utils import flatten +from paddle.jit.api import declarative +from paddle.static import InputSpec BATCH_SIZE = 32 BATCH_NUM = 10 diff --git a/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py index 9fd0868f1e4c77..5d6e3af092accb 100644 --- a/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py @@ -11,10 +11,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest + import numpy as np from op_test import OpTest + +import paddle from paddle.nn.functional import kl_div diff --git a/python/paddle/fluid/tests/unittests/test_kron_op.py b/python/paddle/fluid/tests/unittests/test_kron_op.py index a2f1657a06e660..ae9ce0525d553c 100644 --- a/python/paddle/fluid/tests/unittests/test_kron_op.py +++ b/python/paddle/fluid/tests/unittests/test_kron_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_kthvalue_op.py b/python/paddle/fluid/tests/unittests/test_kthvalue_op.py index f3f5ccf18797d9..276e3f4b8aa9bc 100644 --- a/python/paddle/fluid/tests/unittests/test_kthvalue_op.py +++ b/python/paddle/fluid/tests/unittests/test_kthvalue_op.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_l1_loss.py b/python/paddle/fluid/tests/unittests/test_l1_loss.py index 00b716ce871548..f1bce0d1b6a92c 100644 --- a/python/paddle/fluid/tests/unittests/test_l1_loss.py +++ b/python/paddle/fluid/tests/unittests/test_l1_loss.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -import numpy as np -import unittest class TestFunctionalL1Loss(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_l1_norm_op.py b/python/paddle/fluid/tests/unittests/test_l1_norm_op.py index 03d69e23347502..7ca647da0a3b78 100644 --- a/python/paddle/fluid/tests/unittests/test_l1_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_l1_norm_op.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest + +import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_label_smooth_functional.py b/python/paddle/fluid/tests/unittests/test_label_smooth_functional.py index 7f6e6d8434d18a..3c97be9c42b85e 100644 --- a/python/paddle/fluid/tests/unittests/test_label_smooth_functional.py +++ b/python/paddle/fluid/tests/unittests/test_label_smooth_functional.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np + import paddle -from paddle import fluid import paddle.fluid.dygraph as dg import paddle.nn.functional as F -import unittest +from paddle import fluid class LabelSmoothTestCase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_label_smooth_op.py b/python/paddle/fluid/tests/unittests/test_label_smooth_op.py index e8c4fbc43d32a5..b62a75438a7fd4 100644 --- a/python/paddle/fluid/tests/unittests/test_label_smooth_op.py +++ b/python/paddle/fluid/tests/unittests/test_label_smooth_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_lamb_op.py b/python/paddle/fluid/tests/unittests/test_lamb_op.py index 9c3bbe6d830bc0..ba3a865d31c019 100644 --- a/python/paddle/fluid/tests/unittests/test_lamb_op.py +++ b/python/paddle/fluid/tests/unittests/test_lamb_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle from paddle.fluid import core from paddle.fluid.op import Operator diff --git a/python/paddle/fluid/tests/unittests/test_lambv2_op.py b/python/paddle/fluid/tests/unittests/test_lambv2_op.py index bb2aee8873984e..6b513008109ec6 100644 --- a/python/paddle/fluid/tests/unittests/test_lambv2_op.py +++ b/python/paddle/fluid/tests/unittests/test_lambv2_op.py @@ -13,12 +13,14 @@ # limitations under the License. 
import unittest + import numpy as np -from paddle.fluid import core -from paddle.fluid.dygraph.base import switch_to_static_graph + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers +from paddle.fluid import core +from paddle.fluid.dygraph.base import switch_to_static_graph class LAMBOptimizer(paddle.optimizer.Lamb): diff --git a/python/paddle/fluid/tests/unittests/test_launch_coverage.py b/python/paddle/fluid/tests/unittests/test_launch_coverage.py index 1f00ea9af55639..006915662876ee 100644 --- a/python/paddle/fluid/tests/unittests/test_launch_coverage.py +++ b/python/paddle/fluid/tests/unittests/test_launch_coverage.py @@ -13,14 +13,14 @@ # limitations under the License. import unittest +from argparse import REMAINDER, ArgumentParser -from argparse import ArgumentParser, REMAINDER +from paddle.distributed.fleet.launch_utils import find_free_ports from paddle.distributed.utils.launch_utils import ( _print_arguments, - get_gpus, get_cluster_from_args, + get_gpus, ) -from paddle.distributed.fleet.launch_utils import find_free_ports def _parse_args(): diff --git a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py index 502f4ec5bb9775..200372d2d143f6 100644 --- a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py @@ -13,15 +13,16 @@ # limitations under the License. import unittest +from functools import reduce +from operator import mul + import numpy as np -import paddle +from op_test import _set_use_system_allocator -from operator import mul -import paddle.fluid.core as core +import paddle import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.nn.functional as F -from functools import reduce -from op_test import _set_use_system_allocator from paddle.fluid import Program, program_guard from paddle.fluid.contrib.mixed_precision.fp16_utils import ( _keep_layer_norm_scale_bias_to_fp32, diff --git a/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py index 6e9e4931465434..8b258c75661655 100644 --- a/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py @@ -13,13 +13,14 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid.core as core -import paddle.fluid as fluid -from paddle.fluid.framework import _test_eager_guard + +import paddle import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard -import paddle +from paddle.fluid.framework import _test_eager_guard class TestDygraphLayerNormv2(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index c5cf9253f755fe..f3f5598f52c733 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -12,29 +12,31 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import contextlib +import inspect import unittest -import contextlib import numpy as np from decorator_helper import prog_scope -import inspect +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid -from paddle.fluid.layers.device import get_places +import paddle.fluid.layers as layers import paddle.fluid.nets as nets -from paddle.fluid.framework import Program, program_guard, default_main_program -from paddle.fluid.param_attr import ParamAttr +import paddle.nn.functional as F from paddle.fluid import core +from paddle.fluid.dygraph import base, nn, to_variable +from paddle.fluid.framework import ( + Program, + _test_eager_guard, + default_main_program, + program_guard, +) from paddle.fluid.initializer import Constant -import paddle.fluid.layers as layers -from test_imperative_base import new_program_scope -from paddle.fluid.dygraph import nn -from paddle.fluid.dygraph import base -from paddle.fluid.dygraph import to_variable -from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.layers.device import get_places +from paddle.fluid.param_attr import ParamAttr from paddle.tensor import random -import paddle.nn.functional as F class LayerTest(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_layout_autotune.py b/python/paddle/fluid/tests/unittests/test_layout_autotune.py index 259fe01601e968..70c283a549a09b 100644 --- a/python/paddle/fluid/tests/unittests/test_layout_autotune.py +++ b/python/paddle/fluid/tests/unittests/test_layout_autotune.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import json +import os import tempfile import unittest import warnings diff --git a/python/paddle/fluid/tests/unittests/test_lazy_init.py b/python/paddle/fluid/tests/unittests/test_lazy_init.py index e8148e542b1802..8b2bef91c00e8e 100644 --- a/python/paddle/fluid/tests/unittests/test_lazy_init.py +++ b/python/paddle/fluid/tests/unittests/test_lazy_init.py @@ -12,11 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest + import numpy as np + +import paddle from paddle import LazyGuard -from paddle.nn import Linear, Layer +from paddle.fluid import unique_name +from paddle.nn import Layer, Linear from paddle.nn.initializer import ( Constant, Normal, @@ -25,7 +28,6 @@ XavierNormal, XavierUniform, ) -from paddle.fluid import unique_name class TestInitializerBase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_lbfgs.py b/python/paddle/fluid/tests/unittests/test_lbfgs.py index 21d96d17c2ad9f..a13aeadb084664 100644 --- a/python/paddle/fluid/tests/unittests/test_lbfgs.py +++ b/python/paddle/fluid/tests/unittests/test_lbfgs.py @@ -18,7 +18,6 @@ import paddle import paddle.nn.functional as F - from paddle.incubate.optimizer.functional.lbfgs import minimize_lbfgs np.random.seed(123) diff --git a/python/paddle/fluid/tests/unittests/test_lcm.py b/python/paddle/fluid/tests/unittests/test_lcm.py index 83f3f82d7e7a20..0e110468d6a966 100644 --- a/python/paddle/fluid/tests/unittests/test_lcm.py +++ b/python/paddle/fluid/tests/unittests/test_lcm.py @@ -13,7 +13,9 @@ # limitations under the License. 
import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_lerp_op.py b/python/paddle/fluid/tests/unittests/test_lerp_op.py index 1cae3fbaba9ac9..625d5b1b13dfe7 100644 --- a/python/paddle/fluid/tests/unittests/test_lerp_op.py +++ b/python/paddle/fluid/tests/unittests/test_lerp_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_lgamma_op.py b/python/paddle/fluid/tests/unittests/test_lgamma_op.py index 3b88e7fda57c9c..43d0b139acd1d0 100644 --- a/python/paddle/fluid/tests/unittests/test_lgamma_op.py +++ b/python/paddle/fluid/tests/unittests/test_lgamma_op.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import math +import unittest + import numpy as np -import paddle from op_test import OpTest from scipy import special +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_limit_by_capacity_op.py b/python/paddle/fluid/tests/unittests/test_limit_by_capacity_op.py index 02c71ea3a4147d..597ffcf79714b4 100644 --- a/python/paddle/fluid/tests/unittests/test_limit_by_capacity_op.py +++ b/python/paddle/fluid/tests/unittests/test_limit_by_capacity_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest -import paddle + import numpy as np + +import paddle from paddle.distributed.models.moe import utils from paddle.fluid import core from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_linalg_cond.py b/python/paddle/fluid/tests/unittests/test_linalg_cond.py index 4724bbc6d08020..61ad09cbfe400b 100644 --- a/python/paddle/fluid/tests/unittests/test_linalg_cond.py +++ b/python/paddle/fluid/tests/unittests/test_linalg_cond.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.static as static from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py b/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py index f627057ed242da..b82fb8ed09a0a5 100644 --- a/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py +++ b/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_linalg_pinv_op.py b/python/paddle/fluid/tests/unittests/test_linalg_pinv_op.py index 8c39cbb7c9517f..353b4d8da55e9c 100644 --- a/python/paddle/fluid/tests/unittests/test_linalg_pinv_op.py +++ b/python/paddle/fluid/tests/unittests/test_linalg_pinv_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_linear.py b/python/paddle/fluid/tests/unittests/test_linear.py index 0cec046354294c..71f5c831ae4b6d 100644 --- a/python/paddle/fluid/tests/unittests/test_linear.py +++ b/python/paddle/fluid/tests/unittests/test_linear.py @@ -13,11 +13,13 @@ # limitations under the License. 
import unittest + import numpy as np -import paddle.fluid.core as core + import paddle -from paddle import fluid +import paddle.fluid.core as core import paddle.nn.functional as F +from paddle import fluid class LinearTestCase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py b/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py index d352275c263f8c..6899a340633787 100755 --- a/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py +++ b/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import random -import numpy as np +import unittest +import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_linear_interp_op.py b/python/paddle/fluid/tests/unittests/test_linear_interp_op.py index a413f2f7a325b9..c05f55a3634b4b 100755 --- a/python/paddle/fluid/tests/unittests/test_linear_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_linear_interp_op.py @@ -14,11 +14,13 @@ import platform import unittest + import numpy as np from op_test import OpTest + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_linear_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_linear_interp_v2_op.py index 38817856fc999a..a56419b81a1507 100755 --- a/python/paddle/fluid/tests/unittests/test_linear_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_linear_interp_v2_op.py @@ -14,11 +14,13 @@ import platform import unittest + import numpy as np from op_test import OpTest + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard from paddle.nn.functional import interpolate diff --git a/python/paddle/fluid/tests/unittests/test_linspace.py b/python/paddle/fluid/tests/unittests/test_linspace.py index 3549d46ec9eec9..2e2c2718f9b97b 100644 --- a/python/paddle/fluid/tests/unittests/test_linspace.py +++ b/python/paddle/fluid/tests/unittests/test_linspace.py @@ -13,12 +13,13 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid -from paddle.fluid import Program, program_guard -from paddle.fluid import core +from paddle.fluid import Program, core, program_guard from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py index 3bac2d40ae3a20..4c63a4f2a9e4c1 100644 --- a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py +++ b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py @@ -13,17 +13,19 @@ # limitations under the License. 
import os + from dist_test_utils import remove_ps_flag, silentremove silentremove("test_handle_signal_in_serv_op.flag") silentremove("test_list_and_serv_run_empty_optimize_block.flag") -import paddle -import paddle.fluid as fluid import time import unittest from multiprocessing import Process +import paddle +import paddle.fluid as fluid + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_load_op.py b/python/paddle/fluid/tests/unittests/test_load_op.py index a299e6aaffe004..acf0a810293872 100644 --- a/python/paddle/fluid/tests/unittests/test_load_op.py +++ b/python/paddle/fluid/tests/unittests/test_load_op.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import tempfile import unittest + import numpy as np + +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers -import os -import tempfile -import paddle class TestLoadOp(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_load_op_xpu.py b/python/paddle/fluid/tests/unittests/test_load_op_xpu.py index 21e4636ce5b698..3ede3b26914881 100644 --- a/python/paddle/fluid/tests/unittests/test_load_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/test_load_op_xpu.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest -import numpy as np import os import tempfile +import unittest + +import numpy as np + +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers -import paddle @unittest.skipIf( diff --git a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py index 6bace212953a62..98518f52f669a4 100644 --- a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py +++ b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py @@ -13,14 +13,15 @@ # limitations under the License. import os +import tempfile import unittest + import numpy as np +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid from paddle.fluid import core -from test_imperative_base import new_program_scope -import tempfile def convolutional_neural_network(img): diff --git a/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py b/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py index 756b2ab77320ef..15d81c68e8c8f3 100644 --- a/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py +++ b/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import os import shutil +import unittest + import paddle.fluid as fluid from paddle.fluid.executor import Executor diff --git a/python/paddle/fluid/tests/unittests/test_locality_aware_nms_op.py b/python/paddle/fluid/tests/unittests/test_locality_aware_nms_op.py index a09707c5767a91..e5b71c2d65e78f 100644 --- a/python/paddle/fluid/tests/unittests/test_locality_aware_nms_op.py +++ b/python/paddle/fluid/tests/unittests/test_locality_aware_nms_op.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import copy import unittest + import numpy as np -import copy from op_test import OpTest from test_multiclass_nms_op import iou + import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_lod_array_length_op.py b/python/paddle/fluid/tests/unittests/test_lod_array_length_op.py index 4b702d0a0f0ac1..4b18c2a9fd81fc 100644 --- a/python/paddle/fluid/tests/unittests/test_lod_array_length_op.py +++ b/python/paddle/fluid/tests/unittests/test_lod_array_length_op.py @@ -14,13 +14,14 @@ import unittest +import numpy + import paddle -import paddle.fluid.layers as layers -from paddle.fluid.executor import Executor -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.fluid.layers as layers from paddle.fluid import Program, program_guard -import numpy +from paddle.fluid.executor import Executor class TestLoDArrayLength(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_lod_rank_table.py b/python/paddle/fluid/tests/unittests/test_lod_rank_table.py index 88ad04a52c2054..c523c31a24b44e 100644 --- a/python/paddle/fluid/tests/unittests/test_lod_rank_table.py +++ b/python/paddle/fluid/tests/unittests/test_lod_rank_table.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + +import numpy + +from paddle.fluid import Program, core, program_guard +from paddle.fluid.executor import Executor from paddle.fluid.layers import data from paddle.fluid.layers.control_flow import lod_rank_table -from paddle.fluid.executor import Executor -from paddle.fluid import Program, program_guard, core -import numpy -import unittest class TestLoDRankTable(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_lod_reset_op.py b/python/paddle/fluid/tests/unittests/test_lod_reset_op.py index 5e9bd45907a40c..a9c8b47cbb991b 100644 --- a/python/paddle/fluid/tests/unittests/test_lod_reset_op.py +++ b/python/paddle/fluid/tests/unittests/test_lod_reset_op.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid as fluid from op_test import OpTest + +import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_lod_tensor_array.py b/python/paddle/fluid/tests/unittests/test_lod_tensor_array.py index c6656ff1caa66b..d38627ebec46e2 100644 --- a/python/paddle/fluid/tests/unittests/test_lod_tensor_array.py +++ b/python/paddle/fluid/tests/unittests/test_lod_tensor_array.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest + +import numpy as np + import paddle import paddle.fluid.core as core -import numpy as np class TestLoDTensorArray(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_log_loss_op.py b/python/paddle/fluid/tests/unittests/test_log_loss_op.py index 35d31128732bd8..908f4bf94e510f 100644 --- a/python/paddle/fluid/tests/unittests/test_log_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_log_loss_op.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_log_softmax.py b/python/paddle/fluid/tests/unittests/test_log_softmax.py index ab371869c85c97..274d4cf05bd098 100644 --- a/python/paddle/fluid/tests/unittests/test_log_softmax.py +++ b/python/paddle/fluid/tests/unittests/test_log_softmax.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np -from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16 + import paddle import paddle.fluid.core as core import paddle.nn.functional as F +from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16 np.random.seed(10) diff --git a/python/paddle/fluid/tests/unittests/test_logcumsumexp_op.py b/python/paddle/fluid/tests/unittests/test_logcumsumexp_op.py index 96f0e4bff6a669..556db0f2585644 100644 --- a/python/paddle/fluid/tests/unittests/test_logcumsumexp_op.py +++ b/python/paddle/fluid/tests/unittests/test_logcumsumexp_op.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Optional -import unittest import itertools +import unittest +from typing import Optional + import numpy as np +from op_test import OpTest + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid -from op_test import OpTest +import paddle.fluid.core as core def np_naive_logcumsumexp(x: np.ndarray, axis: Optional[int] = None): diff --git a/python/paddle/fluid/tests/unittests/test_logical_op.py b/python/paddle/fluid/tests/unittests/test_logical_op.py index c05d99a4d94167..02466986238c10 100755 --- a/python/paddle/fluid/tests/unittests/test_logical_op.py +++ b/python/paddle/fluid/tests/unittests/test_logical_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np + import paddle -from paddle.static import Program, program_guard, Executor -from paddle.framework import _non_static_mode from paddle.fluid.framework import _test_eager_guard +from paddle.framework import _non_static_mode +from paddle.static import Executor, Program, program_guard SUPPORTED_DTYPES = [ bool, diff --git a/python/paddle/fluid/tests/unittests/test_logit_op.py b/python/paddle/fluid/tests/unittests/test_logit_op.py index 74f645fb68d1c8..464247d3e73ab0 100644 --- a/python/paddle/fluid/tests/unittests/test_logit_op.py +++ b/python/paddle/fluid/tests/unittests/test_logit_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_logspace.py b/python/paddle/fluid/tests/unittests/test_logspace.py index 170d056895a941..2a0d466a600d89 100644 --- a/python/paddle/fluid/tests/unittests/test_logspace.py +++ b/python/paddle/fluid/tests/unittests/test_logspace.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_logsumexp.py b/python/paddle/fluid/tests/unittests/test_logsumexp.py index 1389ad1564fd1c..eed00f73e72741 100644 --- a/python/paddle/fluid/tests/unittests/test_logsumexp.py +++ b/python/paddle/fluid/tests/unittests/test_logsumexp.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle import unittest + import numpy as np -import paddle.fluid.core as core from op_test import OpTest +import paddle +import paddle.fluid.core as core + def ref_logsumexp(x, axis=None, keepdim=False, reduce_all=False): if isinstance(axis, int): diff --git a/python/paddle/fluid/tests/unittests/test_lookahead.py b/python/paddle/fluid/tests/unittests/test_lookahead.py index ab8394c2f7dd29..e90c9bf0c8b629 100644 --- a/python/paddle/fluid/tests/unittests/test_lookahead.py +++ b/python/paddle/fluid/tests/unittests/test_lookahead.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid import paddle.nn as nn from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py index ca60044439649f..ec9eee17c01416 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py @@ -13,17 +13,19 @@ # limitations under the License. import unittest + import numpy as np + +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle import enable_static +from paddle.fluid.op import Operator from paddle.fluid.tests.unittests.op_test import ( OpTest, convert_float_to_uint16, convert_uint16_to_float, skip_check_grad_ci, ) -import paddle.fluid as fluid -import paddle.fluid.core as core -from paddle.fluid.op import Operator -from paddle import enable_static def _lookup(weights, ids, flat_ids, op_version="lookup_table"): diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_dequant_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_dequant_op.py index 42a491344c62cd..19b7ca2597be5a 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_dequant_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_dequant_op.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import struct import unittest + import numpy as np from op_test import OpTest -import struct class TestLookupTableDequantOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_op.py index 02fccaaca22328..1892ce5c56d55a 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_op.py @@ -13,13 +13,15 @@ # limitations under the License. import unittest + import numpy as np -from op_test import OpTest, skip_check_grad_ci, check_out_dtype -import paddle.fluid.core as core -from paddle.fluid.op import Operator +from op_test import OpTest, check_out_dtype, skip_check_grad_ci + import paddle.fluid as fluid -from paddle.fluid import Program, program_guard +import paddle.fluid.core as core import paddle.nn.functional as F +from paddle.fluid import Program, program_guard +from paddle.fluid.op import Operator class TestLookupTableOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py index e0dbf743430c66..c022f564809a98 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py @@ -13,18 +13,20 @@ # limitations under the License. 
import unittest + import numpy as np + import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid.tests.unittests.op_test import convert_uint16_to_float from paddle.fluid.tests.unittests.test_lookup_table_bf16_op import ( - _lookup, TestLookupTableBF16Op, TestLookupTableBF16OpIds4D, TestLookupTableBF16OpWIsSelectedRows, TestLookupTableBF16OpWIsSelectedRows4DIds, + _lookup, ) -import paddle.fluid as fluid -import paddle.fluid.core as core class TestLookupTableV2BF16Op(TestLookupTableBF16Op): diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py index e39df64180da4f..2a74fff41d734f 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py @@ -13,14 +13,15 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest, skip_check_grad_ci + import paddle -import paddle.fluid.core as core -import paddle.fluid as fluid -from paddle.fluid.op import Operator import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard +from paddle.fluid.op import Operator class TestStaticGraphSupportMultipleInt(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py index d1f656a5663b69..4d87942ef799bd 100644 --- a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py @@ -13,9 +13,10 @@ # limitations under the License. import math -import numpy as np import unittest +import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_lrn_op.py b/python/paddle/fluid/tests/unittests/test_lrn_op.py index 521889f53eca76..36aab4fb5adf24 100644 --- a/python/paddle/fluid/tests/unittests/test_lrn_op.py +++ b/python/paddle/fluid/tests/unittests/test_lrn_op.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest + import numpy as np +from op_test import OpTest + +import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from op_test import OpTest from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py index b82b38c7ba5f64..cbc7450bbc6d21 100644 --- a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py @@ -12,16 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest -import numpy as np import math +import random +import unittest -import paddle.fluid.core as core +import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.fluid.layers as layers -import random random.seed(2) np.set_printoptions(threshold=np.inf) diff --git a/python/paddle/fluid/tests/unittests/test_lstm_op.py b/python/paddle/fluid/tests/unittests/test_lstm_op.py index aeb9b28c860c00..8df805a231b92d 100644 --- a/python/paddle/fluid/tests/unittests/test_lstm_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstm_op.py @@ -13,12 +13,14 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + from paddle import fluid -from paddle.fluid.layers import lstm as LSTM +from paddle.fluid.framework import Program, program_guard from paddle.fluid.layers import fill_constant -from paddle.fluid.framework import program_guard, Program +from paddle.fluid.layers import lstm as LSTM SIGMOID_THRESHOLD_MIN = -40.0 SIGMOID_THRESHOLD_MAX = 13.0 diff --git a/python/paddle/fluid/tests/unittests/test_lstm_unit_op.py b/python/paddle/fluid/tests/unittests/test_lstm_unit_op.py index d46e8e4719e818..e389ae936b6fae 100644 --- a/python/paddle/fluid/tests/unittests/test_lstm_unit_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstm_unit_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + from paddle import fluid +from paddle.fluid.framework import Program, program_guard from paddle.fluid.layers import lstm_unit -from paddle.fluid.framework import program_guard, Program def sigmoid_np(x): diff --git a/python/paddle/fluid/tests/unittests/test_lstmp_op.py b/python/paddle/fluid/tests/unittests/test_lstmp_op.py index 730af353dce6be..c299f54159465e 100644 --- a/python/paddle/fluid/tests/unittests/test_lstmp_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstmp_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np import test_lstm_op as LstmTest + from paddle import fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_lu_op.py b/python/paddle/fluid/tests/unittests/test_lu_op.py index d76fdf16ad9334..790ebb36f6d7c2 100644 --- a/python/paddle/fluid/tests/unittests/test_lu_op.py +++ b/python/paddle/fluid/tests/unittests/test_lu_op.py @@ -12,16 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -from op_test import OpTest -import unittest +import copy import itertools +import unittest + import numpy as np +import scipy +import scipy.linalg +from op_test import OpTest + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import scipy -import scipy.linalg -import copy def scipy_lu(A, pivot): diff --git a/python/paddle/fluid/tests/unittests/test_lu_unpack_op.py b/python/paddle/fluid/tests/unittests/test_lu_unpack_op.py index 0c00fbea820880..677ae648fbe09b 100644 --- a/python/paddle/fluid/tests/unittests/test_lu_unpack_op.py +++ b/python/paddle/fluid/tests/unittests/test_lu_unpack_op.py @@ -12,16 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from op_test import OpTest -import unittest +import copy import itertools +import unittest + import numpy as np +import scipy +import scipy.linalg +from op_test import OpTest + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import scipy -import scipy.linalg -import copy def scipy_lu_unpack(A): From 30315ac90c9772877f2244e30114f370e8740f2e Mon Sep 17 00:00:00 2001 From: caozhou <48191911+Caozhou1995@users.noreply.github.com> Date: Tue, 29 Nov 2022 19:23:19 +0800 Subject: [PATCH 038/154] [Auto Parallel] Add pattern match (#48464) * add pattern match * add unittest --- .../paddle/distributed/auto_parallel/graph.py | 16 +- .../auto_parallel/tuner/rule_based_tuner.py | 292 ++++++++++++++---- .../unittests/auto_parallel/CMakeLists.txt | 1 + .../auto_parallel/test_group_operators.py | 2 +- .../unittests/auto_parallel/test_pattern.py | 17 +- .../auto_parallel/test_pattern_match.py | 142 +++++++++ 6 files changed, 407 insertions(+), 63 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/auto_parallel/test_pattern_match.py diff --git a/python/paddle/distributed/auto_parallel/graph.py b/python/paddle/distributed/auto_parallel/graph.py index be27bd50867d73..0ccb93412abcac 100644 --- a/python/paddle/distributed/auto_parallel/graph.py +++ b/python/paddle/distributed/auto_parallel/graph.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License +from collections import OrderedDict + class Node: def __init__(self, id, **attrs): @@ -100,6 +102,8 @@ def __init__(self, **attrs): # Attributes for Graph self._attrs = {} self._attrs.update(attrs) + self._reverse_adjs = {} + self._attr_to_nodes = {} @property def nodes(self): @@ -120,6 +124,7 @@ def add_node(self, node_id, **attrs): node = Node(node_id, **attrs) self._nodes[node_id] = node self._adjs[node_id] = {} + self._reverse_adjs[node_id] = [] else: self._nodes[node_id].attrs.update(attrs) @@ -134,14 +139,21 @@ def add_edge(self, src_id, tgt_id, **attrs): if src_id not in self._nodes: src_node = Node(src_id) self._nodes[src_id] = src_node - self._adjs[src_id] = {} + # for one tensor to multiple ops + self._adjs[src_id] = OrderedDict() + self._reverse_adjs[src_id] = [] if tgt_id not in self._nodes: tgt_node = Node(tgt_id) self._nodes[tgt_id] = tgt_node - self._adjs[tgt_id] = {} + # for one tensor to multiple ops + self._adjs[tgt_id] = OrderedDict() + self._reverse_adjs[tgt_id] = [] # add the edge edge = Edge(src_id, tgt_id, **attrs) self._adjs[src_id][tgt_id] = edge + + # add the reverse adj + self._reverse_adjs[tgt_id].append(self.nodes[src_id]) return edge def __len__(self): diff --git a/python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py b/python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py index f6e855f71ffb04..e00efcb15323a0 100644 --- a/python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py +++ b/python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from abc import ABC, abstractmethod +from abc import abstractmethod from ..graph import Graph @@ -32,6 +32,57 @@ def register(name): return cls +class BasePattern(Graph): + name = "base" + + def __init__(self): + super().__init__() + self.build() + + @abstractmethod + def build(self): + pass + + +@register_pattern +class QKVPattern(BasePattern): + name = "qkv" + + def __init__(self): + super().__init__() + + def build(self): + query = self.add_node(0, **{"type": "var"}) + + q_weight = self.add_node(1, **{"dim": 2, "type": "param"}) + k_weight = self.add_node(2, **{"dim": 2, "type": "param"}) + v_weight = self.add_node(3, **{"dim": 2, "type": "param"}) + + q_matmul = self.add_node(4, **{"type": "matmul_v2"}) + k_matmul = self.add_node(5, **{"type": "matmul_v2"}) + v_matmul = self.add_node(6, **{"type": "matmul_v2"}) + + q_x = self.add_edge(0, 4, **{"input_name": "X"}) + k_x = self.add_edge(0, 5, **{"input_name": "X"}) + v_x = self.add_edge(0, 6, **{"input_name": "X"}) + q_y = self.add_edge(1, 4, **{"input_name": "Y"}) + k_y = self.add_edge(2, 5, **{"input_name": "Y"}) + v_y = self.add_edge(3, 6, **{"input_name": "Y"}) + + q = self.add_node(7, **{"type": "var"}) + k = self.add_node(8, **{"type": "var"}) + v = self.add_node(9, **{"type": "var"}) + + q_out = self.add_edge(4, 7, **{"output_name": "Out"}) + k_out = self.add_edge(5, 8, **{"output_name": "Out"}) + v_out = self.add_edge(6, 9, **{"output_name": "Out"}) + + # Pattern + self.attrs["shard_spec"] = [ + [(1, 2, 3), [[-1, 0], [-1, 1]]], + ] # 2-tuple list such as [(tensor_id, shard_sepc)] + + def convert_to_graph(ops, block): """Convert ops to graph.""" graph = Graph() @@ -50,7 +101,9 @@ def convert_to_graph(ops, block): op_node = graph.add_node(node_id, **attrs) graph.attrs["op_to_id"][op.desc.id()] = op_node.id graph.attrs["id_to_op"][op_node.id] = op.desc.id() + graph._attr_to_nodes[op_node.id] = {} for input_name in op.input_names: + graph._attr_to_nodes[op_node.id][input_name] = [] for var_name in op.input(input_name): if var_name not in graph.attrs["var_to_id"]: # create var node @@ -59,6 +112,7 @@ def convert_to_graph(ops, block): var = block._var_recursive(var_name) if var.is_parameter: var_node.attrs["type"] = "param" + var_node.attrs["dim"] = len(var.shape) else: var_node.attrs["type"] = "var" graph.attrs["var_to_id"][var_name] = var_node.id @@ -70,8 +124,10 @@ def convert_to_graph(ops, block): # create edge that input -> op input_edge = graph.add_edge(var_node.id, op_node.id) input_edge.attrs["input_name"] = input_name + graph._attr_to_nodes[op_node.id][input_name].append(var_node) for output_name in op.output_names: + graph._attr_to_nodes[op_node.id][output_name] = [] for var_name in op.output(output_name): if var_name not in graph.attrs["var_to_id"]: # create var node @@ -92,64 +148,189 @@ def convert_to_graph(ops, block): output_edge = graph.add_edge(op_node.id, var_node.id) output_edge.attrs["output_name"] = output_name + graph._attr_to_nodes[op_node.id][output_name].append( + var_node + ) + return graph -class BasePattern(ABC): - name = "base" +def match(pattern, graph): + def _is_op_node(node): + """Judge whether node is op node""" + if node.attrs["type"] not in ["var", "param", "data"]: + return True - def __init__(self): - self.graph = None - self.build() + return False - @abstractmethod - def build(self): - pass + def _compare_op_node(src, tgt): + """Compare whether two op nodes are equal""" + if src.attrs["type"] != tgt.attrs["type"]: + return False + return True -@register_pattern -class QKVPattern(BasePattern): - 
name = "qkv" + def _compare_var_node(src, tgt): + """Compare whether two var nodes are equal""" + for key in src.attrs: + if key not in tgt.attrs: + return False + if src.attrs[key] != tgt.attrs[key]: + return False - def __init__(self): - super().__init__() + return True - def build(self): - self.graph = Graph() + def _match_core(src_node, tgt_node): + nonlocal not_matched + # do not support one input name or output name corresponding to multiple vars + if not_matched: + return - query = self.graph.add_node(0, **{"type": "var"}) + if _is_op_node(src_node): + # compare op node whether equal + if not _compare_op_node(src_node, tgt_node): + return - q_weight = self.graph.add_node(1, **{"dim": 2, "type": "param"}) - k_weight = self.graph.add_node(2, **{"dim": 2, "type": "param"}) - v_weight = self.graph.add_node(3, **{"dim": 2, "type": "param"}) + result[src_node.id] = tgt_node.id - q_matmul = self.graph.add_node(4, **{"type": "matmul_v2"}) - k_matmul = self.graph.add_node(5, **{"type": "matmul_v2"}) - v_matmul = self.graph.add_node(6, **{"type": "matmul_v2"}) + # input var nodes + src_input_nodes = src_reverse_adjs[src_node.id] + for node in src_input_nodes: + # has visited + if node.id in result: + continue + edge = src_edges[node.id][src_node.id] + input_name = edge.attrs["input_name"] + + # NOTE: do not support one input name or output name corresponding to multiple vars + compare_nodes = tgt_attr_to_nodes[tgt_node.id].get( + input_name, None + ) + if not compare_nodes: + not_matched = True + return + _match_core(node, compare_nodes[0]) + + # output var nodes + src_output_node_ids = src_edges[src_node.id].keys() + for node_id in src_output_node_ids: + # has visited + if node_id in result: + continue + node = src_nodes[node_id] + edge = src_edges[src_node.id][node_id] + output_name = edge.attrs["output_name"] + + # NOTE: do not support one input name or output name corresponding to multiple vars + compare_nodes = tgt_attr_to_nodes[tgt_node.id].get( + output_name, None + ) + if not compare_nodes: + not_matched = True + return + _match_core(node, compare_nodes[0]) - q_x = self.graph.add_edge(0, 4, **{"input_name": "X"}) - k_x = self.graph.add_edge(0, 5, **{"input_name": "X"}) - v_x = self.graph.add_edge(0, 6, **{"input_name": "X"}) - q_y = self.graph.add_edge(1, 4, **{"input_name": "Y"}) - k_y = self.graph.add_edge(2, 5, **{"input_name": "Y"}) - v_y = self.graph.add_edge(3, 6, **{"input_name": "Y"}) + else: + # compare var node whether equal + if not _compare_var_node(src_node, tgt_node): + not_matched = True + return - q = self.graph.add_node(7, **{"type": "var"}) - k = self.graph.add_node(8, **{"type": "var"}) - v = self.graph.add_node(9, **{"type": "var"}) + result[src_node.id] = tgt_node.id - q_out = self.graph.add_edge(7, 4, **{"output_name": "Out"}) - k_out = self.graph.add_edge(8, 5, **{"output_name": "Out"}) - v_out = self.graph.add_edge(9, 6, **{"output_name": "Out"}) + # as input for op nodes + src_as_input_node_ids = src_edges[src_node.id].keys() + for node_id in src_as_input_node_ids: + if node_id in result: + continue - # Pattern - self.graph.attrs["shard_tensor"] = [ - (1, 2, 3), - [[-1, 0], [-1, 1]], - ] # 2-tuple such as (tensor_id, patterns) + src_edge = src_edges[src_node.id][node_id] + input_name = src_edge.attrs["input_name"] + compare_node_ids = tgt_edges[tgt_node.id].keys() + + compare_node = None + for compare_node_id in compare_node_ids: + edge = tgt_edges[tgt_node.id][compare_node_id] + if ( + edge.attrs["input_name"] == input_name + and compare_node_id not in 
result.values() + ): + compare_node = tgt_nodes[compare_node_id] + break + + if not compare_node: + not_matched = True + return + _match_core(src_nodes[node_id], compare_node) + + # as output for nodes + src_as_output_nodes = src_reverse_adjs[src_node.id] + for node in src_as_output_nodes: + if node.id in result: + continue + + src_edge = src_edges[node.id][src_node.id] + output_name = src_edge.attrs["output_name"] + compare_node_ids = tgt_reverse_adjs[tgt_node.id] -class OperatorGroupUtil: + compare_node = None + for node_id in compare_node_ids: + edge = tgt_edges[node_id][tgt_node.id] + if edge.attrs["output_name"] == output_name: + compare_node = tgt_nodes[node_id] + break + if not compare_node: + not_matched = True + return + _match_core(src_nodes[node_id], compare_node) + + results = [] + result = {} + has_matched = set() + src_nodes = pattern.nodes + src_edges = pattern._adjs + src_reverse_adjs = pattern._reverse_adjs + + tgt_nodes = graph.nodes + tgt_edges = graph._adjs + tgt_reverse_adjs = graph._reverse_adjs + tgt_attr_to_nodes = graph._attr_to_nodes + not_matched = False + + # starts with a op node + src_start_node = None + for node_id in src_nodes: + node = src_nodes[node_id] + if node.attrs["type"] not in ["var", "param", "data"]: + src_start_node = node + break + assert src_start_node is not None + + for node_id in tgt_nodes: + node = tgt_nodes[node_id] + if node.attrs["type"] == src_start_node.attrs["type"]: + _match_core(src_start_node, node) + if not not_matched: + need_to_append = True + for value in result.values(): + if value in has_matched: + result = {} + need_to_append = False + break + if need_to_append: + results.append(result) + for value in result.values(): + has_matched.add(value) + result = {} + else: + not_matched = False + result = {} + + return results + + +class OperatorClusteringUtil: common_starts = ["layer_norm", "matmul_v2", "matmul"] @staticmethod @@ -257,7 +438,10 @@ def get_longest_repeated_sub_seq(suffixes, heights, seq): min_index = min(index_group) if max_index - min_index >= k: longest_sub_seq = seq[min_index : min_index + k] - if longest_sub_seq[0] in OperatorGroupUtil.common_starts: + if ( + longest_sub_seq[0] + in OperatorClusteringUtil.common_starts + ): return longest_sub_seq if longest_sub_seq is not None: return longest_sub_seq @@ -325,9 +509,9 @@ def __init__(self, dist_context, mode="train"): self._dist_context = dist_context self._mode = mode - def group_operators(self, ops): + def cluster_operators(self, ops): """ - Group operators to layers. + Cluster operators to layers. Args: ops (list): A operator list. 
@@ -337,7 +521,7 @@ def group_operators(self, ops): """ seq = [op.type for op in ops] - while not OperatorGroupUtil.stop_replace(seq): + while not OperatorClusteringUtil.stop_replace(seq): to_replace_seq = [] to_replace_idxes = [] has_append = False @@ -351,11 +535,15 @@ def group_operators(self, ops): elif isinstance(seq, list) and has_append: break - ranks = OperatorGroupUtil.get_ranks(to_replace_seq) - suffixes = OperatorGroupUtil.get_suffixes(ranks) - heights = OperatorGroupUtil.get_heights(suffixes, to_replace_seq) - longest_sub_seq = OperatorGroupUtil.get_longest_repeated_sub_seq( - suffixes, heights, to_replace_seq + ranks = OperatorClusteringUtil.get_ranks(to_replace_seq) + suffixes = OperatorClusteringUtil.get_suffixes(ranks) + heights = OperatorClusteringUtil.get_heights( + suffixes, to_replace_seq + ) + longest_sub_seq = ( + OperatorClusteringUtil.get_longest_repeated_sub_seq( + suffixes, heights, to_replace_seq + ) ) has_merged = False if longest_sub_seq is None: @@ -374,10 +562,10 @@ def group_operators(self, ops): seq = [to_replace_seq] break - decomposed_sub_seq = OperatorGroupUtil.get_decomposed_sub_seq( + decomposed_sub_seq = OperatorClusteringUtil.get_decomposed_sub_seq( longest_sub_seq ) - to_replace_seq = OperatorGroupUtil.replace_by_decomposed_seq( + to_replace_seq = OperatorClusteringUtil.replace_by_decomposed_seq( decomposed_sub_seq, to_replace_seq ) result = seq[: to_replace_idxes[0]] diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt b/python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt index 8486056984cf0d..18fad917b68397 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt @@ -120,4 +120,5 @@ if(WITH_DISTRIBUTE AND WITH_GPU) py_test_modules(test_fp16_assign MODULES test_fp16_assign) py_test_modules(test_group_operators MODULES test_group_operators) py_test_modules(test_pattern MODULES test_pattern) + py_test_modules(test_pattern_match MODULES test_pattern_match) endif() diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_group_operators.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_group_operators.py index e6353dadb947eb..2823d4d9a318c6 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_group_operators.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_group_operators.py @@ -121,7 +121,7 @@ def test_gpt(self): dist_context = DistributedContext() tuner = RuleBasedTuner(dist_context) - layers = tuner.group_operators(train_program.global_block().ops) + layers = tuner.cluster_operators(train_program.global_block().ops) op_types = [] for layer in layers: tmp = [] diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_pattern.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_pattern.py index 159def7617a2fd..dca87bdf9ce248 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_pattern.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_pattern.py @@ -14,6 +14,7 @@ import sys import unittest + import numpy as np import paddle @@ -22,8 +23,8 @@ sys.path.append("..") import auto_parallel_gpt_model as modeling from auto_parallel_gpt_model import ( - GPTModel, GPTForPretraining, + GPTModel, GPTPretrainingCriterion, ) @@ -111,22 +112,22 @@ def test_gpt(self): sequence_len, vocab_size, ) + from paddle.distributed.auto_parallel.dist_context import ( + DistributedContext, + ) from paddle.distributed.auto_parallel.tuner.rule_based_tuner 
import ( + _PATTERNS, RuleBasedTuner, convert_to_graph, - _PATTERNS, - ) - from paddle.distributed.auto_parallel.dist_context import ( - DistributedContext, ) dist_context = DistributedContext() tuner = RuleBasedTuner(dist_context) - layers = tuner.group_operators(train_program.global_block().ops) + layers = tuner.cluster_operators(train_program.global_block().ops) layer = layers[0] graph = convert_to_graph(layer, train_program.global_block()) - print(graph) - print("qkv: ", _PATTERNS["qkv"].graph) + print("graph: ", graph) + print("qkv: ", _PATTERNS["qkv"].attrs["shard_spec"]) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_pattern_match.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_pattern_match.py new file mode 100644 index 00000000000000..e18298b890a585 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_pattern_match.py @@ -0,0 +1,142 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import unittest + +import numpy as np + +import paddle +import paddle.static as static + +sys.path.append("..") +import auto_parallel_gpt_model as modeling +from auto_parallel_gpt_model import ( + GPTForPretraining, + GPTModel, + GPTPretrainingCriterion, +) + + +def get_gpt_model( + train_program, start_program, place, batch_size, sequence_len, vocab_size +): + with static.program_guard(train_program, start_program): + tokens = paddle.static.data( + name="tokens", shape=[batch_size, sequence_len], dtype='int64' + ) + position_ids = paddle.static.data( + name="position_ids", shape=[batch_size, sequence_len], dtype='int64' + ) + attention_mask = paddle.static.data( + name="attention_mask", + shape=[batch_size, 1, sequence_len, sequence_len], + dtype='float32', + ) + labels = paddle.static.data( + name="labels", shape=[batch_size, sequence_len], dtype='int64' + ) + loss_mask = paddle.static.data( + name="loss_mask", shape=[batch_size, sequence_len], dtype='float32' + ) + + gpt = GPTModel( + vocab_size=1000, + hidden_size=64, + num_hidden_layers=2, + num_attention_heads=8, + intermediate_size=256, + hidden_act="gelu", + hidden_dropout_prob=0.0, + attention_probs_dropout_prob=0.0, + max_position_embeddings=1024, + type_vocab_size=1, + initializer_range=0.02, + pad_token_id=0, + eos_token_id=7, + bos_token_id=0, + eol_token_id=3, + ) + + model = GPTForPretraining( + gpt, vocab_size=1000, hidden_size=64, initializer_range=0.02 + ) + preds = model(tokens, position_ids, attention_mask) + criterion = GPTPretrainingCriterion() + loss = criterion(preds, labels, loss_mask) + + def gen_data(): + np.random.seed(2021) + tokens = [] + position_ids = [] + attention_mask = [] + labels = [] + loss_mask = [] + for _ in range(batch_size): + tokens.append(np.random.randint(vocab_size, size=sequence_len)) + position_ids.append(np.arange(sequence_len)) + attention_mask.append([np.tril(np.ones(sequence_len))]) + labels.append(np.random.randint(vocab_size, 
size=sequence_len)) + loss_mask.append(np.ones(sequence_len)) + + return tokens, position_ids, attention_mask, labels, loss_mask + + return train_program, start_program, loss, gen_data + + +class TestGroupOperators(unittest.TestCase): + def test_gpt(self): + modeling.init_global() + train_program = static.Program() + start_program = static.Program() + place = paddle.set_device("gpu") + batch_size = 8 + sequence_len = 512 + vocab_size = 1000 + train_program, start_program, loss, gen_data = get_gpt_model( + train_program, + start_program, + place, + batch_size, + sequence_len, + vocab_size, + ) + from paddle.distributed.auto_parallel.dist_context import ( + DistributedContext, + ) + from paddle.distributed.auto_parallel.tuner.rule_based_tuner import ( + _PATTERNS, + RuleBasedTuner, + convert_to_graph, + match, + ) + + dist_context = DistributedContext() + tuner = RuleBasedTuner(dist_context) + layers = tuner.cluster_operators(train_program.global_block().ops) + layer = layers[0] + graph = convert_to_graph(layer, train_program.global_block()) + results = match(_PATTERNS["qkv"], graph) + shard_tensor_infos = _PATTERNS["qkv"].attrs["shard_spec"] + tensor_ids = shard_tensor_infos[0][0] + if results: + for result in results: + for node_id in result: + if node_id in tensor_ids: + print(graph.attrs["id_to_var"][result[node_id]]) + print("shard_spec: ", shard_tensor_infos[0][1]) + + +if __name__ == "__main__": + unittest.main() From 91dd8a2e16be57d891679842a7f17ab4efd865a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Tue, 29 Nov 2022 19:23:30 +0800 Subject: [PATCH 039/154] Replace LoDTensor with phi::DenseTensor in fluid\operators (#48417) * replace LoDTensor with phi::DenseTensor in fluid\operators * replace LoDTensor with phi::DenseTensor in fluid\operators * Update split_lod_tensor_op.cc * Update warpctc_op.cc * Update broadcast_tensors_op.cc * Update crf_decoding_op.cc * Update lstm_op.cc * Update lstm_op.cc * Update lod_reset_op.cc * Update gru_op.cc * Update linear_chain_crf_op.cc * resume 2 files for confilct * Update gru_op.cc * Update linear_chain_crf_op.cc * Update lstm_op.cc --- .../fluid/operators/array_to_lod_tensor_op.cc | 20 ++--- paddle/fluid/operators/assert_op.cc | 6 +- paddle/fluid/operators/assign_op.cc | 13 ++-- paddle/fluid/operators/assign_pos_op.cu | 11 +-- paddle/fluid/operators/assign_pos_op.h | 2 - paddle/fluid/operators/attention_lstm_op.cc | 31 ++++---- paddle/fluid/operators/attention_lstm_op.h | 1 - paddle/fluid/operators/batch_norm_op.cc | 8 +- paddle/fluid/operators/batch_norm_op.h | 1 - .../fluid/operators/beam_search_decode_op.h | 14 ++-- .../operators/beam_search_decode_op_def.h | 21 +++--- .../operators/beam_search_decode_op_test.cc | 9 +-- .../operators/beam_search_decode_op_xpu.cc | 15 ++-- .../operators/beam_search_decode_op_xpu.h | 18 ++--- .../beam_search_decode_op_xpu_test.cc | 13 ++-- paddle/fluid/operators/beam_search_op.cc | 39 +++++----- .../fluid/operators/broadcast_tensors_op.cc | 15 ++-- .../operators/check_memory_continue_op.cc | 13 ++-- paddle/fluid/operators/chunk_eval_op.h | 6 +- paddle/fluid/operators/coalesce_tensor_op.cc | 8 +- .../operators/common_infer_shape_functions.cc | 2 +- paddle/fluid/operators/copy_cross_scope_op.cc | 13 ++-- paddle/fluid/operators/crf_decoding_op.cc | 9 ++- paddle/fluid/operators/crf_decoding_op.h | 5 +- paddle/fluid/operators/ctc_align_op.cu | 8 +- paddle/fluid/operators/ctc_align_op.h | 9 +-- paddle/fluid/operators/cudnn_lstm_op.cu.cc | 
1 - paddle/fluid/operators/cvm_op.cu | 7 +- paddle/fluid/operators/cvm_op.h | 7 +- paddle/fluid/operators/data_norm_op.cc | 5 +- paddle/fluid/operators/data_norm_op.cu | 1 - .../operators/deformable_psroi_pooling_op.cc | 7 +- .../operators/deformable_psroi_pooling_op.cu | 5 +- .../operators/deformable_psroi_pooling_op.h | 9 +-- paddle/fluid/operators/dequeue_op.cc | 3 +- paddle/fluid/operators/detection_map_op.cc | 33 ++++---- paddle/fluid/operators/edit_distance_op.cc | 6 +- paddle/fluid/operators/enqueue_op.cc | 3 +- .../fill_constant_batch_size_like_op_mlu.cc | 2 +- .../fill_constant_batch_size_like_op_npu.cc | 2 +- paddle/fluid/operators/fill_op.cc | 2 +- paddle/fluid/operators/filter_by_instag_op.cc | 9 ++- paddle/fluid/operators/filter_by_instag_op.cu | 30 ++++---- paddle/fluid/operators/filter_by_instag_op.h | 26 ++++--- .../get_tensor_from_selected_rows_op.cc | 17 +++-- paddle/fluid/operators/group_norm_op.cc | 5 +- paddle/fluid/operators/group_norm_op.h | 1 - paddle/fluid/operators/gru_op.cc | 75 ++++++++++--------- paddle/fluid/operators/gru_op.cu.cc | 15 ++-- paddle/fluid/operators/gru_op.h | 16 ++-- .../operators/hierarchical_sigmoid_op.cc | 46 ++++++------ paddle/fluid/operators/im2sequence_op.h | 3 +- paddle/fluid/operators/index_select_op.h | 15 ++-- paddle/fluid/operators/inplace_abn_op.cc | 4 +- paddle/fluid/operators/instance_norm_op.cc | 8 +- paddle/fluid/operators/instance_norm_op.h | 1 - paddle/fluid/operators/label_smooth_op_mlu.cc | 5 +- paddle/fluid/operators/label_smooth_op_npu.cc | 5 +- paddle/fluid/operators/layer_norm_op.cc | 5 +- .../fluid/operators/limit_by_capacity_op.cu | 1 - paddle/fluid/operators/linear_chain_crf_op.cc | 13 ++-- paddle/fluid/operators/linear_chain_crf_op.h | 5 +- paddle/fluid/operators/load_combine_op.cc | 2 +- paddle/fluid/operators/load_op.cc | 5 +- paddle/fluid/operators/load_op_npu.cc | 3 +- paddle/fluid/operators/lod_rank_table_op.cc | 7 +- paddle/fluid/operators/lod_reset_op.cc | 22 +++--- .../fluid/operators/lod_tensor_to_array_op.cc | 27 +++---- .../fluid/operators/lookup_table_dequant_op.h | 9 +-- paddle/fluid/operators/lookup_table_op.cc | 2 +- paddle/fluid/operators/lookup_table_op.cu | 21 +++--- paddle/fluid/operators/lookup_table_op.h | 28 +++---- paddle/fluid/operators/lookup_table_v2_op.cc | 2 +- paddle/fluid/operators/lookup_table_v2_op.h | 22 +++--- .../fluid/operators/lookup_table_v2_op_mlu.cc | 13 ++-- .../fluid/operators/lookup_table_v2_op_npu.cc | 2 +- paddle/fluid/operators/lstm_op.cc | 36 +++++---- paddle/fluid/operators/lstm_op.h | 42 +++++------ paddle/fluid/operators/lstmp_op.cc | 50 +++++++------ paddle/fluid/operators/lstmp_op.h | 37 +++++---- .../fluid/operators/match_matrix_tensor_op.cc | 37 ++++----- paddle/fluid/operators/memcpy_d2h_op.cc | 6 +- paddle/fluid/operators/memcpy_h2d_op.cc | 6 +- paddle/fluid/operators/memcpy_op.cc | 6 +- paddle/fluid/operators/merge_lod_tensor_op.cc | 8 +- paddle/fluid/operators/nce_op.cc | 2 +- paddle/fluid/operators/nce_op.h | 7 +- paddle/fluid/operators/number_count_op.cu | 5 +- paddle/fluid/operators/one_hot_op.cc | 3 +- paddle/fluid/operators/one_hot_op.cu | 5 +- paddle/fluid/operators/one_hot_op.h | 5 +- paddle/fluid/operators/one_hot_op_npu.cc | 4 +- paddle/fluid/operators/one_hot_op_xpu.cc | 5 +- paddle/fluid/operators/one_hot_v2_op.cc | 3 +- paddle/fluid/operators/one_hot_v2_op_mlu.cc | 5 +- paddle/fluid/operators/one_hot_v2_op_npu.cc | 5 +- paddle/fluid/operators/partial_concat_op.cu | 5 +- paddle/fluid/operators/partial_sum_op.cu | 5 +- 
.../operators/positive_negative_pair_op.h | 1 - paddle/fluid/operators/prroi_pool_op.cc | 9 +-- paddle/fluid/operators/prroi_pool_op.cu | 7 +- .../operators/prune_gate_by_capacity_op.cu | 10 +-- paddle/fluid/operators/psroi_pool_op.cc | 4 +- .../operators/pull_box_extended_sparse_op.h | 1 - paddle/fluid/operators/pull_box_sparse_op.h | 1 - .../fluid/operators/pull_gpups_sparse_op.cu | 1 - paddle/fluid/operators/pull_gpups_sparse_op.h | 1 - paddle/fluid/operators/py_func_op.cc | 4 +- paddle/fluid/operators/pyramid_hash_op.cc | 17 ++--- paddle/fluid/operators/random_routing_op.cu | 9 +-- paddle/fluid/operators/recurrent_op.cc | 13 ++-- .../reorder_lod_tensor_by_rank_op.cc | 9 ++- paddle/fluid/operators/reverse_op.cc | 6 +- paddle/fluid/operators/roi_align_op.cc | 5 +- paddle/fluid/operators/roi_align_op_mlu.cc | 5 +- paddle/fluid/operators/roi_pool_op.cc | 5 +- paddle/fluid/operators/row_conv_op.cc | 15 ++-- paddle/fluid/operators/row_conv_op.cu | 12 ++- paddle/fluid/operators/run_program_op.cc | 8 +- paddle/fluid/operators/run_program_op.h | 46 ++++++------ paddle/fluid/operators/save_combine_op.cc | 4 +- paddle/fluid/operators/save_combine_op.h | 15 ++-- paddle/fluid/operators/save_op.cc | 5 +- paddle/fluid/operators/save_op.h | 3 +- paddle/fluid/operators/search_compute.h | 1 - paddle/fluid/operators/select_input_op.cc | 2 +- paddle/fluid/operators/select_output_op.cc | 4 +- paddle/fluid/operators/shape_op.cc | 10 +-- paddle/fluid/operators/shape_op_mlu.cc | 3 +- paddle/fluid/operators/shard_index_op.cc | 3 +- paddle/fluid/operators/shard_index_op_npu.cc | 5 +- paddle/fluid/operators/share_data_op.cc | 2 +- .../fluid/operators/shrink_rnn_memory_op.cc | 11 +-- paddle/fluid/operators/shuffle_batch_op.cc | 9 ++- paddle/fluid/operators/shuffle_batch_op.h | 19 ++--- paddle/fluid/operators/slice_op.cc | 8 +- paddle/fluid/operators/split_lod_tensor_op.cc | 12 +-- paddle/fluid/operators/split_op.cc | 5 +- paddle/fluid/operators/sum_op.cc | 16 ++-- paddle/fluid/operators/sum_op_mlu.cc | 1 - paddle/fluid/operators/sum_op_npu.cc | 1 - paddle/fluid/operators/svd_helper.h | 2 +- paddle/fluid/operators/tdm_child_op.h | 13 ++-- paddle/fluid/operators/tdm_sampler_op.h | 13 ++-- paddle/fluid/operators/transfer_layout_op.cc | 5 +- paddle/fluid/operators/var_conv_2d_op.cc | 60 ++++++++------- paddle/fluid/operators/var_conv_2d_op.h | 1 - paddle/fluid/operators/warpctc_op.cc | 6 +- 148 files changed, 804 insertions(+), 784 deletions(-) diff --git a/paddle/fluid/operators/array_to_lod_tensor_op.cc b/paddle/fluid/operators/array_to_lod_tensor_op.cc index c81945d47385bf..ebf64a92746e85 100644 --- a/paddle/fluid/operators/array_to_lod_tensor_op.cc +++ b/paddle/fluid/operators/array_to_lod_tensor_op.cc @@ -157,7 +157,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { return table_items[a].index < table_items[b].index; }); - // Build LoDTensor `out` + // Build phi::DenseTensor `out` framework::LoD *out_lod = out->mutable_lod(); out_lod->clear(); auto prefix_lod = rank_table.coarse_lod(); @@ -215,16 +215,18 @@ class ArrayToLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("X", "(std::vector) A vector of tensors that is going to " - "be casted to a big LoDTensor."); + "be casted to a big phi::DenseTensor."); AddInput("RankTable", "(LoDRankTable) RankTable provides the coarse lod information to " - "build the output LoDTensor. See " + "build the output phi::DenseTensor. 
See " "'paddle/framework/lod_rank_table.h' for more details."); - AddOutput("Out", "(LoDTensor) The LoDTensor formed by input tensor array."); + AddOutput("Out", + "(phi::DenseTensor) The phi::DenseTensor formed by input tensor " + "array."); AddComment( - R"DOC(This Op build a big LoDTensor from a std::vector + R"DOC(This Op build a big phi::DenseTensor from a std::vector and a LoDRankTable. It is supposed to be used in getting dynamic RNN's - outputs back to a normal LoDTensor. The std::vector + outputs back to a normal phi::DenseTensor. The std::vector would be the output of RNN Op and the LoDRankTable would be build with RNN's input.)DOC"); } @@ -247,9 +249,9 @@ class ArrayToLoDTensorInferShape : public framework::InferShapeBase { // detail kernel implementation. context->SetOutputDim("Out", context->GetInputDim("X")); - // The output LoDTensor's lod_level should be input X's lod_level + 1. - // For compile-time, we call SetLoDLevel to set output's lod_level. - // For runtime, output LoDTensor's lod is determined by input X's lod and + // The output phi::DenseTensor's lod_level should be input X's lod_level + // + 1. For compile-time, we call SetLoDLevel to set output's lod_level. For + // runtime, output phi::DenseTensor's lod is determined by input X's lod and // the level specified by input RandTable. // We cannot get X's detail lod and RankTable's level in this function, so // leave this work to the detail kernel implementation. diff --git a/paddle/fluid/operators/assert_op.cc b/paddle/fluid/operators/assert_op.cc index b65a709291f4e6..b1ca48320016c1 100644 --- a/paddle/fluid/operators/assert_op.cc +++ b/paddle/fluid/operators/assert_op.cc @@ -41,8 +41,6 @@ const char kSummarize[] = "summarize"; namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; - class AssertOp : public framework::OperatorBase { public: AssertOp(const std::string &type, @@ -58,7 +56,7 @@ class AssertOp : public framework::OperatorBase { PADDLE_ENFORCE_NOT_NULL(cond_var_ptr, platform::errors::NotFound( "Input(Condition) of AssertOp is not found.")); - const LoDTensor &cond = cond_var_ptr->Get(); + const phi::DenseTensor &cond = cond_var_ptr->Get(); PADDLE_ENFORCE_EQ( cond.dims(), phi::make_ddim({1}), @@ -78,7 +76,7 @@ class AssertOp : public framework::OperatorBase { const std::vector &x_names = Inputs(kData); for (const std::string &name : x_names) { const framework::Variable *x_var_ptr = scope.FindVar(name); - const phi::DenseTensor &x_tensor = x_var_ptr->Get(); + const phi::DenseTensor &x_tensor = x_var_ptr->Get(); formatter.Print(x_tensor, name); } diff --git a/paddle/fluid/operators/assign_op.cc b/paddle/fluid/operators/assign_op.cc index 91bc5019f3f079..1af424fa77dbe6 100644 --- a/paddle/fluid/operators/assign_op.cc +++ b/paddle/fluid/operators/assign_op.cc @@ -79,16 +79,19 @@ class AssignInferVarType : public framework::VarTypeInference { class AssignOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", - "(LoDTensor, SelectedRows or LoDTensorArray) The input variable " - "could be LoDTensor, SelectedRows or LoDTensorArray.") + AddInput( + "X", + "(phi::DenseTensor, SelectedRows or phi::DenseTensorArray) The input " + "variable " + "could be phi::DenseTensor, SelectedRows or phi::DenseTensorArray.") .AsDispensable(); AddOutput("Out", - "(LoDTensor, SelectedRows or LoDTensorArray) The type of output " + "(phi::DenseTensor, SelectedRows or phi::DenseTensorArray) The " + "type of output " "is the same as input X."); 
AddComment(R"DOC(Assign Operator -Out = X, when type in [LoDTensor/SelectedRows/LoDTensorArray] +Out = X, when type in [phi::DenseTensor/SelectedRows/phi::DenseTensorArray] raise error if the type is not listed above. )DOC"); } diff --git a/paddle/fluid/operators/assign_pos_op.cu b/paddle/fluid/operators/assign_pos_op.cu index 0f1107765d3844..e5f783ec2d6ac9 100644 --- a/paddle/fluid/operators/assign_pos_op.cu +++ b/paddle/fluid/operators/assign_pos_op.cu @@ -59,13 +59,14 @@ class AssignPosCUDAKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { // assign pos decides which tokens should be fetched belong to specially // counter orderingly. - auto cum_count = context.Input( + auto cum_count = context.Input( "cum_count"); // (counter number) int32 | int64 - auto numbers = - context.Input("X"); // (batch_size * seq_len, topk) int32 + auto numbers = context.Input( + "X"); // (batch_size * seq_len, topk) int32 auto eff_num_len = - context.Input("eff_num_len"); // (sum(cum_count)) - auto out = context.Output("Out"); // (cum_count) value ranges + context.Input("eff_num_len"); // (sum(cum_count)) + auto out = + context.Output("Out"); // (cum_count) value ranges // from 0 to batch_size * // seq_len * topk auto place = context.GetPlace(); diff --git a/paddle/fluid/operators/assign_pos_op.h b/paddle/fluid/operators/assign_pos_op.h index 49e95184e346fb..6c75fb55f58468 100644 --- a/paddle/fluid/operators/assign_pos_op.h +++ b/paddle/fluid/operators/assign_pos_op.h @@ -20,8 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; - template class AssignPosOpCPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/attention_lstm_op.cc b/paddle/fluid/operators/attention_lstm_op.cc index 49a847eecaeaaf..d3ae66b3c02ff8 100644 --- a/paddle/fluid/operators/attention_lstm_op.cc +++ b/paddle/fluid/operators/attention_lstm_op.cc @@ -205,11 +205,12 @@ framework::OpKernelType AttentionLSTMOp::GetExpectedKernelType( } void AttentionLSTMOpMaker::Make() { - AddInput("X", - "(LoDTensor) the input is a LodTensor, which support " - "variable-time length input sequence. The underlying tensor in " - "this LoDTensor is a matrix with shape (T X M), where T is the " - "total time steps in this mini-batch, M is the dim size of x."); + AddInput( + "X", + "(phi::DenseTensor) the input is a LodTensor, which support " + "variable-time length input sequence. The underlying tensor in " + "this phi::DenseTensor is a matrix with shape (T X M), where T is the " + "total time steps in this mini-batch, M is the dim size of x."); AddInput("C0", "(Tensor) LSTM C0" "This is a tensor with shape (N x D), where N is the batch size, D " @@ -247,12 +248,14 @@ void AttentionLSTMOpMaker::Make() { "Note: we should add the bias of hidden and context accorindg to " "the same gate: " "{B_forget, B_input, B_output, B_cell}"); - AddOutput("Hidden", - "(LoDTensor) (same as LSTMOp) the hidden state of LSTM operator. " - "The shape is (T x D), and lod is the same with the `Input`."); - AddOutput("Cell", - "(LoDTensor) (same as LSTMOp) the cell state of LSTM operator. " - "The shape is (T x D), and lod is the same with the `Input`."); + AddOutput( + "Hidden", + "(phi::DenseTensor) (same as LSTMOp) the hidden state of LSTM operator. " + "The shape is (T x D), and lod is the same with the `Input`."); + AddOutput( + "Cell", + "(phi::DenseTensor) (same as LSTMOp) the cell state of LSTM operator. 
" + "The shape is (T x D), and lod is the same with the `Input`."); AddOutput("AttentionedX", "(Tensor) shape is (T x 1), the result after X * AttentionWeight," " where T is the total time steps in this mini-batch," @@ -339,7 +342,7 @@ class AttentionLSTMKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { using DeviceContext = phi::CPUContext; - auto* x = ctx.Input("X"); + auto* x = ctx.Input("X"); auto* h0 = ctx.Input("H0"); auto* c0 = ctx.Input("C0"); auto* atten_w = ctx.Input("AttentionWeight"); @@ -350,8 +353,8 @@ class AttentionLSTMKernel : public framework::OpKernel { auto* lstm_w = ctx.Input("LSTMWeight"); auto* lstm_b = ctx.Input("LSTMBias"); - auto* hidden_out = ctx.Output("Hidden"); - auto* cell_out = ctx.Output("Cell"); + auto* hidden_out = ctx.Output("Hidden"); + auto* cell_out = ctx.Output("Cell"); auto* atted_x = ctx.Output("AttentionedX"); auto* fc_out = ctx.Output("AttentionFCOut"); auto* lstm_x = ctx.Output("LSTMX"); diff --git a/paddle/fluid/operators/attention_lstm_op.h b/paddle/fluid/operators/attention_lstm_op.h index 32511b97d6a561..41d7d594df2073 100644 --- a/paddle/fluid/operators/attention_lstm_op.h +++ b/paddle/fluid/operators/attention_lstm_op.h @@ -18,7 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; using Tensor = phi::DenseTensor; class AttentionLSTMOp : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index a20b2ad21d3e9c..abf177ee9f9f46 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -383,8 +383,8 @@ framework::OpKernelType BatchNormGradOp::GetExpectedKernelType( const Tensor *t = nullptr; if (var->IsType()) { t = &var->Get(); - } else if (var->IsType()) { - t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); } if (t == nullptr) { PADDLE_THROW( @@ -525,8 +525,8 @@ framework::OpKernelType BatchNormDoubleGradOp::GetExpectedKernelType( const Tensor *t = nullptr; if (var->IsType()) { t = &var->Get(); - } else if (var->IsType()) { - t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); } if (t == nullptr) { PADDLE_THROW( diff --git a/paddle/fluid/operators/batch_norm_op.h b/paddle/fluid/operators/batch_norm_op.h index 35f1572899f857..b11deeb49509b0 100644 --- a/paddle/fluid/operators/batch_norm_op.h +++ b/paddle/fluid/operators/batch_norm_op.h @@ -28,7 +28,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using DataLayout = phi::DataLayout; template diff --git a/paddle/fluid/operators/beam_search_decode_op.h b/paddle/fluid/operators/beam_search_decode_op.h index 7bad6950a7a260..e635405f3884ea 100644 --- a/paddle/fluid/operators/beam_search_decode_op.h +++ b/paddle/fluid/operators/beam_search_decode_op.h @@ -23,8 +23,8 @@ namespace operators { struct BeamSearchDecodeFunctor { BeamSearchDecodeFunctor(const LoDTensorArray& step_ids, const LoDTensorArray& step_scores, - LoDTensor* id_tensor, - LoDTensor* score_tensor, + phi::DenseTensor* id_tensor, + phi::DenseTensor* score_tensor, size_t beam_size, int end_id) : beam_size_(beam_size), @@ -119,8 +119,8 @@ struct BeamSearchDecodeFunctor { const LoDTensorArray& step_scores_origin_; LoDTensorArray step_ids_ = LoDTensorArray(); LoDTensorArray step_scores_ = LoDTensorArray(); - LoDTensor* id_tensor_; - LoDTensor* score_tensor_; + phi::DenseTensor* id_tensor_; + 
phi::DenseTensor* score_tensor_; }; template @@ -164,8 +164,10 @@ class BeamSearchDecodeOpKernel : public framework::OpKernel { int end_id = context.Attr("end_id"); // prepare output - LoDTensor* sentenceIds = context.Output("SentenceIds"); - LoDTensor* sentenceScores = context.Output("SentenceScores"); + phi::DenseTensor* sentenceIds = + context.Output("SentenceIds"); + phi::DenseTensor* sentenceScores = + context.Output("SentenceScores"); BeamSearchDecodeFunctor bs( *ids, *scores, sentenceIds, sentenceScores, beam_size, end_id); diff --git a/paddle/fluid/operators/beam_search_decode_op_def.h b/paddle/fluid/operators/beam_search_decode_op_def.h index c9e89a7f354c4b..e57dfe512c27a5 100644 --- a/paddle/fluid/operators/beam_search_decode_op_def.h +++ b/paddle/fluid/operators/beam_search_decode_op_def.h @@ -23,7 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; using LoDTensorArray = framework::LoDTensorArray; // all the lod have 2 levels. @@ -54,15 +53,15 @@ struct BeamSearchDecoder { * with word score. * Param: * sentence_vector_list: sentence_vector for each source sentence. - * id_tensor: result LoDTensor for sentences of id. - * score_tensor: result LoDTensor for sentences of score. + * id_tensor: result phi::DenseTensor for sentences of id. + * score_tensor: result phi::DenseTensor for sentences of score. * reverse: whether ids of sentence in sentence_vector_list is reversed * sort_by_score: whether to sort hypotheses of each sentence by scores. */ void ConvertSentenceVectorToLodTensor( std::vector> sentence_vector_list, - LoDTensor* id_tensor, - LoDTensor* score_tensor, + phi::DenseTensor* id_tensor, + phi::DenseTensor* score_tensor, bool reverse = true, bool sort_by_score = true) const; @@ -72,8 +71,8 @@ struct BeamSearchDecoder { */ void Backtrace(const LoDTensorArray& step_ids, const LoDTensorArray& step_scores, - LoDTensor* id_tensor, - LoDTensor* score_tensor) const; + phi::DenseTensor* id_tensor, + phi::DenseTensor* score_tensor) const; size_t beam_size_; int end_id_; @@ -82,8 +81,8 @@ struct BeamSearchDecoder { template void BeamSearchDecoder::ConvertSentenceVectorToLodTensor( std::vector> sentence_vector_list, - LoDTensor* id_tensor, - LoDTensor* score_tensor, + phi::DenseTensor* id_tensor, + phi::DenseTensor* score_tensor, bool reverse, bool sort_by_score) const { size_t src_num = sentence_vector_list.size(); @@ -158,8 +157,8 @@ void BeamSearchDecoder::ConvertSentenceVectorToLodTensor( template void BeamSearchDecoder::Backtrace(const LoDTensorArray& step_ids, const LoDTensorArray& step_scores, - LoDTensor* id_tensor, - LoDTensor* score_tensor) const { + phi::DenseTensor* id_tensor, + phi::DenseTensor* score_tensor) const { PADDLE_ENFORCE_NE( step_ids.empty(), true, diff --git a/paddle/fluid/operators/beam_search_decode_op_test.cc b/paddle/fluid/operators/beam_search_decode_op_test.cc index 03103d98a0a0fd..72fcf40ec3d58e 100644 --- a/paddle/fluid/operators/beam_search_decode_op_test.cc +++ b/paddle/fluid/operators/beam_search_decode_op_test.cc @@ -18,7 +18,6 @@ limitations under the License. 
*/ using CPUPlace = paddle::platform::CPUPlace; using LoD = paddle::framework::LoD; -using LoDTensor = phi::DenseTensor; using LoDTensorArray = paddle::framework::LoDTensorArray; template @@ -59,7 +58,7 @@ void GenerateExample(const std::vector& level_0, lod.push_back(level_1); // Ids - LoDTensor tensor_id; + phi::DenseTensor tensor_id; tensor_id.set_lod(lod); tensor_id.Resize({static_cast(data.size())}); // malloc memory @@ -69,7 +68,7 @@ void GenerateExample(const std::vector& level_0, } // Scores - LoDTensor tensor_score; + phi::DenseTensor tensor_score; tensor_score.set_lod(lod); tensor_score.Resize({static_cast(data.size())}); // malloc memory @@ -124,8 +123,8 @@ void BeamSearchDecodeTestFrame() { BeamSearchDecoder helper(2, 1); // beam_size = 2, end_id = 1 - LoDTensor id_tensor; - LoDTensor score_tensor; + phi::DenseTensor id_tensor; + phi::DenseTensor score_tensor; helper.Backtrace(ids, scores, &id_tensor, &score_tensor); LoD lod = id_tensor.lod(); diff --git a/paddle/fluid/operators/beam_search_decode_op_xpu.cc b/paddle/fluid/operators/beam_search_decode_op_xpu.cc index cfea2f57da2731..aa7e6ca0476072 100644 --- a/paddle/fluid/operators/beam_search_decode_op_xpu.cc +++ b/paddle/fluid/operators/beam_search_decode_op_xpu.cc @@ -62,20 +62,21 @@ class BeamSearchDecodeXPUKernel : public framework::OpKernel { int end_id = context.Attr("end_id"); // prepare output - LoDTensor* sentenceIds = nullptr; - LoDTensor* sentenceScores = nullptr; + phi::DenseTensor* sentenceIds = nullptr; + phi::DenseTensor* sentenceScores = nullptr; - LoDTensor* sentenceIds_temp = context.Output("SentenceIds"); - LoDTensor* sentenceScores_temp = - context.Output("SentenceScores"); + phi::DenseTensor* sentenceIds_temp = + context.Output("SentenceIds"); + phi::DenseTensor* sentenceScores_temp = + context.Output("SentenceScores"); if (platform::is_xpu_place(ids->at(0).place())) { - sentenceIds = new LoDTensor(); + sentenceIds = new phi::DenseTensor(); sentenceIds->set_lod(sentenceIds_temp->lod()); } if (platform::is_xpu_place(ids->at(0).place())) { - sentenceScores = new LoDTensor(); + sentenceScores = new phi::DenseTensor(); sentenceScores->set_lod(sentenceScores_temp->lod()); } diff --git a/paddle/fluid/operators/beam_search_decode_op_xpu.h b/paddle/fluid/operators/beam_search_decode_op_xpu.h index e528d48d7218e5..25f109910f7791 100644 --- a/paddle/fluid/operators/beam_search_decode_op_xpu.h +++ b/paddle/fluid/operators/beam_search_decode_op_xpu.h @@ -18,7 +18,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -int SetMeta(const LoDTensor& srcTensor, LoDTensor* dstTensor) { +int SetMeta(const phi::DenseTensor& srcTensor, phi::DenseTensor* dstTensor) { if (srcTensor.dtype() == paddle::experimental::DataType::INT32 || srcTensor.dtype() == paddle::experimental::DataType::INT64 || srcTensor.dtype() == paddle::experimental::DataType::FLOAT32 || @@ -33,8 +33,8 @@ int SetMeta(const LoDTensor& srcTensor, LoDTensor* dstTensor) { return xpu::Error_t::SUCCESS; } template -int CopyTensorByXPU(const LoDTensor& srcTensor, - LoDTensor* dstTensor, +int CopyTensorByXPU(const phi::DenseTensor& srcTensor, + phi::DenseTensor* dstTensor, int flag, const Place& place) { const T* srcData = srcTensor.template data(); @@ -67,8 +67,8 @@ int CopyTensorByXPU(const LoDTensor& srcTensor, return xpu::Error_t::SUCCESS; } -const int CopyTensorByType(const LoDTensor& srcTensor, - LoDTensor* dstTensor, +const int CopyTensorByType(const phi::DenseTensor& srcTensor, + phi::DenseTensor* dstTensor, int flag, const Place& place) { int r = 0; @@ -97,8 +97,8 @@ const int CopyTensorByType(const LoDTensor& srcTensor, struct BeamSearchDecodeXPUFunctor { BeamSearchDecodeXPUFunctor(const LoDTensorArray& step_ids, const LoDTensorArray& step_scores, - LoDTensor* id_tensor, - LoDTensor* score_tensor, + phi::DenseTensor* id_tensor, + phi::DenseTensor* score_tensor, size_t beam_size, int end_id) : beam_size_(beam_size), @@ -164,8 +164,8 @@ struct BeamSearchDecodeXPUFunctor { // scenarios. LoDTensorArray step_ids_ = LoDTensorArray(); LoDTensorArray step_scores_ = LoDTensorArray(); - LoDTensor* id_tensor_; - LoDTensor* score_tensor_; + phi::DenseTensor* id_tensor_; + phi::DenseTensor* score_tensor_; }; } // namespace operators diff --git a/paddle/fluid/operators/beam_search_decode_op_xpu_test.cc b/paddle/fluid/operators/beam_search_decode_op_xpu_test.cc index c3bd7d55d3784e..8ec90efdf71248 100644 --- a/paddle/fluid/operators/beam_search_decode_op_xpu_test.cc +++ b/paddle/fluid/operators/beam_search_decode_op_xpu_test.cc @@ -19,7 +19,6 @@ limitations under the License. 
*/ using CPUPlace = paddle::platform::CPUPlace; using XPUPlace = paddle::platform::XPUPlace; using LoD = paddle::framework::LoD; -using LoDTensor = phi::DenseTensor; using LoDTensorArray = paddle::framework::LoDTensorArray; template @@ -67,7 +66,7 @@ void GenerateXPUExample(const std::vector& level_0, lod.push_back(level_1); // Ids - LoDTensor tensor_id_cpu; + phi::DenseTensor tensor_id_cpu; tensor_id_cpu.set_lod(lod); tensor_id_cpu.Resize({static_cast(data.size())}); // malloc memory @@ -76,7 +75,7 @@ void GenerateXPUExample(const std::vector& level_0, id_cpu_ptr[i] = static_cast(data.at(i)); } - LoDTensor tensor_id; + phi::DenseTensor tensor_id; const phi::DenseTensorMeta meta_data_id(paddle::experimental::DataType::INT64, tensor_id_cpu.dims()); tensor_id.set_meta(meta_data_id); @@ -90,7 +89,7 @@ void GenerateXPUExample(const std::vector& level_0, tensor_id_cpu.numel() * sizeof(int64_t)); // Scores - LoDTensor tensor_score_cpu; + phi::DenseTensor tensor_score_cpu; tensor_score_cpu.set_lod(lod); tensor_score_cpu.Resize({static_cast(data.size())}); // malloc memory @@ -99,7 +98,7 @@ void GenerateXPUExample(const std::vector& level_0, score_cpu_ptr[i] = static_cast(data.at(i)); } - LoDTensor tensor_score; + phi::DenseTensor tensor_score; if (std::is_same::value) { const phi::DenseTensorMeta meta_data_score( @@ -178,8 +177,8 @@ void BeamSearchDecodeTestByXPUFrame() { ASSERT_EQ(ids.size(), 5UL); ASSERT_EQ(scores.size(), 5UL); - LoDTensor id_tensor_cpu; - LoDTensor score_tensor_cpu; + phi::DenseTensor id_tensor_cpu; + phi::DenseTensor score_tensor_cpu; paddle::operators::BeamSearchDecodeXPUFunctor bs_xpu( ids, scores, &id_tensor_cpu, &score_tensor_cpu, 2, 1); diff --git a/paddle/fluid/operators/beam_search_op.cc b/paddle/fluid/operators/beam_search_op.cc index 668896c791f3c1..49669f1b350d9f 100644 --- a/paddle/fluid/operators/beam_search_op.cc +++ b/paddle/fluid/operators/beam_search_op.cc @@ -27,37 +27,42 @@ class BeamSearchOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { // inputs and outputs stored in proto AddInput("pre_ids", - "(LoDTensor) The LoDTensor containing the selected ids at the " + "(phi::DenseTensor) The phi::DenseTensor containing the selected " + "ids at the " "previous step. It should be a tensor with shape (batch_size, 1) " "and lod `[[0, 1, ... , batch_size], [0, 1, ..., batch_size]]` at " "the first step."); - AddInput("pre_scores", - "(LoDTensor) The LoDTensor containing the accumulated " - "scores corresponding to the selected ids at the previous step."); + AddInput( + "pre_scores", + "(phi::DenseTensor) The phi::DenseTensor containing the accumulated " + "scores corresponding to the selected ids at the previous step."); AddInput("ids", - "(LoDTensor) The LoDTensor containing the candidates ids. Its " + "(phi::DenseTensor) The phi::DenseTensor containing the " + "candidates ids. Its " "shape should be (batch_size * beam_size, W). If not set, it will " "be calculated out according to Input(scores) in this operator.") .AsDispensable(); - AddInput("scores", - "(LoDTensor) The LoDTensor containing the current scores " - "corresponding to Input(ids). If Input(ids) is not nullptr, its " - "shape is the same as that of Input(ids)." - "If is_accumulated is true, Input(scores) is accumulated scores " - "and will be used derectedly. 
Else, each score will be " - "transformed to the log field and accumulate Input(pre_sores) " - "first."); + AddInput( + "scores", + "(phi::DenseTensor) The phi::DenseTensor containing the current scores " + "corresponding to Input(ids). If Input(ids) is not nullptr, its " + "shape is the same as that of Input(ids)." + "If is_accumulated is true, Input(scores) is accumulated scores " + "and will be used derectedly. Else, each score will be " + "transformed to the log field and accumulate Input(pre_sores) " + "first."); AddOutput("selected_ids", "A LodTensor that stores the IDs selected by beam search."); - AddOutput("selected_scores", - "A LoDTensor containing the accumulated scores corresponding to " - "Output(selected_ids)."); + AddOutput( + "selected_scores", + "A phi::DenseTensor containing the accumulated scores corresponding to " + "Output(selected_ids)."); AddOutput("parent_idx", "A Tensor preserving the selected_ids' parent index in pre_ids.") .AsDispensable(); // Attributes stored in AttributeMap - AddAttr("level", "the level of LoDTensor"); + AddAttr("level", "the level of phi::DenseTensor"); AddAttr("beam_size", "beam size for beam search"); AddAttr("end_id", "the token id which indicates the end of a sequence"); diff --git a/paddle/fluid/operators/broadcast_tensors_op.cc b/paddle/fluid/operators/broadcast_tensors_op.cc index df91ef10b181ab..34a76e86aae0d8 100644 --- a/paddle/fluid/operators/broadcast_tensors_op.cc +++ b/paddle/fluid/operators/broadcast_tensors_op.cc @@ -41,12 +41,13 @@ class BroadcastTensorsOp : public framework::OperatorWithKernel { class BroadcastTensorsOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", - "A Varaible list. The shape and data type of the list elements" - "should be consistent. Variable can be multi-dimensional Tensor" - "or LoDTensor, and data types can be: bool, float16, float32, " - "float64, int32, " - "int64.") + AddInput( + "X", + "A Varaible list. The shape and data type of the list elements" + "should be consistent. Variable can be multi-dimensional Tensor" + "or phi::DenseTensor, and data types can be: bool, float16, float32, " + "float64, int32, " + "int64.") .AsDuplicable(); AddOutput("Out", "the sum of input :code:`x`. its shape and data types are " @@ -54,7 +55,7 @@ class BroadcastTensorsOpMaker : public framework::OpProtoAndCheckerMaker { .AsDuplicable(); AddComment( R"DOC(This OP is used to broadcast a vector of inputs - with Tensor or LoDTensor type, following broadcast semantics.)DOC"); + with phi::DenseTensor type, following broadcast semantics.)DOC"); } }; diff --git a/paddle/fluid/operators/check_memory_continue_op.cc b/paddle/fluid/operators/check_memory_continue_op.cc index aca6951c87e818..0099dd109cabca 100644 --- a/paddle/fluid/operators/check_memory_continue_op.cc +++ b/paddle/fluid/operators/check_memory_continue_op.cc @@ -31,12 +31,13 @@ class CheckMemoryContinueOp : public framework::OperatorWithKernel { class CheckMemoryContinueOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "(vector) The input tensors.").AsDuplicable(); - AddOutput("Out", "(LoDTensor) The output tensor.").AsDuplicable(); - AddOutput( - "XOut", - "(vector) The output tensors which are the same as x. 
It is " - "used to build the graph dependency"); + AddInput("X", "(vector) The input tensors.") + .AsDuplicable(); + AddOutput("Out", "(phi::DenseTensor) The output tensor.").AsDuplicable(); + AddOutput("XOut", + "(vector) The output tensors which are the " + "same as x. It is " + "used to build the graph dependency"); AddComment(R"DOC( CheckMemoryContinue Operator. diff --git a/paddle/fluid/operators/chunk_eval_op.h b/paddle/fluid/operators/chunk_eval_op.h index 5422d4466188b7..7e614ccee7f566 100644 --- a/paddle/fluid/operators/chunk_eval_op.h +++ b/paddle/fluid/operators/chunk_eval_op.h @@ -23,8 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; - template class ChunkEvalKernel : public framework::OpKernel { public: @@ -187,9 +185,9 @@ class ChunkEvalKernel : public framework::OpKernel { context.Attr>("excluded_chunk_types").begin(), context.Attr>("excluded_chunk_types").end()); - auto* inference = context.Input("Inference"); + auto* inference = context.Input("Inference"); auto place = inference->place(); - auto* label = context.Input("Label"); + auto* label = context.Input("Label"); auto* precision = context.Output("Precision"); auto* recall = context.Output("Recall"); auto* f1 = context.Output("F1-Score"); diff --git a/paddle/fluid/operators/coalesce_tensor_op.cc b/paddle/fluid/operators/coalesce_tensor_op.cc index 719b5c057b602e..e148c5b4b10e5f 100644 --- a/paddle/fluid/operators/coalesce_tensor_op.cc +++ b/paddle/fluid/operators/coalesce_tensor_op.cc @@ -120,7 +120,7 @@ class CoalesceTensorOpKernel : public framework::OpKernel { in_var_names.size(), out_var_names.size())); - // Input & Output check: only support LoDTensor + // Input & Output check: only support phi::DenseTensor bool has_not_init_in_vars = false; for (size_t i = 0; i < in_tensors.size(); ++i) { PADDLE_ENFORCE_NOT_NULL( @@ -426,17 +426,17 @@ class CoalesceTensorOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("Input", - "(vector) The input tensors of" + "(vector) The input tensors of" " coalesce_tensor operator.") .AsDuplicable(); AddOutput("Output", - "(vector) The output " + "(vector) The output " "tensors of coalesce_tensor operator. And the address " "of output tensors are continuous, they are sliced from the " "tensor of FusedOutput.") .AsDuplicable(); AddOutput("FusedOutput", - "(LoDTensor) The output tensor " + "(phi::DenseTensor) The output tensor " "of coalesce_tensor operator. 
And the tensors of" " Output is sliced from the tensor of FusedOutput."); AddAttr("dtype", "The output data type."); diff --git a/paddle/fluid/operators/common_infer_shape_functions.cc b/paddle/fluid/operators/common_infer_shape_functions.cc index b256d94a5a894f..fcb58dcb242270 100644 --- a/paddle/fluid/operators/common_infer_shape_functions.cc +++ b/paddle/fluid/operators/common_infer_shape_functions.cc @@ -154,7 +154,7 @@ void BinaryOpBroadcastInferShape(framework::InferShapeContext *ctx) { ctx->GetInputsVarType(y_name).front(), framework::proto::VarType::LOD_TENSOR, platform::errors::InvalidArgument( - "The var type of input %s should be LoDTensor, but got %s.", + "The var type of input %s should be phi::DenseTensor, but got %s.", ctx->Inputs(y_name).front(), ctx->GetInputsVarType(y_name).front())); diff --git a/paddle/fluid/operators/copy_cross_scope_op.cc b/paddle/fluid/operators/copy_cross_scope_op.cc index 1fcf6832d25c4b..a36e9b73639ba3 100644 --- a/paddle/fluid/operators/copy_cross_scope_op.cc +++ b/paddle/fluid/operators/copy_cross_scope_op.cc @@ -30,7 +30,6 @@ class OpBase; } // namespace imperative } // namespace paddle -using LoDTensor = phi::DenseTensor; using Tensor = phi::DenseTensor; namespace paddle { @@ -64,7 +63,7 @@ class CopyCrossScopeOp : public framework::OperatorBase { PADDLE_ENFORCE_NOT_NULL( id_var, platform::errors::NotFound("No variable with name %s found.", id_name)); - auto id_tensor = id_var->GetMutable(); + auto id_tensor = id_var->GetMutable(); auto it = scope.kids().begin(); phi::DenseTensor cpu_id_tensor; paddle::framework::TensorCopySync( @@ -88,8 +87,8 @@ class CopyCrossScopeOp : public framework::OperatorBase { platform::errors::NotFound( "No variable with name %s found in destination scope.", x_name)); - auto dst_tensor = dst_var->GetMutable(); - auto main_tensor = main_var->GetMutable(); + auto dst_tensor = dst_var->GetMutable(); + auto main_tensor = main_var->GetMutable(); paddle::framework::TensorCopySync( *dst_tensor, main_tensor->place(), main_tensor); } @@ -109,8 +108,8 @@ class CopyCrossScopeOp : public framework::OperatorBase { dst_var, platform::errors::NotFound( "No variable with name %s found in destination scope.", x_name)); - auto src_tensor = source_var->GetMutable(); - auto dst_tensor = dst_var->GetMutable(); + auto src_tensor = source_var->GetMutable(); + auto dst_tensor = dst_var->GetMutable(); paddle::framework::TensorCopySync( *src_tensor, dst_tensor->place(), dst_tensor); @@ -120,7 +119,7 @@ class CopyCrossScopeOp : public framework::OperatorBase { main_var, platform::errors::NotFound( "No variable with name %s found in destination scope.", x_name)); - auto main_tensor = main_var->GetMutable(); + auto main_tensor = main_var->GetMutable(); paddle::framework::TensorCopySync( *dst_tensor, main_tensor->place(), main_tensor); } diff --git a/paddle/fluid/operators/crf_decoding_op.cc b/paddle/fluid/operators/crf_decoding_op.cc index ae1086b623f13b..62bd73374b3a18 100644 --- a/paddle/fluid/operators/crf_decoding_op.cc +++ b/paddle/fluid/operators/crf_decoding_op.cc @@ -21,7 +21,8 @@ class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput( "Emission", - "(Tensor/LoDTensor). For a LoDTensor input, its shape is [N x D] " + "(Tensor/phi::DenseTensor). For a phi::DenseTensor input, its shape is " + "[N x D] " "where N is the total sequence length of the mini-batch and D is " "the total tag number. 
While for a tensor input, its shape is " "[B X S X D] with B the batch size and S the sequence length of each " @@ -39,14 +40,14 @@ class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker { "The data type is the same as Input(Emission)."); AddInput( "Label", - "(Tensor/LoDTensor). The ground truth with shape " - "[N x 1] (for LoDTensor) or [B x S] (for Tensor). This input is " + "(phi::DenseTensor). The ground truth with shape " + "[N x 1] (for phi::DenseTensor) or [B x S] (for Tensor). This input is " "optional. See more details in the operator's comments. The data type " "is int64.") .AsDispensable(); AddOutput( "ViterbiPath", - "(Tensor/LoDTensor). The decoding results. What to " + "(phi::DenseTensor). The decoding results. What to " "return changes depending on whether the Input(Label) (the ground " "truth) is given. See more details in the operator's comment. " "The data type is int64."); diff --git a/paddle/fluid/operators/crf_decoding_op.h b/paddle/fluid/operators/crf_decoding_op.h index dbce3700e8a642..f674ce03a984a9 100644 --- a/paddle/fluid/operators/crf_decoding_op.h +++ b/paddle/fluid/operators/crf_decoding_op.h @@ -24,15 +24,14 @@ namespace paddle { namespace operators { using framework::LoD; -using LoDTensor = phi::DenseTensor; template class CRFDecodingOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* emission_weights = ctx.Input("Emission"); + auto* emission_weights = ctx.Input("Emission"); auto* transition_weights = ctx.Input("Transition"); - auto* label = ctx.Input("Label"); + auto* label = ctx.Input("Label"); auto* decoded_path = ctx.Output("ViterbiPath"); int64_t* path = decoded_path->mutable_data(platform::CPUPlace()); diff --git a/paddle/fluid/operators/ctc_align_op.cu b/paddle/fluid/operators/ctc_align_op.cu index 2095b3d3858e34..cef3cf25ff6ff8 100644 --- a/paddle/fluid/operators/ctc_align_op.cu +++ b/paddle/fluid/operators/ctc_align_op.cu @@ -85,8 +85,8 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel { platform::errors::InvalidArgument( "CTCAlign operator CUDA kernel must use CUDAPlace " "rather than CPUPlace.")); - auto* input = ctx.Input("Input"); - auto* output = ctx.Output("Output"); + auto* input = ctx.Input("Input"); + auto* output = ctx.Output("Output"); const int blank = ctx.Attr("blank"); const int merge_repeated = static_cast(ctx.Attr("merge_repeated")); @@ -99,9 +99,9 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel { auto input_dims = input->dims(); T* output_data = output->mutable_data({input_dims[0], input_dims[1]}, ctx.GetPlace()); - auto* input_length = ctx.Input("InputLength"); + auto* input_length = ctx.Input("InputLength"); const T* input_length_data = input_length->data(); - auto* output_length = ctx.Output("OutputLength"); + auto* output_length = ctx.Output("OutputLength"); T* output_length_data = output_length->mutable_data({input_dims[0], 1}, ctx.GetPlace()); PaddingMergeAndDelCudaKernel diff --git a/paddle/fluid/operators/ctc_align_op.h b/paddle/fluid/operators/ctc_align_op.h index e137170d99a4a7..9279cf531d449c 100644 --- a/paddle/fluid/operators/ctc_align_op.h +++ b/paddle/fluid/operators/ctc_align_op.h @@ -25,14 +25,13 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; template class CTCAlignKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* output = 
ctx.Output("Output"); + auto* input = ctx.Input("Input"); + auto* output = ctx.Output("Output"); size_t blank = static_cast(ctx.Attr("blank")); bool merge_repeated = ctx.Attr("merge_repeated"); T* output_data = output->mutable_data(ctx.GetPlace()); @@ -43,10 +42,10 @@ class CTCAlignKernel : public framework::OpKernel { if (input->lod().empty()) { size_t padding_value = static_cast(ctx.Attr("padding_value")); - auto* input_length = ctx.Input("InputLength"); + auto* input_length = ctx.Input("InputLength"); const T* input_length_data = input_length->data(); - auto* output_length = ctx.Output("OutputLength"); + auto* output_length = ctx.Output("OutputLength"); T* output_length_data = output_length->mutable_data(ctx.GetPlace()); for (size_t batch_id = 0; batch_id < (unsigned)input_dims[0]; diff --git a/paddle/fluid/operators/cudnn_lstm_op.cu.cc b/paddle/fluid/operators/cudnn_lstm_op.cu.cc index 399bc5bb0bb261..d436a4b5d531d2 100644 --- a/paddle/fluid/operators/cudnn_lstm_op.cu.cc +++ b/paddle/fluid/operators/cudnn_lstm_op.cu.cc @@ -26,7 +26,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; using Tensor = phi::DenseTensor; template diff --git a/paddle/fluid/operators/cvm_op.cu b/paddle/fluid/operators/cvm_op.cu index e8fdcec36082a1..5cac5392f4abb8 100644 --- a/paddle/fluid/operators/cvm_op.cu +++ b/paddle/fluid/operators/cvm_op.cu @@ -23,7 +23,6 @@ namespace operators { using phi::PADDLE_CUDA_NUM_THREADS; using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; template __global__ void CvmComputeKernel(const bool use_cvm, @@ -87,7 +86,7 @@ template class CVMCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - const auto* x = context.Input("X"); + const auto* x = context.Input("X"); const T* x_data = x->data(); auto batch_size = x->dims()[0]; @@ -95,7 +94,7 @@ class CVMCUDAKernel : public framework::OpKernel { auto item_size = numel / batch_size; auto use_cvm = context.Attr("use_cvm"); - auto* y = context.Output("Y"); + auto* y = context.Output("Y"); T* y_data = y->mutable_data(context.GetPlace()); // for Input X do not have Lod Information. @@ -128,7 +127,7 @@ template class CVMGradCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* dx = context.Output(framework::GradVarName("X")); + auto* dx = context.Output(framework::GradVarName("X")); T* dx_data = dx->mutable_data(context.GetPlace()); const phi::DenseTensor* cvm = context.Input("CVM"); diff --git a/paddle/fluid/operators/cvm_op.h b/paddle/fluid/operators/cvm_op.h index 355fc6690ce1a4..9bd5a00b3733fd 100644 --- a/paddle/fluid/operators/cvm_op.h +++ b/paddle/fluid/operators/cvm_op.h @@ -20,7 +20,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; template void CvmComputeKernel(const bool use_cvm, @@ -61,14 +60,14 @@ template class CVMOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - const auto* x = context.Input("X"); + const auto* x = context.Input("X"); const T* x_data = x->data(); auto batch_size = x->dims()[0]; auto item_size = x->numel() / batch_size; auto use_cvm = context.Attr("use_cvm"); - auto* y = context.Output("Y"); + auto* y = context.Output("Y"); T* y_data = y->mutable_data(context.GetPlace()); // for Input X do not have Lod Information. 
@@ -102,7 +101,7 @@ template class CVMGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* dx = context.Output(framework::GradVarName("X")); + auto* dx = context.Output(framework::GradVarName("X")); T* dx_data = dx->mutable_data(context.GetPlace()); const phi::DenseTensor* cvm = context.Input("CVM"); diff --git a/paddle/fluid/operators/data_norm_op.cc b/paddle/fluid/operators/data_norm_op.cc index 09d09c33900e8d..36dc93445df594 100644 --- a/paddle/fluid/operators/data_norm_op.cc +++ b/paddle/fluid/operators/data_norm_op.cc @@ -24,7 +24,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using DataLayout = phi::DataLayout; template @@ -487,8 +486,8 @@ class DataNormGradOp : public framework::OperatorWithKernel { const Tensor *t = nullptr; if (var->IsType()) { t = &var->Get(); - } else if (var->IsType()) { - t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); } if (t == nullptr) { PADDLE_THROW(platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/data_norm_op.cu b/paddle/fluid/operators/data_norm_op.cu index 790e55965a9d2a..1b895b0c8daa5b 100644 --- a/paddle/fluid/operators/data_norm_op.cu +++ b/paddle/fluid/operators/data_norm_op.cu @@ -27,7 +27,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using DataLayout = phi::DataLayout; using phi::PADDLE_CUDA_NUM_THREADS; diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.cc b/paddle/fluid/operators/deformable_psroi_pooling_op.cc index bac1bb04bc0dd4..5240116c6a4f88 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.cc +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.cc @@ -33,9 +33,9 @@ class DeformablePSROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { "H is height of the feature, and " "W is the width of the feature."); AddInput("ROIs", - "(LoDTensor), " + "(phi::DenseTensor), " "ROIs (Regions of Interest) to pool over. " - "ROIs should be a 2-D LoDTensor of shape (num_rois, 4) " + "ROIs should be a 2-D phi::DenseTensor of shape (num_rois, 4) " "given as [[x1, y1, x2, y2], ...]. " "(x1, y1) is the top left coordinates, and " "(x2, y2) is the bottom right coordinates."); @@ -149,7 +149,8 @@ class DeformablePSROIPoolOp : public framework::OperatorWithKernel { rois_dims.size(), 2, platform::errors::InvalidArgument( - "Input(ROIs) should be a 2-D LoDTensor of shape (num_rois, 4) " + "Input(ROIs) should be a 2-D phi::DenseTensor of shape (num_rois, " + "4) " "given as [[ x1, y1, x2, y2], ...]. 
The rank of Input(ROIs) should " "be 2, but received ROIs rank is:%d, ROIs shape is:[%s].", rois_dims.size(), diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.cu b/paddle/fluid/operators/deformable_psroi_pooling_op.cu index f1816850317a16..80d248b818b4f9 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.cu +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.cu @@ -40,7 +40,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using phi::PADDLE_CUDA_NUM_THREADS; static inline int GET_BLOCKS(const int N) { @@ -185,7 +184,7 @@ class DeformablePSROIPoolCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { const phi::DenseTensor* input = ctx.Input("Input"); - const LoDTensor* rois = ctx.Input("ROIs"); + const phi::DenseTensor* rois = ctx.Input("ROIs"); const phi::DenseTensor* trans = ctx.Input("Trans"); phi::DenseTensor* out = ctx.Output("Output"); out->mutable_data(ctx.GetPlace()); @@ -486,7 +485,7 @@ class DeformablePSROIPoolGradCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { const phi::DenseTensor* input = ctx.Input("Input"); - const LoDTensor* rois = ctx.Input("ROIs"); + const phi::DenseTensor* rois = ctx.Input("ROIs"); const phi::DenseTensor* trans = ctx.Input("Trans"); const phi::DenseTensor* top_count = ctx.Input("TopCount"); const phi::DenseTensor* output_grad = diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.h b/paddle/fluid/operators/deformable_psroi_pooling_op.h index 7af8c99aa376b9..231d14e537b547 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.h +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.h @@ -34,7 +34,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; template T bilinear_interp( @@ -80,7 +79,7 @@ void DeformablePSROIPoolForwardCPUKernel(const int count, T* top_count, const int batch_size, int* roi_batch_id_data, - const LoDTensor* rois) { + const phi::DenseTensor* rois) { for (int ix = 0; ix < count; ix++) { int pw = ix % pooled_width; int ph = (ix / pooled_width) % pooled_height; @@ -174,7 +173,7 @@ class DeformablePSROIPoolCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* input = ctx.Input("Input"); - auto* rois = ctx.Input("ROIs"); + auto* rois = ctx.Input("ROIs"); auto* trans = ctx.Input("Trans"); auto* out = ctx.Output("Output"); out->mutable_data(ctx.GetPlace()); @@ -316,7 +315,7 @@ void DeformablePSROIPoolBackwardAccCPUKernel(const int count, const int channels_each_class, const int batch_size, int* roi_batch_id_data, - const LoDTensor* rois) { + const phi::DenseTensor* rois) { for (int index = 0; index < count; index++) { int pw = index % pooled_width; int ph = (index / pooled_width) % pooled_height; @@ -476,7 +475,7 @@ class DeformablePSROIPoolGradCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* input = ctx.Input("Input"); - auto* rois = ctx.Input("ROIs"); + auto* rois = ctx.Input("ROIs"); auto* trans = ctx.Input("Trans"); auto* top_count = ctx.Input("TopCount"); auto* output_grad = diff --git a/paddle/fluid/operators/dequeue_op.cc b/paddle/fluid/operators/dequeue_op.cc index 18216fb04d5cf6..2e954081ed7409 100644 --- a/paddle/fluid/operators/dequeue_op.cc +++ 
b/paddle/fluid/operators/dequeue_op.cc @@ -20,7 +20,6 @@ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" -using LoDTensor = phi::DenseTensor; using LoDTensorBlockingQueueHolder = paddle::operators::reader::LoDTensorBlockingQueueHolder; @@ -59,7 +58,7 @@ class DequeueOp : public framework::OperatorBase { out_var, platform::errors::NotFound("No variable with name %s found", out_names[i])); - auto* out_tensor = out_var->GetMutable(); + auto* out_tensor = out_var->GetMutable(); PADDLE_ENFORCE_NOT_NULL( out_tensor, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/detection_map_op.cc b/paddle/fluid/operators/detection_map_op.cc index 51fdd4ad1f2ec8..5d3cccb3a66174 100644 --- a/paddle/fluid/operators/detection_map_op.cc +++ b/paddle/fluid/operators/detection_map_op.cc @@ -103,7 +103,8 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("DetectRes", - "(LoDTensor) A 2-D LoDTensor with shape [M, 6] represents the " + "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [M, 6] " + "represents the " "detections. Each row has 6 values: " "[label, confidence, xmin, ymin, xmax, ymax], M is the total " "number of detect results in this mini-batch. For each instance, " @@ -111,7 +112,7 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker { "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is " "no detected data."); AddInput("Label", - "(LoDTensor) A 2-D LoDTensor represents the" + "(phi::DenseTensor) A 2-D phi::DenseTensor represents the" "Labeled ground-truth data. Each row has 6 values: " "[label, xmin, ymin, xmax, ymax, is_difficult] or 5 values: " "[label, xmin, ymin, xmax, ymax], where N is the total " @@ -135,14 +136,16 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker { "current mini-batch are calculated.") .AsDispensable(); AddInput("TruePos", - "(LoDTensor) A 2-D LoDTensor with shape [Ntp, 2], store the " + "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [Ntp, 2], " + "store the " "input true positive example of each class." "This input is used to pass the AccumTruePos generated by the " "previous mini-batch when the multi mini-batches cumulative " "calculation carried out. ") .AsDispensable(); AddInput("FalsePos", - "(LoDTensor) A 2-D LoDTensor with shape [Nfp, 2], store the " + "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [Nfp, 2], " + "store the " "input false positive example of each class." "This input is used to pass the AccumFalsePos generated by the " "previous mini-batch when the multi mini-batches cumulative " @@ -153,16 +156,18 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker { "positive example count of each class. It combines the input " "input(PosCount) and the positive example count computed from " "input(Detection) and input(Label)."); - AddOutput("AccumTruePos", - "(LoDTensor) A LoDTensor with shape [Ntp', 2], store the " - "true positive example of each class. It combines the " - "input(TruePos) and the true positive examples computed from " - "input(Detection) and input(Label)."); - AddOutput("AccumFalsePos", - "(LoDTensor) A LoDTensor with shape [Nfp', 2], store the " - "false positive example of each class. 
It combines the " - "input(FalsePos) and the false positive examples computed from " - "input(Detection) and input(Label)."); + AddOutput( + "AccumTruePos", + "(phi::DenseTensor) A phi::DenseTensor with shape [Ntp', 2], store the " + "true positive example of each class. It combines the " + "input(TruePos) and the true positive examples computed from " + "input(Detection) and input(Label)."); + AddOutput( + "AccumFalsePos", + "(phi::DenseTensor) A phi::DenseTensor with shape [Nfp', 2], store the " + "false positive example of each class. It combines the " + "input(FalsePos) and the false positive examples computed from " + "input(Detection) and input(Label)."); AddOutput("MAP", "(Tensor) A tensor with shape [1], store the mAP evaluate " "result of the detection."); diff --git a/paddle/fluid/operators/edit_distance_op.cc b/paddle/fluid/operators/edit_distance_op.cc index 70de5a3bb7588c..c4c5db6b50cdab 100644 --- a/paddle/fluid/operators/edit_distance_op.cc +++ b/paddle/fluid/operators/edit_distance_op.cc @@ -35,11 +35,11 @@ class EditDistanceOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("Hyps", - "2-D Tensor, or 2-D LoDTensor with last " + "2-D Tensor, or 2-D phi::DenseTensor with last " "dimension being 1. " "The indices for hypothesis strings."); AddInput("Refs", - "2-D Tensor, or 2-D LoDTensor with last " + "2-D Tensor, or 2-D phi::DenseTensor with last " "dimension being 1. " "The indices for reference strings."); AddInput("HypsLength", @@ -75,7 +75,7 @@ A will be transformed into B at least after two substitutions and one So the edit distance between A and B is 3. -Input(Hyps) is a 2-D Tensor or a 2-D LoDTensor consisting of all the hypothesis strings. +Input(Hyps) is a 2-D Tensor or a 2-D phi::DenseTensor consisting of all the hypothesis strings. And the `batch_size` reference strings are arranged in order in the same way in the Input(Refs). diff --git a/paddle/fluid/operators/enqueue_op.cc b/paddle/fluid/operators/enqueue_op.cc index e4f2f70c72a4b0..c8279719789c45 100644 --- a/paddle/fluid/operators/enqueue_op.cc +++ b/paddle/fluid/operators/enqueue_op.cc @@ -31,7 +31,6 @@ class OpBase; } // namespace imperative } // namespace paddle -using LoDTensor = phi::DenseTensor; using LoDTensorBlockingQueueHolder = paddle::operators::reader::LoDTensorBlockingQueueHolder; @@ -61,7 +60,7 @@ class EnqueueOp : public framework::OperatorBase { PADDLE_ENFORCE_NOT_NULL(in_var, platform::errors::NotFound( "No variable with name %s found.", var_name)); - auto* in_tensor = in_var->GetMutable(); + auto* in_tensor = in_var->GetMutable(); auto* queue_holder = queue_holder_var->template GetMutable(); diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op_mlu.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op_mlu.cc index 67812f5bc54b3e..34b760252bece7 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op_mlu.cc +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op_mlu.cc @@ -32,7 +32,7 @@ class FillConstantBatchSizeLikeOpMLUKernel : public framework::OpKernel { auto *out = ctx.Output("Out"); auto *in = ctx.Input("Input"); if (in->lod().size() && ctx.Attr("input_dim_idx") == 0) { - // set the correct batch size for the LoDTensor. + // set the correct batch size for the phi::DenseTensor. 
auto odims = out->dims(); int output_dim_idx = ctx.Attr("output_dim_idx"); odims[output_dim_idx] = static_cast(in->lod().back().size()) - 1; diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc index 339f36c53f96a0..22df3e5a9d23a4 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc @@ -35,7 +35,7 @@ class FillConstantBatchSizeLikeOpNPUKernel : public framework::OpKernel { auto *out = ctx.Output("Out"); auto *in = ctx.Input("Input"); if (in->lod().size() && ctx.Attr("input_dim_idx") == 0) { - // set the correct batch size for the LoDTensor. + // set the correct batch size for the phi::DenseTensor. auto odims = out->dims(); int output_dim_idx = ctx.Attr("output_dim_idx"); odims[output_dim_idx] = static_cast(in->lod().back().size()) - 1; diff --git a/paddle/fluid/operators/fill_op.cc b/paddle/fluid/operators/fill_op.cc index 8fe7b417e662e6..8937676c344ff8 100644 --- a/paddle/fluid/operators/fill_op.cc +++ b/paddle/fluid/operators/fill_op.cc @@ -27,7 +27,7 @@ class FillOpMaker : public framework::OpProtoAndCheckerMaker { Fill an tensor with `value` and `shape`. The type of the tensor is specify by `dtype`. )DOC"); - AddOutput("Out", "(LoDTensor) The output tensor."); + AddOutput("Out", "(phi::DenseTensor) The output tensor."); AddAttr>( "value", "The float values of tensor, which are flatten in row major"); AddAttr>("shape", "The shape of output tensor"); diff --git a/paddle/fluid/operators/filter_by_instag_op.cc b/paddle/fluid/operators/filter_by_instag_op.cc index a0ac46c4a66030..808792468ff38d 100644 --- a/paddle/fluid/operators/filter_by_instag_op.cc +++ b/paddle/fluid/operators/filter_by_instag_op.cc @@ -69,16 +69,17 @@ class FilterByInstagOp : public framework::OperatorWithKernel { class FilterByInstagOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("Ins", "(LoDTensor) embeded tensor"); - AddInput("Ins_tag", "(LoDTensor) ins tag list"); + AddInput("Ins", "(phi::DenseTensor) embeded tensor"); + AddInput("Ins_tag", "(phi::DenseTensor) ins tag list"); AddInput("Filter_tag", "(1D Tensor) filter tag list"); AddAttr("is_lod", "is Ins with LoD info or not, default True"); AddAttr("out_val_if_empty", "if the output after filter is empty, the output value") .SetDefault(0); - AddOutput("Out", "(LoDTensor) embeded tensor filtered by instag"); + AddOutput("Out", "(phi::DenseTensor) embeded tensor filtered by instag"); AddOutput("LossWeight", "(Tensor) loss weight."); - AddOutput("IndexMap", "(LoDTensor) mapping from Out rows to X1 rows"); + AddOutput("IndexMap", + "(phi::DenseTensor) mapping from Out rows to X1 rows"); AddComment(R"DOC( Filter By Instag Op diff --git a/paddle/fluid/operators/filter_by_instag_op.cu b/paddle/fluid/operators/filter_by_instag_op.cu index 90bc2eda3c92cc..56068684e16ce7 100644 --- a/paddle/fluid/operators/filter_by_instag_op.cu +++ b/paddle/fluid/operators/filter_by_instag_op.cu @@ -45,7 +45,6 @@ namespace operators { using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; -using LoDTensor = phi::DenseTensor; template using Vector = framework::Vector; @@ -341,7 +340,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel { // context.cuda_device_context().GetMaxThreadsPerBlock(); // X1 is global FC output // Dim [batch size, embedding size] - const LoDTensor* x1 = context.Input("Ins"); + const phi::DenseTensor* x1 = 
context.Input("Ins"); bool is_lod = context.Attr("is_lod"); int is_x1_lod = -1; @@ -354,7 +353,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel { size_t x1_embed_size = x1->dims()[1]; // X2 is ins tag list // LoD [[0, Sum(ins1), Sum(ins1, ins2), ... ]] - const LoDTensor* x2 = context.Input("Ins_tag"); + const phi::DenseTensor* x2 = context.Input("Ins_tag"); // expected auto = const int64_t const int64_t* x2_data = x2->data(); @@ -389,7 +388,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel { x1_lods.push_back(i + 1); } } else { - // x1_lods = context.Input("Ins")->lod()[0]; + // x1_lods = context.Input("Ins")->lod()[0]; // new: lod_level=0 => lod() return {} if (x1->lod().size() != 0) { // lod_level = 1 x1_lods = x1->lod()[0]; @@ -412,9 +411,10 @@ class FilterByInstagGPUKernel : public framework::OpKernel { // for those whose ins been dropout, set 0 for whole lines. // otherwise, copy whole line // Dim [local fc count, batch size, embedding size] - LoDTensor* out = context.Output("Out"); - LoDTensor* map = context.Output("IndexMap"); - LoDTensor* loss_weight = context.Output("LossWeight"); + phi::DenseTensor* out = context.Output("Out"); + phi::DenseTensor* map = context.Output("IndexMap"); + phi::DenseTensor* loss_weight = + context.Output("LossWeight"); int out_first = x1_lods.back(); @@ -563,13 +563,15 @@ class FilterByInstagGradGPUKernel : public framework::OpKernel { auto gpu_place = context.GetPlace(); gpuStream_t current_stream = context.cuda_device_context().stream(); auto max_thread_num_per_block = 1024; - auto* output_grad = context.Input(framework::GradVarName("Out")); - auto* x1_grad = context.Output(framework::GradVarName("Ins")); - auto* loss_weight = context.Input("LossWeight"); - auto* mmap = context.Input("IndexMap"); - auto* x1 = context.Input("Ins"); - - x1_grad->set_lod(context.Input("Ins")->lod()); + auto* output_grad = + context.Input(framework::GradVarName("Out")); + auto* x1_grad = + context.Output(framework::GradVarName("Ins")); + auto* loss_weight = context.Input("LossWeight"); + auto* mmap = context.Input("IndexMap"); + auto* x1 = context.Input("Ins"); + + x1_grad->set_lod(context.Input("Ins")->lod()); x1_grad->Resize(x1->dims()); auto* mmap_data = mmap->data(); diff --git a/paddle/fluid/operators/filter_by_instag_op.h b/paddle/fluid/operators/filter_by_instag_op.h index c5d4e35428f43d..04f1099168a5ce 100644 --- a/paddle/fluid/operators/filter_by_instag_op.h +++ b/paddle/fluid/operators/filter_by_instag_op.h @@ -31,7 +31,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; -using LoDTensor = phi::DenseTensor; template using Vector = framework::Vector; @@ -42,12 +41,12 @@ class FilterByInstagKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { // X1 is global FC output // Dim [batch size, embedding size] - auto* x1 = context.Input("Ins"); + auto* x1 = context.Input("Ins"); bool is_x1_lod = context.Attr("is_lod"); int64_t out_val_if_empty = context.Attr("out_val_if_empty"); // X2 is ins tag list // LoD [[0, Sum(ins1), Sum(ins1, ins2), ... ]] - auto* x2 = context.Input("Ins_tag"); + auto* x2 = context.Input("Ins_tag"); // X3 is local fc tag list // LoD [[0, Sum(fc1), Sum(fc1, fc2) ...]] auto* x3 = context.Input("Filter_tag"); @@ -107,9 +106,10 @@ class FilterByInstagKernel : public framework::OpKernel { // for those whose ins been dropout, set 0 for whole lines. 
// otherwise, copy whole line // Dim [local fc count, batch size, embedding size] - LoDTensor* out = context.Output("Out"); - LoDTensor* map = context.Output("IndexMap"); - LoDTensor* loss_weight = context.Output("LossWeight"); + phi::DenseTensor* out = context.Output("Out"); + phi::DenseTensor* map = context.Output("IndexMap"); + phi::DenseTensor* loss_weight = + context.Output("LossWeight"); // expected auto = const T auto* x1_data = x1->data(); // expected auto = T @@ -196,12 +196,14 @@ template class FilterByInstagGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* output_grad = context.Input(framework::GradVarName("Out")); - auto* x1_grad = context.Output(framework::GradVarName("Ins")); - auto* loss_weight = context.Input("LossWeight"); - auto* mmap = context.Input("IndexMap"); - auto* x1 = context.Input("Ins"); - x1_grad->set_lod(context.Input("Ins")->lod()); + auto* output_grad = + context.Input(framework::GradVarName("Out")); + auto* x1_grad = + context.Output(framework::GradVarName("Ins")); + auto* loss_weight = context.Input("LossWeight"); + auto* mmap = context.Input("IndexMap"); + auto* x1 = context.Input("Ins"); + x1_grad->set_lod(context.Input("Ins")->lod()); x1_grad->Resize(x1->dims()); auto mmap_data = mmap->data(); // expected auto = T diff --git a/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc b/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc index c96bc1a9025514..658352d844d9a2 100644 --- a/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc +++ b/paddle/fluid/operators/get_tensor_from_selected_rows_op.cc @@ -35,13 +35,14 @@ class GetTensorFromSelectedRowsOp : public framework::OperatorWithKernel { "but the received is %s", ctx->Inputs("X").front(), ctx->GetInputsVarType("X").front())); - PADDLE_ENFORCE_EQ(ctx->GetOutputsVarType("Out").front(), - framework::proto::VarType::LOD_TENSOR, - platform::errors::InvalidArgument( - "The output Out(%s)'s type should be LoDTensor, " - "but the received is %s", - ctx->Outputs("Out").front(), - ctx->GetOutputsVarType("Out").front())); + PADDLE_ENFORCE_EQ( + ctx->GetOutputsVarType("Out").front(), + framework::proto::VarType::LOD_TENSOR, + platform::errors::InvalidArgument( + "The output Out(%s)'s type should be phi::DenseTensor, " + "but the received is %s", + ctx->Outputs("Out").front(), + ctx->GetOutputsVarType("Out").front())); ctx->SetOutputDim("Out", ctx->GetInputDim("X")); } @@ -72,7 +73,7 @@ class GetTensorFromSelectedRowsOpProtoMaker public: void Make() override { AddInput("X", "The input type is SelectedRows."); - AddOutput("Out", "The output type is LoDTensor."); + AddOutput("Out", "The output type is phi::DenseTensor."); AddComment( R"DOC( GetTensorFromSelectedRows Operator diff --git a/paddle/fluid/operators/group_norm_op.cc b/paddle/fluid/operators/group_norm_op.cc index f6f8cb5aff6d88..3d6566d62b2a74 100644 --- a/paddle/fluid/operators/group_norm_op.cc +++ b/paddle/fluid/operators/group_norm_op.cc @@ -29,7 +29,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using DataLayout = phi::DataLayout; class GroupNormOp : public framework::OperatorWithKernel { @@ -127,8 +126,8 @@ class GroupNormGradOp : public framework::OperatorWithKernel { const Tensor *t = nullptr; if (var->IsType()) { t = &var->Get(); - } else if (var->IsType()) { - t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); } PADDLE_ENFORCE_NOT_NULL( t, diff --git 
a/paddle/fluid/operators/group_norm_op.h b/paddle/fluid/operators/group_norm_op.h index 657892877fe818..0ce89b4625a131 100644 --- a/paddle/fluid/operators/group_norm_op.h +++ b/paddle/fluid/operators/group_norm_op.h @@ -29,7 +29,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using DataLayout = phi::DataLayout; template diff --git a/paddle/fluid/operators/gru_op.cc b/paddle/fluid/operators/gru_op.cc index fc78f514a45077..cceecdcad5fd2b 100644 --- a/paddle/fluid/operators/gru_op.cc +++ b/paddle/fluid/operators/gru_op.cc @@ -115,11 +115,12 @@ class GRUOp : public framework::OperatorWithKernel { class GRUOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("Input", - "(LoDTensor) The first input is a LodTensor, which supports " - "variable-time length input sequence. The underlying tensor in " - "this LoDTenosr is a matrix with shape (T X 3D), where, T is the " - "total time steps in this mini-batch, D is the hidden size."); + AddInput( + "Input", + "(phi::DenseTensor) The first input is a LodTensor, which supports " + "variable-time length input sequence. The underlying tensor in " + "this phi::DenseTensor is a matrix with shape (T X 3D), where, T is " + "the total time steps in this mini-batch, D is the hidden size."); AddInput("H0", "(Tensor, optional) The initial hidden state is an optional " "input. This is a tensor with shape (N x D), where N is the " @@ -136,35 +137,38 @@ class GRUOpMaker : public framework::OpProtoAndCheckerMaker { "(Tensor, optional) Bias vector with shape (1 x 3D) concating " "bias of the update gate, reset gate and output candidate.") .AsDispensable(); - AddOutput("BatchGate", - "(LoDTensor) To compute with batches, sequence data will be " - "reorganized into several successive batches each containing " - "data from the same time step. The LoDTensor BatchGate contains " - "the update gate, reset gate and output candidate values " - "organized in batches. The LoD size is 2. The first LoD contains " - "the batch offsets and the second LoD contains the indexes in " - "the raw sequence data.") + AddOutput( + "BatchGate", + "(phi::DenseTensor) To compute with batches, sequence data will be " + "reorganized into several successive batches each containing " + "data from the same time step. The phi::DenseTensor BatchGate contains " + "the update gate, reset gate and output candidate values " + "organized in batches. The LoD size is 2. The first LoD contains " + "the batch offsets and the second LoD contains the indexes in " + "the raw sequence data.") .AsIntermediate() .AsExtra(); - AddOutput( - "BatchResetHiddenPrev", - "(LoDTensor) The reset hidden state LoDTensor organized in batches. " - "This LoDTensor is a matrix with shape (T X D) and has the same LoD " - "with `BatchGate`.") + AddOutput("BatchResetHiddenPrev", + "(phi::DenseTensor) The reset hidden state phi::DenseTensor " + "organized in batches. " + "This phi::DenseTensor is a matrix with shape (T X D) and has " + "the same LoD " + "with `BatchGate`.") .AsIntermediate() .AsExtra(); - AddOutput( - "BatchHidden", - "(LoDTensor) The hidden state LoDTensor organized in batches. " - "This LoDTensor is a matrix with shape (T X D) and has the same LoD " - "with `BatchGate`.") + AddOutput("BatchHidden", + "(phi::DenseTensor) The hidden state phi::DenseTensor organized " + "in batches. 
" + "This phi::DenseTensor is a matrix with shape (T X D) and has " + "the same LoD " + "with `BatchGate`.") .AsIntermediate() .AsExtra(); - AddOutput( - "Hidden", - "(LoDTensor) the hidden state LoDTensor organized in sequences. " - "This LoDTensor is a matrix with shape (T X D) and has the same LoD " - "with `BatchGate`."); + AddOutput("Hidden", + "(phi::DenseTensor) the hidden state phi::DenseTensor organized " + "in sequences. " + "This phi::DenseTensor is a matrix with shape (T X D) and has " + "the same LoD with `BatchGate`."); AddAttr("activation", "(string, default tanh) " "The activation type used for output candidate {h}_t.") @@ -314,23 +318,24 @@ class GRUCPUKernel : public framework::OpKernel { public: void BatchCompute(const framework::ExecutionContext& context) const { using DeviceContext = phi::CPUContext; - using LodTensorPtr = LoDTensor*; + using LodTensorPtr = phi::DenseTensor*; bool is_test = context.Attr("is_test"); bool origin_mode = context.Attr("origin_mode"); - auto* input = context.Input("Input"); + auto* input = context.Input("Input"); auto* h0 = context.Input("H0"); auto* weight = context.Input("Weight"); const T* weight_data = weight->data(); auto* bias = context.Input("Bias"); - auto* hidden = context.Output("Hidden"); + auto* hidden = context.Output("Hidden"); hidden->mutable_data(context.GetPlace()); auto input_dims = input->dims(); auto hidden_dims = hidden->dims(); LodTensorPtr batch_gate, batch_reset_hidden_prev, batch_hidden; - LoDTensor batch_gate_tmp, batch_reset_hidden_prev_tmp, batch_hidden_tmp; + phi::DenseTensor batch_gate_tmp, batch_reset_hidden_prev_tmp, + batch_hidden_tmp; if (is_test) { batch_gate = &batch_gate_tmp; batch_gate->Resize(input_dims); @@ -341,10 +346,10 @@ class GRUCPUKernel : public framework::OpKernel { batch_hidden = &batch_hidden_tmp; batch_hidden->Resize(hidden_dims); } else { - batch_gate = context.Output("BatchGate"); - batch_hidden = context.Output("BatchHidden"); + batch_gate = context.Output("BatchGate"); + batch_hidden = context.Output("BatchHidden"); batch_reset_hidden_prev = - context.Output("BatchResetHiddenPrev"); + context.Output("BatchResetHiddenPrev"); } batch_gate->mutable_data(context.GetPlace()); batch_reset_hidden_prev->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/gru_op.cu.cc b/paddle/fluid/operators/gru_op.cu.cc index 2d63eb4d3a698d..a6b57bd88f77d2 100644 --- a/paddle/fluid/operators/gru_op.cu.cc +++ b/paddle/fluid/operators/gru_op.cu.cc @@ -21,23 +21,24 @@ template class GRUKernel : public framework::OpKernel { public: void BatchCompute(const framework::ExecutionContext& context) const { - using LodTensorPtr = LoDTensor*; + using LodTensorPtr = phi::DenseTensor*; bool is_test = context.Attr("is_test"); bool origin_mode = context.Attr("origin_mode"); - auto* input = context.Input("Input"); + auto* input = context.Input("Input"); auto* h0 = context.Input("H0"); auto* weight = context.Input("Weight"); const T* weight_data = weight->data(); auto* bias = context.Input("Bias"); - auto* hidden = context.Output("Hidden"); + auto* hidden = context.Output("Hidden"); hidden->mutable_data(context.GetPlace()); auto input_dims = input->dims(); auto hidden_dims = hidden->dims(); LodTensorPtr batch_gate, batch_reset_hidden_prev, batch_hidden; - LoDTensor batch_gate_tmp, batch_reset_hidden_prev_tmp, batch_hidden_tmp; + phi::DenseTensor batch_gate_tmp, batch_reset_hidden_prev_tmp, + batch_hidden_tmp; if (is_test) { batch_gate = &batch_gate_tmp; batch_gate->Resize(input_dims); @@ -48,10 +49,10 @@ class 
GRUKernel : public framework::OpKernel { batch_hidden = &batch_hidden_tmp; batch_hidden->Resize(hidden_dims); } else { - batch_gate = context.Output("BatchGate"); - batch_hidden = context.Output("BatchHidden"); + batch_gate = context.Output("BatchGate"); + batch_hidden = context.Output("BatchHidden"); batch_reset_hidden_prev = - context.Output("BatchResetHiddenPrev"); + context.Output("BatchResetHiddenPrev"); } batch_gate->mutable_data(context.GetPlace()); batch_reset_hidden_prev->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/gru_op.h b/paddle/fluid/operators/gru_op.h index e050c42a0ec545..89731e2efa0228 100644 --- a/paddle/fluid/operators/gru_op.h +++ b/paddle/fluid/operators/gru_op.h @@ -25,7 +25,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; using Tensor = phi::DenseTensor; template @@ -47,15 +46,15 @@ class GRUGradKernel : public framework::OpKernel { auto* h0 = context.Input("H0"); auto* weight = context.Input("Weight"); const T* weight_data = weight->data(); - auto* batch_gate = context.Input("BatchGate"); + auto* batch_gate = context.Input("BatchGate"); auto* batch_reset_hidden_prev = - context.Input("BatchResetHiddenPrev"); - auto* batch_hidden = context.Input("BatchHidden"); - auto* hidden = context.Input("Hidden"); + context.Input("BatchResetHiddenPrev"); + auto* batch_hidden = context.Input("BatchHidden"); + auto* hidden = context.Input("Hidden"); auto* hidden_grad = - context.Input(framework::GradVarName("Hidden")); + context.Input(framework::GradVarName("Hidden")); auto* input_grad = - context.Output(framework::GradVarName("Input")); + context.Output(framework::GradVarName("Input")); auto* h0_grad = context.Output(framework::GradVarName("H0")); auto* weight_grad = @@ -68,7 +67,8 @@ class GRUGradKernel : public framework::OpKernel { int frame_size = hidden_dims[1]; phi::funcs::LoDTensor2BatchFunctor to_batch; - LoDTensor batch_hidden_grad, batch_gate_grad, batch_reset_hidden_prev_grad; + phi::DenseTensor batch_hidden_grad, batch_gate_grad, + batch_reset_hidden_prev_grad; batch_hidden_grad.mutable_data(hidden_dims, context.GetPlace()); batch_gate_grad.mutable_data(gate_dims, context.GetPlace()); batch_reset_hidden_prev_grad.mutable_data(hidden_dims, diff --git a/paddle/fluid/operators/hierarchical_sigmoid_op.cc b/paddle/fluid/operators/hierarchical_sigmoid_op.cc index 8193be6b6b8e19..7255abcb7b4b63 100644 --- a/paddle/fluid/operators/hierarchical_sigmoid_op.cc +++ b/paddle/fluid/operators/hierarchical_sigmoid_op.cc @@ -82,43 +82,44 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "(LoDTensor, required) The input tensor with shape [N, D], " + "(phi::DenseTensor, required) The input tensor with shape [N, D], " "where N is the size of mini-batch, and D is the feature size."); AddInput("W", - "(LoDTensor, required), The parameters of hierarchical " + "(phi::DenseTensor, required), The parameters of hierarchical " "sigmoid operator, each of them is a 2-D tensor, the shape is" "[K, D]. Which K is the num of non-leaf node in Path Tree"); AddInput("Label", - "(LoDTensor, required), The labels of training data. It's a" + "(phi::DenseTensor, required), The labels of training data. 
It's a" "tensor with shape [N, 1]."); - AddInput("PathTable", - "(LoDTensor, optional), The Path Table from root to current word" - "it should have shape like [N, L], L is the length of the Path") - .AsDispensable(); AddInput( - "PathCode", - "(LoDTensor, optional), The Code on each Node of the Path from root " - "to current word" + "PathTable", + "(phi::DenseTensor, optional), The Path Table from root to current word" "it should have shape like [N, L], L is the length of the Path") .AsDispensable(); + AddInput("PathCode", + "(phi::DenseTensor, optional), The Code on each Node of the Path " + "from root " + "to current word" + "it should have shape like [N, L], L is the length of the Path") + .AsDispensable(); AddInput("Bias", - "(LoDTensor, optional), The bias is a tensor with shape or " + "(phi::DenseTensor, optional), The bias is a tensor with shape or " "[num_classes, 1]" "[num_classes - 1, 1].") .AsDispensable(); - AddOutput( - "Out", - "(LoDTensor, required) The output of hierarchical sigmoid operator." - "The shape is [N, 1]."); + AddOutput("Out", + "(phi::DenseTensor, required) The output of hierarchical sigmoid " + "operator." + "The shape is [N, 1]."); AddOutput("PreOut", - "(LoDTensor, required) A intermedia 2-D tensor with shape " + "(phi::DenseTensor, required) A intermedia 2-D tensor with shape " "[batch_size, code_length], where code_length represents the " "maximum path length from root to leaf nodes.") .AsIntermediate(); - AddOutput( - "W_Out", - "(LoDTensor, optional) using input 'W' as Output to make it mutable" - "When we are using prefetch") + AddOutput("W_Out", + "(phi::DenseTensor, optional) using input 'W' as Output to make " + "it mutable" + "When we are using prefetch") .AsIntermediate(); AddAttr("num_classes", "(int, optional), The number of classes") .SetDefault(2); @@ -227,7 +228,8 @@ class HierarchicalSigmoidGradOpGradVarTypeInference auto bias_grad_var_name = framework::GradVarName("Bias"); if (ctx->HasOutput(bias_grad_var_name)) { VLOG(3) << "hierarchical_sigmoid_grad op " - << framework::GradVarName("Bias") << " is set to LoDTensor"; + << framework::GradVarName("Bias") + << " is set to phi::DenseTensor"; ctx->SetOutputType(bias_grad_var_name, framework::proto::VarType::LOD_TENSOR); } @@ -241,7 +243,7 @@ class HierarchicalSigmoidGradOpGradVarTypeInference framework::proto::VarType::SELECTED_ROWS); } else { VLOG(3) << "hierarchical_sigmoid_grad op " << framework::GradVarName("W") - << " is set to LoDTensor"; + << " is set to phi::DenseTensor"; ctx->SetOutputType(w_grad_var_name, framework::proto::VarType::LOD_TENSOR); } diff --git a/paddle/fluid/operators/im2sequence_op.h b/paddle/fluid/operators/im2sequence_op.h index afb4db0f3c633b..a9da8f8f4dbbc7 100644 --- a/paddle/fluid/operators/im2sequence_op.h +++ b/paddle/fluid/operators/im2sequence_op.h @@ -27,7 +27,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; inline int Im2SeqOutputSize( int input_size, int filter_size, int padding_0, int padding_1, int stride) { @@ -41,7 +40,7 @@ class Im2SequenceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { const phi::DenseTensor* in = ctx.Input("X"); - LoDTensor* out = ctx.Output("Out"); + phi::DenseTensor* out = ctx.Output("Out"); auto in_dim = in->dims(); int batch_size = in_dim[0]; int img_channels = in_dim[1]; diff --git a/paddle/fluid/operators/index_select_op.h b/paddle/fluid/operators/index_select_op.h index 
71dd10d0aa42bd..a705a95156608c 100644 --- a/paddle/fluid/operators/index_select_op.h +++ b/paddle/fluid/operators/index_select_op.h @@ -23,21 +23,20 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using DDim = framework::DDim; template void IndexSelectInner(const framework::ExecutionContext& context, - LoDTensor* input, - const LoDTensor& index, - LoDTensor* output, + phi::DenseTensor* input, + const phi::DenseTensor& index, + phi::DenseTensor* output, int dim) { auto input_dim = input->dims(); auto input_dim_size = input_dim.size(); auto output_dim = output->dims(); auto index_size = index.dims()[0]; - LoDTensor index_cpu_copy; + phi::DenseTensor index_cpu_copy; if (!platform::is_cpu_place(index.place())) { framework::TensorCopySync(index, platform::CPUPlace(), &index_cpu_copy); } @@ -127,9 +126,9 @@ struct IndexSelectAdd< template void IndexSelectGradInner(const framework::ExecutionContext& context, - const LoDTensor& out_grad, - const LoDTensor& index, - LoDTensor* x_grad, + const phi::DenseTensor& out_grad, + const phi::DenseTensor& index, + phi::DenseTensor* x_grad, int dim) { const T* input_data = out_grad.data(); const IndexT* index_data = index.data(); diff --git a/paddle/fluid/operators/inplace_abn_op.cc b/paddle/fluid/operators/inplace_abn_op.cc index 61379a3d893ea7..53453c6cad184a 100644 --- a/paddle/fluid/operators/inplace_abn_op.cc +++ b/paddle/fluid/operators/inplace_abn_op.cc @@ -147,8 +147,8 @@ class InplaceABNGradOp : public paddle::operators::BatchNormGradOp { const phi::DenseTensor* t = nullptr; if (var->IsType()) { t = &var->Get(); - } else if (var->IsType()) { - t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); } if (t == nullptr) { PADDLE_THROW( diff --git a/paddle/fluid/operators/instance_norm_op.cc b/paddle/fluid/operators/instance_norm_op.cc index ae4da5c51a0889..ed474193461c39 100644 --- a/paddle/fluid/operators/instance_norm_op.cc +++ b/paddle/fluid/operators/instance_norm_op.cc @@ -108,8 +108,8 @@ framework::OpKernelType InstanceNormGradOp::GetExpectedKernelType( const Tensor *t = nullptr; if (var->IsType()) { t = &var->Get(); - } else if (var->IsType()) { - t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); } if (t == nullptr) { PADDLE_THROW( @@ -129,8 +129,8 @@ framework::OpKernelType InstanceNormDoubleGradOp::GetExpectedKernelType( const Tensor *t = nullptr; if (var->IsType()) { t = &var->Get(); - } else if (var->IsType()) { - t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); } if (t == nullptr) { PADDLE_THROW( diff --git a/paddle/fluid/operators/instance_norm_op.h b/paddle/fluid/operators/instance_norm_op.h index da6bb74ac56bc2..2101f6a12bb53c 100644 --- a/paddle/fluid/operators/instance_norm_op.h +++ b/paddle/fluid/operators/instance_norm_op.h @@ -23,7 +23,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using DataLayout = phi::DataLayout; class InstanceNormOp : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/label_smooth_op_mlu.cc b/paddle/fluid/operators/label_smooth_op_mlu.cc index 34293fd8fc6edf..211ffc7fb2cd60 100644 --- a/paddle/fluid/operators/label_smooth_op_mlu.cc +++ b/paddle/fluid/operators/label_smooth_op_mlu.cc @@ -19,15 +19,14 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; template class LabelSmoothMLUKernel : public framework::OpKernel { public: void Compute(const 
framework::ExecutionContext& ctx) const override { - auto* in_t = ctx.Input("X"); + auto* in_t = ctx.Input("X"); auto* dist_t = ctx.Input("PriorDist"); - auto* out_t = ctx.Output("Out"); + auto* out_t = ctx.Output("Out"); auto epsilon = ctx.Attr("epsilon"); auto epsilon_gt = 1.0f - epsilon; diff --git a/paddle/fluid/operators/label_smooth_op_npu.cc b/paddle/fluid/operators/label_smooth_op_npu.cc index f6c96357e8ec2b..529e8564cb19bf 100644 --- a/paddle/fluid/operators/label_smooth_op_npu.cc +++ b/paddle/fluid/operators/label_smooth_op_npu.cc @@ -19,7 +19,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; template void LabelSmoothMuls(const platform::Place& place, @@ -58,8 +57,8 @@ template class LabelSmoothNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out_t = ctx.Output("Out"); - auto* in_t = ctx.Input("X"); + auto* out_t = ctx.Output("Out"); + auto* in_t = ctx.Input("X"); auto* dist_t = ctx.Input("PriorDist"); auto epsilon = ctx.Attr("epsilon"); diff --git a/paddle/fluid/operators/layer_norm_op.cc b/paddle/fluid/operators/layer_norm_op.cc index 30ddc3bbe22494..1081df4166aacb 100644 --- a/paddle/fluid/operators/layer_norm_op.cc +++ b/paddle/fluid/operators/layer_norm_op.cc @@ -21,7 +21,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using DataLayout = phi::DataLayout; class LayerNormOp : public framework::OperatorWithKernel { @@ -214,8 +213,8 @@ class LayerNormGradOp : public framework::OperatorWithKernel { const Tensor *t = nullptr; if (var->IsType()) { t = &var->Get(); - } else if (var->IsType()) { - t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); } PADDLE_ENFORCE_NOT_NULL( t, platform::errors::NotFound("Y@GRAD of LayerNorm Op is not found.")); diff --git a/paddle/fluid/operators/limit_by_capacity_op.cu b/paddle/fluid/operators/limit_by_capacity_op.cu index f6e0bffa1d1ce9..28ae524e0a4f9f 100644 --- a/paddle/fluid/operators/limit_by_capacity_op.cu +++ b/paddle/fluid/operators/limit_by_capacity_op.cu @@ -28,7 +28,6 @@ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; using Tensor = phi::DenseTensor; template diff --git a/paddle/fluid/operators/linear_chain_crf_op.cc b/paddle/fluid/operators/linear_chain_crf_op.cc index 99c10e868a396e..64fe6562a6c7d9 100644 --- a/paddle/fluid/operators/linear_chain_crf_op.cc +++ b/paddle/fluid/operators/linear_chain_crf_op.cc @@ -23,23 +23,24 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("Emission", - "(LoDTensor/Tensor). When a LoDTensor input,A 2-D LoDTensor" + "(phi::DenseTensor). When a phi::DenseTensor " + "input,A 2-D phi::DenseTensor" " with shape [N x D], where N is the size of the " "mini-batch and D is the total tag number. The unscaled emission " "weight matrix for the linear chain CRF. When a Tensor input," "A Tensor with shape [N x S x D], where N is batch number," "S is max length of sequences, D is the total tag number." - "A LoDTensor or Tensor with type float32, float64."); + "A phi::DenseTensor with type float32, float64."); AddInput("Transition", "(Tensor, default Tensor) A 2-D Tensor with shape " "[(D + 2) x D]. The learnable parameter for the linear_chain_crf " "operator. 
See more details in the operator's comments."); AddInput("Label", - "(LoDTensor/Tensor), when a LoDTensor input, " + "(phi::DenseTensor), when a phi::DenseTensor input, " "[N x 1], where N is the total element number in a mini-batch. " "when a Tensor input, [N x S], where N is batch number. " "S is max length of sequences. The ground truth." - "A LoDTensor or Tensor with int64."); + "A phi::DenseTensor with int64."); AddInput("Length", "(Tensor, default Tensor) A Tensor with shape " "[M x 1], where M is the sequence number in a mini-batch." @@ -63,7 +64,7 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { "The exponentials of Input(Emission). This is an intermediate " "computational result in forward computation, and will be reused in " "backward computation." - "A LoDTensor or Tensor with type float32, float64.") + "A phi::DenseTensor with type float32, float64.") .AsIntermediate(); AddOutput( "TransitionExps", @@ -71,7 +72,7 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { "[(D + 2) x D]. The exponentials of Input(Transition). This is an " "intermediate computational result in forward computation, and " "will be reused in backward computation." - "A LoDTensor or Tensor with type float32, float64.") + "A phi::DenseTensor with type float32, float64.") .AsIntermediate(); AddOutput( "LogLikelihood", diff --git a/paddle/fluid/operators/linear_chain_crf_op.h b/paddle/fluid/operators/linear_chain_crf_op.h index bda310c31fff06..bf68c7298e72a7 100644 --- a/paddle/fluid/operators/linear_chain_crf_op.h +++ b/paddle/fluid/operators/linear_chain_crf_op.h @@ -47,7 +47,6 @@ struct ScalarMul { }; using framework::LoD; -using LoDTensor = phi::DenseTensor; template class LinearChainCRFOpKernel : public framework::OpKernel { @@ -114,7 +113,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel { phi::funcs::set_constant(ctx.device_context(), emission_exps, 0.0); phi::funcs::set_constant(ctx.device_context(), alpha, 0.0); } else { - in_lod = ctx.Input("Label")->lod(); + in_lod = ctx.Input("Label")->lod(); PADDLE_ENFORCE_NE(in_lod.size(), 0, platform::errors::InvalidArgument( @@ -286,7 +285,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { emission_exps_tmp.Resize( {emission_dims[0] * emission_dims[1], emission_dims[2]}); } else { - in_lod = ctx.Input("Label")->lod(); + in_lod = ctx.Input("Label")->lod(); PADDLE_ENFORCE_NE(in_lod.size(), 0, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/load_combine_op.cc b/paddle/fluid/operators/load_combine_op.cc index 94bfc44977fb34..78c06e8c24a000 100644 --- a/paddle/fluid/operators/load_combine_op.cc +++ b/paddle/fluid/operators/load_combine_op.cc @@ -62,7 +62,7 @@ class LoadCombineOpProtoMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( LoadCombine Operator. -LoadCombine operator loads LoDTensor variables from a file, which could be +LoadCombine operator loads phi::DenseTensor variables from a file, which could be loaded in memory already. The file should contain one or more LoDTensors serialized using the SaveCombine operator. 
The LoadCombine operator applies a deserialization strategy to appropriately load diff --git a/paddle/fluid/operators/load_op.cc b/paddle/fluid/operators/load_op.cc index d39beb9266a7e8..0c66dbd36568f7 100644 --- a/paddle/fluid/operators/load_op.cc +++ b/paddle/fluid/operators/load_op.cc @@ -37,7 +37,7 @@ class LoadOp : public framework::OperatorWithKernel { class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddOutput("Out", "The LoDTensor / SelectedRows need to be loaded"); + AddOutput("Out", "The phi::DenseTensor / SelectedRows need to be loaded"); AddAttr( "load_as_fp16", "If true, the tensor will be first loaded and then " @@ -54,7 +54,8 @@ class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker { "(vector) The shape of the output") .SetDefault({}); AddComment( - "Load operator will load a LoDTensor / SelectedRows variable from " + "Load operator will load a phi::DenseTensor / SelectedRows variable " + "from " "disk " "file."); } diff --git a/paddle/fluid/operators/load_op_npu.cc b/paddle/fluid/operators/load_op_npu.cc index 4efe67d36c5ca6..8c00f0868300a0 100644 --- a/paddle/fluid/operators/load_op_npu.cc +++ b/paddle/fluid/operators/load_op_npu.cc @@ -54,7 +54,8 @@ class LoadOpKernel : public framework::OpKernel { LoadSelectedRows(fin, place, out_var); } else { PADDLE_THROW(platform::errors::InvalidArgument( - "Load operator only supports loading LoDTensor and SelectedRows " + "Load operator only supports loading phi::DenseTensor and " + "SelectedRows " "variable, %s has wrong type", out_var_name)); } diff --git a/paddle/fluid/operators/lod_rank_table_op.cc b/paddle/fluid/operators/lod_rank_table_op.cc index cffb76010761cc..a399ad4527ff84 100644 --- a/paddle/fluid/operators/lod_rank_table_op.cc +++ b/paddle/fluid/operators/lod_rank_table_op.cc @@ -52,13 +52,14 @@ class LoDRankTableOp : public framework::OperatorBase { class LoDRankTableOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", - "(LoDTensor) input lod tensor, must contain lod information."); + AddInput( + "X", + "(phi::DenseTensor) input lod tensor, must contain lod information."); AddOutput("Out", "(LoDRankTable) The rank table of specific level."); AddAttr("level", "(int) the specific lod level to rank.") .SetDefault(0) .EqualGreaterThan(0); - AddComment(R"DOC(Create LoDRanTable by LoDTensor + AddComment(R"DOC(Create LoDRanTable by phi::DenseTensor LoD Rank Table stores the `level` of `lod` which is ordered by sequence length in descending order. 
It is useful when implement dynamic RNN and is diff --git a/paddle/fluid/operators/lod_reset_op.cc b/paddle/fluid/operators/lod_reset_op.cc index 1e03bb806f1925..374bb8920fbbd5 100644 --- a/paddle/fluid/operators/lod_reset_op.cc +++ b/paddle/fluid/operators/lod_reset_op.cc @@ -105,18 +105,20 @@ class LoDResetOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "(Tensor, LoDTensor) Input variable of LoDResetOp which " - "could be a Tensor or LoDTensor, where the data of output " + "(Tensor, phi::DenseTensor) Input variable of LoDResetOp which " + "could be a Tensor or phi::DenseTensor, where the data of output " "variable inherits from."); AddInput("Y", - "(Tensor, LoDTensor, optional) If provided and Y is LoDTensor, " + "(phi::DenseTensor, optional) If provided and Y is " + "phi::DenseTensor, " "lod of Input(Y) would be considered as the target lod first, " "otherwise data of Input(Y) would be considered as the " "target lod.") .AsDispensable(); - AddOutput("Out", - "(LoDTensor) Output variable of LoDResetOp which should be a " - "LoDTensor."); + AddOutput( + "Out", + "(phi::DenseTensor) Output variable of LoDResetOp which should be a " + "phi::DenseTensor."); AddAttr>("target_lod", "The target level 0 LoD from Attr().") .SetDefault(std::vector{}); @@ -124,7 +126,7 @@ class LoDResetOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC(LoDReset operator Set LoD of `X` to a new one specified by `Y` or attribute `target_lod`. When `Y` -provided and `Y` is a LoDTensor, `Y.lod` would be considered as target LoD +provided and `Y` is a phi::DenseTensor, `Y.lod` would be considered as target LoD first, otherwise `Y.data` would be considered as target LoD. If `Y` is not provided, target LoD should be specified by attribute `target_lod`. If target LoD is specified by `Y.data` or `target_lod`, only one level LoD @@ -132,7 +134,7 @@ is supported. 
Example 1: -Given a 1-level LoDTensor input(X): +Given a 1-level phi::DenseTensor input(X): X.lod = [[ 0, 2, 5 6 ]] X.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] X.dims = [6, 1] @@ -146,7 +148,7 @@ then we get a 1-level LoDTensor: Example 2: -Given a 1-level LoDTensor input(X): +Given a 1-level phi::DenseTensor input(X): X.lod = [[ 0, 2, 5 6 ]] X.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] X.dims = [6, 1] @@ -162,7 +164,7 @@ then we get a 1-level LoDTensor: Example 3: -Given a 1-level LoDTensor input(X): +Given a 1-level phi::DenseTensor input(X): X.lod = [[ 0, 2, 5 6 ]] X.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] X.dims = [6, 1] diff --git a/paddle/fluid/operators/lod_tensor_to_array_op.cc b/paddle/fluid/operators/lod_tensor_to_array_op.cc index f3c26a9121d63f..a736385a1401e3 100644 --- a/paddle/fluid/operators/lod_tensor_to_array_op.cc +++ b/paddle/fluid/operators/lod_tensor_to_array_op.cc @@ -125,11 +125,11 @@ class LoDTensorToArrayOp : public framework::OperatorBase { PADDLE_ENFORCE_LT( rank_level, x.lod().size(), - platform::errors::InvalidArgument( - "Input should be a LoDTensor, and its lod_level should be at " - "least %d, but given is %d.", - rank_level + 1, - x.lod().size())); + platform::errors::InvalidArgument("Input should be a phi::DenseTensor, " + "and its lod_level should be at " + "least %d, but given is %d.", + rank_level + 1, + x.lod().size())); out.resize(max_seq_len); std::vector> copy_ranges(max_seq_len); @@ -189,14 +189,15 @@ class LoDTensorToArrayOp : public framework::OperatorBase { class LoDTensorToArrayOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", - "(LoDTensor), the input lod tensor is a minibatch of sequences, " - "and will be split to a tensor_array according to " - "Input(RankTable)."); + AddInput( + "X", + "(phi::DenseTensor), the input lod tensor is a minibatch of sequences, " + "and will be split to a tensor_array according to " + "Input(RankTable)."); AddInput("RankTable", "(LoDRankTable), the rank table."); AddOutput("Out", "(LoDTensorArray), the result tensor_array, which is actually a " - "std::vector."); + "std::vector."); AddComment(R"DOC(LoDTensorToArray operator. Input(X) is a minibatch of sequences. Input(RankTable) stores the order of the input sequences. The lod_tensor_to_array operator will spilt the input sequences to a tensor_array, with each @@ -234,9 +235,9 @@ class LoDTensorToArrayInferShape : public framework::InferShapeBase { // kernel implementation. context->SetOutputDim("Out", x_dim); - // The output LoDTensor's lod_level should be input X's lod_level - 1. - // For compile time, we call SetLoDLevel to set output's lod_level. - // For runtime, output LoDTensor's lod is determined by input X's lod and + // The output phi::DenseTensor's lod_level should be input X's lod_level + // - 1. For compile time, we call SetLoDLevel to set output's lod_level. For + // runtime, output phi::DenseTensor's lod is determined by input X's lod and // the level specified by input RandTable. // We cannot get X's detail lod and RankTable's level in this function, so // leave this work to the detail kernel implementation. 
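A minimal sketch of the call-site pattern these hunks apply (editorial illustration only, not part of the patch; `ExampleKernel` and its "X"/"Out" slot names are hypothetical, and the snippet presumes the usual Paddle operator headers already included by these files):

// Before: each file declared a local alias and used it at call sites.
//   using LoDTensor = phi::DenseTensor;
//   auto* x = ctx.Input<LoDTensor>("X");
//
// After: the alias is deleted and phi::DenseTensor is named directly.
template <typename T>
class ExampleKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<phi::DenseTensor>("X");       // read the input tensor
    auto* out = ctx.Output<phi::DenseTensor>("Out");  // fetch the output tensor
    out->Resize(x->dims());                           // same shape as the input
    out->mutable_data<T>(ctx.GetPlace());             // allocate on the kernel's place
  }
};

The same substitution shows up in variable declarations, Get/IsType checks, and op-comment strings throughout the remaining hunks.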
diff --git a/paddle/fluid/operators/lookup_table_dequant_op.h b/paddle/fluid/operators/lookup_table_dequant_op.h index 05d74855f5d477..3f9ec485ce4f81 100644 --- a/paddle/fluid/operators/lookup_table_dequant_op.h +++ b/paddle/fluid/operators/lookup_table_dequant_op.h @@ -28,7 +28,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; using DDim = framework::DDim; @@ -52,8 +51,8 @@ template class LookupTableDequantKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *ids_t = context.Input("Ids"); // int tensor - auto *output_t = context.Output("Out"); // float tensor + auto *ids_t = context.Input("Ids"); // int tensor + auto *output_t = context.Output("Out"); // float tensor auto *table_var = context.InputVar("W"); auto id_name = context.InputNames("Ids").front(); @@ -66,9 +65,9 @@ class LookupTableDequantKernel : public framework::OpKernel { PADDLE_ENFORCE_GE( table_var->Type(), - framework::VarTypeTrait::kId, + framework::VarTypeTrait::kId, platform::errors::InvalidArgument("lookup table must be LodTensor")); - auto *table_t = context.Input("W"); + auto *table_t = context.Input("W"); int64_t row_number = table_t->dims()[0]; int64_t quant_number = table_t->dims()[1]; int64_t row_width = (quant_number - 2) * 4; diff --git a/paddle/fluid/operators/lookup_table_op.cc b/paddle/fluid/operators/lookup_table_op.cc index 02cd9a205f009d..8ad3966a1d236f 100644 --- a/paddle/fluid/operators/lookup_table_op.cc +++ b/paddle/fluid/operators/lookup_table_op.cc @@ -212,7 +212,7 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference { framework::proto::VarType::SELECTED_ROWS); } else { VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W") - << " is set to LoDTensor"; + << " is set to phi::DenseTensor"; ctx->SetOutputType(out_var_name, framework::proto::VarType::LOD_TENSOR); } ctx->SetOutputDataType(out_var_name, ctx->GetInputDataType("W")); diff --git a/paddle/fluid/operators/lookup_table_op.cu b/paddle/fluid/operators/lookup_table_op.cu index 0562228f516fac..1052e5117e434e 100644 --- a/paddle/fluid/operators/lookup_table_op.cu +++ b/paddle/fluid/operators/lookup_table_op.cu @@ -103,9 +103,9 @@ template class LookupTableCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *table_t = context.Input("W"); - auto *ids_t = context.Input("Ids"); - auto *output_t = context.Output("Out"); + auto *table_t = context.Input("W"); + auto *ids_t = context.Input("Ids"); + auto *output_t = context.Output("Out"); int64_t padding_idx = context.Attr("padding_idx"); auto id_name = context.InputNames("Ids").front(); @@ -157,9 +157,10 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { // Since paddings are not trainable and fixed in forward, the gradient of // paddings makes no sense and we don't deal with it in backward. 
if (is_sparse) { - auto *ids = context.Input("Ids"); - auto *table = context.Input("W"); - auto *d_output = context.Input(framework::GradVarName("Out")); + auto *ids = context.Input("Ids"); + auto *table = context.Input("W"); + auto *d_output = + context.Input(framework::GradVarName("Out")); auto *d_table = context.Output(framework::GradVarName("W")); @@ -209,9 +210,11 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { stream); } else { - auto ids_t = context.Input("Ids"); - auto d_output_t = context.Input(framework::GradVarName("Out")); - auto d_table_t = context.Output(framework::GradVarName("W")); + auto ids_t = context.Input("Ids"); + auto d_output_t = + context.Input(framework::GradVarName("Out")); + auto d_table_t = + context.Output(framework::GradVarName("W")); int N = d_table_t->dims()[0]; int D = d_table_t->dims()[1]; diff --git a/paddle/fluid/operators/lookup_table_op.h b/paddle/fluid/operators/lookup_table_op.h index cfd34cfe67848f..1ba6d6e31ecdcb 100644 --- a/paddle/fluid/operators/lookup_table_op.h +++ b/paddle/fluid/operators/lookup_table_op.h @@ -27,7 +27,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; using DDim = framework::DDim; @@ -37,8 +36,8 @@ template class LookupTableKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *ids_t = context.Input("Ids"); // int tensor - auto *output_t = context.Output("Out"); // float tensor + auto *ids_t = context.Input("Ids"); // int tensor + auto *output_t = context.Output("Out"); // float tensor auto *table_var = context.InputVar("W"); auto id_name = context.InputNames("Ids").front(); @@ -51,8 +50,8 @@ class LookupTableKernel : public framework::OpKernel { int64_t *ids = const_cast(ids_t->data()); int64_t ids_numel = ids_t->numel(); - if (table_var->IsType()) { - auto *table_t = context.Input("W"); + if (table_var->IsType()) { + auto *table_t = context.Input("W"); int64_t row_number = table_t->dims()[0]; int64_t row_width = table_t->dims()[1]; @@ -165,15 +164,15 @@ class LookupTableGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &context) const override { auto *table_var = context.InputVar("W"); DDim table_dim; - if (table_var->IsType()) { - table_dim = context.Input("W")->dims(); + if (table_var->IsType()) { + table_dim = context.Input("W")->dims(); } else if (table_var->IsType()) { auto *table_t = context.Input("W"); table_dim = table_t->value().dims(); } else { PADDLE_THROW(platform::errors::InvalidArgument( "The parameter W of a LookupTable " - "must be either LoDTensor or SelectedRows")); + "must be either phi::DenseTensor or SelectedRows")); } int64_t padding_idx = context.Attr("padding_idx"); @@ -181,8 +180,9 @@ class LookupTableGradKernel : public framework::OpKernel { // Since paddings are not trainable and fixed in forward, the gradient of // paddings makes no sense and we don't deal with it in backward. 
if (is_sparse) { - auto *ids = context.Input("Ids"); - auto *d_output = context.Input(framework::GradVarName("Out")); + auto *ids = context.Input("Ids"); + auto *d_output = + context.Input(framework::GradVarName("Out")); auto *d_table = context.Output(framework::GradVarName("W")); @@ -216,9 +216,11 @@ class LookupTableGradKernel : public framework::OpKernel { d_output_dims_2d)); memcpy(d_table_data, d_output_data, sizeof(T) * d_output->numel()); } else { - auto *ids = context.Input("Ids"); - auto *d_output = context.Input(framework::GradVarName("Out")); - auto *d_table = context.Output(framework::GradVarName("W")); + auto *ids = context.Input("Ids"); + auto *d_output = + context.Input(framework::GradVarName("Out")); + auto *d_table = + context.Output(framework::GradVarName("W")); auto *ids_data = ids->data(); diff --git a/paddle/fluid/operators/lookup_table_v2_op.cc b/paddle/fluid/operators/lookup_table_v2_op.cc index 5f023fbad6a027..84f8c6cf6492a2 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.cc +++ b/paddle/fluid/operators/lookup_table_v2_op.cc @@ -156,7 +156,7 @@ class LookupTableV2OpGradVarTypeInference : public framework::VarTypeInference { framework::proto::VarType::SELECTED_ROWS); } else { VLOG(3) << "lookup_table_v2_grad op " << framework::GradVarName("W") - << " is set to LoDTensor"; + << " is set to phi::DenseTensor"; ctx->SetOutputType(out_var_name, framework::proto::VarType::LOD_TENSOR); } ctx->SetOutputDataType(out_var_name, ctx->GetInputDataType("W")); diff --git a/paddle/fluid/operators/lookup_table_v2_op.h b/paddle/fluid/operators/lookup_table_v2_op.h index e100ade31a0afa..e9369bcb475ccd 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.h +++ b/paddle/fluid/operators/lookup_table_v2_op.h @@ -28,7 +28,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; using DDim = framework::DDim; @@ -57,7 +56,7 @@ struct LookupTableV2CPUFunctor { template void apply() { - auto *output_t = context_.Output("Out"); // float tensor + auto *output_t = context_.Output("Out"); // float tensor auto *table_var = context_.InputVar("W"); int64_t padding_idx = context_.Attr("padding_idx"); @@ -65,8 +64,8 @@ struct LookupTableV2CPUFunctor { auto ids = CopyIdsToVector(*ids_t_); auto ids_numel = static_cast(ids.size()); - if (table_var->template IsType()) { - const auto &table_t = table_var->template Get(); + if (table_var->template IsType()) { + const auto &table_t = table_var->template Get(); int64_t row_number = table_t.dims()[0]; int64_t row_width = table_t.dims()[1]; @@ -168,15 +167,15 @@ struct LookupTableV2GradCPUFunctor { void apply() { auto *table_var = context_.InputVar("W"); DDim table_dim; - if (table_var->template IsType()) { - table_dim = context_.Input("W")->dims(); + if (table_var->template IsType()) { + table_dim = context_.Input("W")->dims(); } else if (table_var->template IsType()) { auto *table_t = context_.Input("W"); table_dim = table_t->value().dims(); } else { PADDLE_THROW(platform::errors::InvalidArgument( "The parameter W of a LookupTableV2 " - "must be either LoDTensor or SelectedRows")); + "must be either phi::DenseTensor or SelectedRows")); } int64_t padding_idx = context_.Attr("padding_idx"); @@ -188,7 +187,8 @@ struct LookupTableV2GradCPUFunctor { // Since paddings are not trainable and fixed in forward, the gradient of // paddings makes no sense and we don't deal with it in backward. 
if (is_sparse) { - auto *d_output = context_.Input(framework::GradVarName("Out")); + auto *d_output = + context_.Input(framework::GradVarName("Out")); auto *d_table = context_.Output(framework::GradVarName("W")); @@ -219,8 +219,10 @@ struct LookupTableV2GradCPUFunctor { memcpy(d_table_data, d_output_data, sizeof(T) * d_output->numel()); } else { - auto *d_output = context_.Input(framework::GradVarName("Out")); - auto *d_table = context_.Output(framework::GradVarName("W")); + auto *d_output = + context_.Input(framework::GradVarName("Out")); + auto *d_table = + context_.Output(framework::GradVarName("W")); auto *ids_data = ids.data(); int64_t N = table_dim[0]; diff --git a/paddle/fluid/operators/lookup_table_v2_op_mlu.cc b/paddle/fluid/operators/lookup_table_v2_op_mlu.cc index 39e6cd984722bd..de9864aeee6a16 100644 --- a/paddle/fluid/operators/lookup_table_v2_op_mlu.cc +++ b/paddle/fluid/operators/lookup_table_v2_op_mlu.cc @@ -32,7 +32,7 @@ class LookupTableV2MLUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( table_var->IsType(), true, - platform::errors::InvalidArgument("mlu only accept LoDTensor")); + platform::errors::InvalidArgument("mlu only accept phi::DenseTensor")); output_t->mutable_data(ctx.GetPlace()); MLUCnnlTensorDesc ids_desc(*ids_t); @@ -55,11 +55,12 @@ class LookupTableV2GradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto *table_var = ctx.InputVar("W"); - PADDLE_ENFORCE_EQ(table_var->IsType(), - true, - platform::errors::PermissionDenied( - "Unsupported Variable Type , idx in " - "LookupTableV2GradMLUKernel should be LoDTensor.")); + PADDLE_ENFORCE_EQ( + table_var->IsType(), + true, + platform::errors::PermissionDenied( + "Unsupported Variable Type , idx in " + "LookupTableV2GradMLUKernel should be phi::DenseTensor.")); bool is_sparse = ctx.Attr("is_sparse"); PADDLE_ENFORCE_EQ( is_sparse, diff --git a/paddle/fluid/operators/lookup_table_v2_op_npu.cc b/paddle/fluid/operators/lookup_table_v2_op_npu.cc index b8719d33b5d6c0..d11ef440f8a3f4 100644 --- a/paddle/fluid/operators/lookup_table_v2_op_npu.cc +++ b/paddle/fluid/operators/lookup_table_v2_op_npu.cc @@ -37,7 +37,7 @@ class LookupTableV2NPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( table_var->IsType(), true, - platform::errors::InvalidArgument("npu only accept LoDTensor")); + platform::errors::InvalidArgument("npu only accept phi::DenseTensor")); output_t->mutable_data(ctx.GetPlace()); int64_t padding_idx = ctx.Attr("padding_idx"); diff --git a/paddle/fluid/operators/lstm_op.cc b/paddle/fluid/operators/lstm_op.cc index ba56eeddf89d18..b7310ed475994c 100644 --- a/paddle/fluid/operators/lstm_op.cc +++ b/paddle/fluid/operators/lstm_op.cc @@ -146,11 +146,12 @@ class LSTMOp : public framework::OperatorWithKernel { class LSTMOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("Input", - "(LoDTensor) the first input is a LodTensor, which support " - "variable-time length input sequence. The underlying tensor in " - "this LoDTensor is a matrix with shape (T X 4D), where T is the " - "total time steps in this mini-batch, D is the hidden size."); + AddInput( + "Input", + "(phi::DenseTensor) the first input is a phi::DenseTensor, which " + "support variable-time length input sequence. 
The underlying tensor in " + "this phi::DenseTensor is a matrix with shape (T X 4D), where T is the " + "total time steps in this mini-batch, D is the hidden size."); AddInput("H0", "(Tensor, optional) the initial hidden state is an optional " "input. This is a tensor with shape (N x D), where N is the " @@ -176,23 +177,26 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker { " - The shape is (1 x 7D). " " - Bias = {b_c, b_i, b_f, b_o, W_ic, W_fc, W_oc}."); AddOutput("Hidden", - "(LoDTensor) the hidden state of LSTM operator. " + "(phi::DenseTensor) the hidden state of LSTM operator. " "The shape is (T x D), and lod is the same with the `Input`."); AddOutput("Cell", - "(LoDTensor) the cell state of LSTM operator. " + "(phi::DenseTensor) the cell state of LSTM operator. " "The shape is (T x D), and lod is the same with the `Input`."); - AddOutput("BatchGate", - "(LoDTensor) This LoDTensor contains input gate, forget gate " - "and output gate after the nonlinear computation. This " - "LoDTensor has the same shape as the reorganized input, which " - "is also be called batch input. The LoD size is 2. The first " - "LoD is the batch offsets and the second LoD contains the " - "indexes, which denote the position of reorganized sequence " - "in the raw input.") + AddOutput( + "BatchGate", + "(phi::DenseTensor) This phi::DenseTensor contains input gate, forget " + "gate " + "and output gate after the nonlinear computation. This " + "phi::DenseTensor has the same shape as the reorganized input, which " + "is also be called batch input. The LoD size is 2. The first " + "LoD is the batch offsets and the second LoD contains the " + "indexes, which denote the position of reorganized sequence " + "in the raw input.") .AsIntermediate() .AsExtra(); AddOutput("BatchCellPreAct", - "(LoDTensor) This LoDTensor is obtained in the forward and used " + "(phi::DenseTensor) This phi::DenseTensor is obtained in the " + "forward and used " "in the backward.") .AsIntermediate() .AsExtra(); diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h index a6bb901897416c..dc4f2f1548612d 100644 --- a/paddle/fluid/operators/lstm_op.h +++ b/paddle/fluid/operators/lstm_op.h @@ -24,7 +24,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; using Tensor = phi::DenseTensor; template @@ -44,25 +43,25 @@ class LSTMKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { bool is_test = ctx.Attr("is_test"); - auto* input = ctx.Input("Input"); + auto* input = ctx.Input("Input"); auto* weight = ctx.Input("Weight"); auto* bias = ctx.Input("Bias"); auto* hidden_t0 = ctx.Input("H0"); auto* cell_t0 = ctx.Input("C0"); - LoDTensor* batch_gate = nullptr; - LoDTensor batch_gate_temp; + phi::DenseTensor* batch_gate = nullptr; + phi::DenseTensor batch_gate_temp; if (is_test) { batch_gate = &batch_gate_temp; batch_gate->Resize(input->dims()); } else { - batch_gate = ctx.Output("BatchGate"); + batch_gate = ctx.Output("BatchGate"); } batch_gate->mutable_data(ctx.GetPlace()); - auto* hidden_out = ctx.Output("Hidden"); + auto* hidden_out = ctx.Output("Hidden"); hidden_out->mutable_data(ctx.GetPlace()); - auto* cell_out = ctx.Output("Cell"); + auto* cell_out = ctx.Output("Cell"); cell_out->mutable_data(ctx.GetPlace()); bool is_reverse = ctx.Attr("is_reverse"); @@ -110,12 +109,12 @@ class LSTMKernel : public framework::OpKernel { } // Use the local variable as here. 
- LoDTensor batch_hidden, batch_cell, batch_cell_pre_act_temp; - LoDTensor* batch_cell_pre_act; + phi::DenseTensor batch_hidden, batch_cell, batch_cell_pre_act_temp; + phi::DenseTensor* batch_cell_pre_act; if (is_test) { batch_cell_pre_act = &batch_cell_pre_act_temp; } else { - batch_cell_pre_act = ctx.Output("BatchCellPreAct"); + batch_cell_pre_act = ctx.Output("BatchCellPreAct"); } batch_hidden.mutable_data(dims, ctx.GetPlace()); batch_cell.mutable_data(dims, ctx.GetPlace()); @@ -191,11 +190,11 @@ class LSTMKernel : public framework::OpKernel { phi::funcs::Batch2LoDTensorFunctor to_seq; batch_hidden.set_lod(batch_gate->lod()); - // restore the output hidden in LoDTensor from the batch hidden + // restore the output hidden in phi::DenseTensor from the batch hidden to_seq(device_ctx, batch_hidden, hidden_out); batch_cell.set_lod(batch_gate->lod()); - // restore the output cell state in LoDTensor from the batch cell + // restore the output cell state in phi::DenseTensor from the batch cell to_seq(device_ctx, batch_cell, cell_out); } }; @@ -204,19 +203,20 @@ template class LSTMGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); + auto* input = ctx.Input("Input"); auto* weight = ctx.Input("Weight"); auto* bias = ctx.Input("Bias"); - auto* hidden_out = ctx.Input("Hidden"); - auto* cell_out = ctx.Input("Cell"); + auto* hidden_out = ctx.Input("Hidden"); + auto* cell_out = ctx.Input("Cell"); - auto* batch_gate = ctx.Input("BatchGate"); - auto* batch_cell_pre_act = ctx.Input("BatchCellPreAct"); + auto* batch_gate = ctx.Input("BatchGate"); + auto* batch_cell_pre_act = ctx.Input("BatchCellPreAct"); - auto* hidden_g = ctx.Input(framework::GradVarName("Hidden")); + auto* hidden_g = + ctx.Input(framework::GradVarName("Hidden")); - auto* in_g = ctx.Output(framework::GradVarName("Input")); + auto* in_g = ctx.Output(framework::GradVarName("Input")); auto* weight_g = ctx.Output(framework::GradVarName("Weight")); auto* bias_g = ctx.Output(framework::GradVarName("Bias")); @@ -301,12 +301,12 @@ class LSTMGradKernel : public framework::OpKernel { to_batch(ctx, src, &dst, false); }; - LoDTensor batch_hidden, batch_hidden_g, batch_cell; + phi::DenseTensor batch_hidden, batch_hidden_g, batch_cell; ToBatch(device_ctx, *hidden_out, out_dims, batch_hidden); ToBatch(device_ctx, *hidden_g, out_dims, batch_hidden_g); ToBatch(device_ctx, *cell_out, out_dims, batch_cell); - LoDTensor batch_cell_g, batch_gate_g; + phi::DenseTensor batch_cell_g, batch_gate_g; batch_cell_g.mutable_data(out_dims, ctx.GetPlace()); // TODO(qingqing) support the case output cell has gradient. // to_batch(device_ctx, *cell_g, batch_cell_g, false); diff --git a/paddle/fluid/operators/lstmp_op.cc b/paddle/fluid/operators/lstmp_op.cc index 156fc55fb6b9a8..dc36b3431d4891 100644 --- a/paddle/fluid/operators/lstmp_op.cc +++ b/paddle/fluid/operators/lstmp_op.cc @@ -154,11 +154,12 @@ class LSTMPOp : public framework::OperatorWithKernel { class LSTMPOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("Input", - "(LoDTensor) the input for sequence data, which supports " - "variable-time length input sequence. The underlying tensor in " - "this LoDTensor is a matrix with shape (T X 4D), where T is the " - "total time steps in this mini-batch, D is the hidden size."); + AddInput( + "Input", + "(phi::DenseTensor) the input for sequence data, which supports " + "variable-time length input sequence. 
The underlying tensor in " + "this phi::DenseTensor is a matrix with shape (T X 4D), where T is the " + "total time steps in this mini-batch, D is the hidden size."); AddInput("H0", "(Tensor, optional) the initial hidden state is an optional " "input. This is a tensor with shape (N x D), where N is the " @@ -190,29 +191,34 @@ class LSTMPOpMaker : public framework::OpProtoAndCheckerMaker { " - The shape is (1 x 7D). " " - Bias = {b_c, b_i, b_f, b_o, W_ic, W_fc, W_oc}."); AddOutput("Projection", - "(LoDTensor) the projection of the hidden state of LSTMP " + "(phi::DenseTensor) the projection of the hidden state of LSTMP " "operator. The shape is (T x P), and LoD is the same with the " "`Input`."); AddOutput("Cell", - "(LoDTensor) the cell state of LSTMP operator. " + "(phi::DenseTensor) the cell state of LSTMP operator. " "The shape is (T x D), and lod is the same with the `Input`."); - AddOutput("BatchGate", - "(LoDTensor) This LoDTensor contains input gate, forget gate " - "and output gate after the activations. This LoDTensor has the " - "same shape as the reorganized input, which is also be called " - "batch input. The LoD size is 2. The first-level LoD is the " - "batch offsets and the second contains the indices, which " - "denotes the position of reorganized sequence in the raw input.") + AddOutput( + "BatchGate", + "(phi::DenseTensor) This phi::DenseTensor contains input gate, forget " + "gate " + "and output gate after the activations. This phi::DenseTensor has the " + "same shape as the reorganized input, which is also be called " + "batch input. The LoD size is 2. The first-level LoD is the " + "batch offsets and the second contains the indices, which " + "denotes the position of reorganized sequence in the raw input.") .AsIntermediate(); - AddOutput("BatchCellPreAct", - "(LoDTensor) the pre-activation cell state reorganized in batch. " - "This LoDTensor is obtained in the forward and used in the " - "backward.") + AddOutput( + "BatchCellPreAct", + "(phi::DenseTensor) the pre-activation cell state reorganized in " + "batch. " + "This phi::DenseTensor is obtained in the forward and used in the " + "backward.") .AsIntermediate(); - AddOutput("BatchHidden", - "(LoDTensor) the hidden state reorganized in batch. " - "This LoDTensor is obtained in the forward and used in the " - "backward.") + AddOutput( + "BatchHidden", + "(phi::DenseTensor) the hidden state reorganized in batch. " + "This phi::DenseTensor is obtained in the forward and used in the " + "backward.") .AsIntermediate(); AddAttr("use_peepholes", "(bool, default: True) " diff --git a/paddle/fluid/operators/lstmp_op.h b/paddle/fluid/operators/lstmp_op.h index 23f3a14db837df..8056bf0bd49f2a 100644 --- a/paddle/fluid/operators/lstmp_op.h +++ b/paddle/fluid/operators/lstmp_op.h @@ -29,7 +29,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; using Tensor = phi::DenseTensor; using platform::Transform; @@ -107,7 +106,7 @@ class LSTMPKernel : public framework::OpKernel { } void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); + auto* input = ctx.Input("Input"); auto* weight = ctx.Input("Weight"); auto* proj_weight = ctx.Input("ProjWeight"); auto* bias = ctx.Input("Bias"); @@ -118,11 +117,11 @@ class LSTMPKernel : public framework::OpKernel { auto proj_clip = static_cast(ctx.Attr("proj_clip")); auto cell_clip = static_cast(ctx.Attr("cell_clip")); - auto* batch_gate = ctx.Output("BatchGate"); + auto* batch_gate = ctx.Output("BatchGate"); batch_gate->mutable_data(ctx.GetPlace()); - auto* proj_out = ctx.Output("Projection"); + auto* proj_out = ctx.Output("Projection"); proj_out->mutable_data(ctx.GetPlace()); - auto* cell_out = ctx.Output("Cell"); + auto* cell_out = ctx.Output("Cell"); cell_out->mutable_data(ctx.GetPlace()); bool is_reverse = ctx.Attr("is_reverse"); @@ -172,10 +171,10 @@ class LSTMPKernel : public framework::OpKernel { } // Use the local variable as here. - LoDTensor batch_proj, batch_cell; - auto* batch_cell_pre_act = ctx.Output("BatchCellPreAct"); + phi::DenseTensor batch_proj, batch_cell; + auto* batch_cell_pre_act = ctx.Output("BatchCellPreAct"); batch_cell_pre_act->mutable_data(dims, ctx.GetPlace()); - auto* batch_hidden = ctx.Output("BatchHidden"); + auto* batch_hidden = ctx.Output("BatchHidden"); batch_hidden->mutable_data(dims, ctx.GetPlace()); // T x D batch_proj.mutable_data(proj_dims, ctx.GetPlace()); // T x P batch_cell.mutable_data(dims, ctx.GetPlace()); // T x D @@ -272,11 +271,11 @@ class LSTMPKernel : public framework::OpKernel { phi::funcs::Batch2LoDTensorFunctor to_seq; batch_proj.set_lod(batch_gate->lod()); - // restore the output hidden in LoDTensor from the batch hidden + // restore the output hidden in phi::DenseTensor from the batch hidden to_seq(device_ctx, batch_proj, proj_out); batch_cell.set_lod(batch_gate->lod()); - // restore the output cell state in LoDTensor from the batch cell + // restore the output cell state in phi::DenseTensor from the batch cell to_seq(device_ctx, batch_cell, cell_out); } }; @@ -310,20 +309,20 @@ class LSTMPGradKernel : public framework::OpKernel { auto* proj_weight = ctx.Input("ProjWeight"); auto* bias = ctx.Input("Bias"); - auto* proj_out = ctx.Input("Projection"); - auto* cell_out = ctx.Input("Cell"); + auto* proj_out = ctx.Input("Projection"); + auto* cell_out = ctx.Input("Cell"); auto proj_clip = static_cast(ctx.Attr("proj_clip")); auto cell_clip = static_cast(ctx.Attr("cell_clip")); - auto* batch_gate = ctx.Input("BatchGate"); - auto* batch_cell_pre_act = ctx.Input("BatchCellPreAct"); - auto* batch_hidden = ctx.Input("BatchHidden"); + auto* batch_gate = ctx.Input("BatchGate"); + auto* batch_cell_pre_act = ctx.Input("BatchCellPreAct"); + auto* batch_hidden = ctx.Input("BatchHidden"); auto* projection_g = - ctx.Input(framework::GradVarName("Projection")); + ctx.Input(framework::GradVarName("Projection")); - auto* in_g = ctx.Output(framework::GradVarName("Input")); + auto* in_g = ctx.Output(framework::GradVarName("Input")); auto* weight_g = ctx.Output(framework::GradVarName("Weight")); auto* proj_weight_g = @@ -415,13 +414,13 @@ class LSTMPGradKernel : public framework::OpKernel { to_batch(ctx, src, &dst, false); }; - LoDTensor batch_hidden_g, batch_proj, batch_proj_g, batch_cell; + phi::DenseTensor batch_hidden_g, batch_proj, 
batch_proj_g, batch_cell; batch_hidden_g.mutable_data(out_dims, ctx.GetPlace()); ToBatch(device_ctx, *proj_out, proj_dims, batch_proj); // T x P ToBatch(device_ctx, *projection_g, proj_dims, batch_proj_g); // T x P ToBatch(device_ctx, *cell_out, out_dims, batch_cell); // T x D - LoDTensor batch_cell_g, batch_gate_g; + phi::DenseTensor batch_cell_g, batch_gate_g; batch_cell_g.mutable_data(out_dims, ctx.GetPlace()); // TODO(qingqing) support the case output cell has gradient. // to_batch(device_ctx, *cell_g, batch_cell_g, false); diff --git a/paddle/fluid/operators/match_matrix_tensor_op.cc b/paddle/fluid/operators/match_matrix_tensor_op.cc index 07dfde8d0d4123..facf44725e2b6f 100644 --- a/paddle/fluid/operators/match_matrix_tensor_op.cc +++ b/paddle/fluid/operators/match_matrix_tensor_op.cc @@ -25,7 +25,6 @@ limitations under the License. */ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using LoD = framework::LoD; void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { @@ -92,7 +91,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { if (ctx->IsRuntime()) { framework::Variable* x_var = PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("X")[0]); - const auto& x_lod = x_var->Get().lod(); + const auto& x_lod = x_var->Get().lod(); PADDLE_ENFORCE_EQ(x_lod.empty(), false, platform::errors::InvalidArgument( @@ -117,7 +116,7 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { framework::Variable* y_var = PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("Y")[0]); - const auto& y_lod = y_var->Get().lod(); + const auto& y_lod = y_var->Get().lod(); PADDLE_ENFORCE_EQ(y_lod.empty(), false, platform::errors::InvalidArgument( @@ -213,18 +212,22 @@ void MatchMatrixTensorOpGrad::InferShape( void MatchMatrixTensorOpMaker::Make() { AddInput("X", - "X (LoDTensor, default LoDTensor) Input variable which " + "X (phi::DenseTensor, default phi::DenseTensor) Input " + "variable which " "should contain lod information."); AddInput("Y", - "Y (LoDTensor, default LoDTensor) Input variable which " + "Y (phi::DenseTensor, default phi::DenseTensor) Input " + "variable which " "should contain lod information."); AddInput("W", "W (Tensor), The weight of X and Y."); AddAttr("dim_t", "the dim of W").SetDefault(1); AddOutput("Out", - "(LoDTensor, default LoDTensor) Output variable which " + "(phi::DenseTensor, default phi::DenseTensor) Output " + "variable which " "is X * W * Y"); AddOutput("Tmp", - "(LoDTensor, default LoDTensor) tmp variable which is " + "(phi::DenseTensor, default phi::DenseTensor) tmp variable " + "which is " "used for X * W"); AddComment(R"DOC( Match Matrix Tensor Operator @@ -242,11 +245,11 @@ template class CPUMatchMatrixTensorOPKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); auto* w = ctx.Input("W"); - auto* out = ctx.Output("Out"); - auto* tmp = ctx.Output("Tmp"); + auto* out = ctx.Output("Out"); + auto* tmp = ctx.Output("Tmp"); int dim_t = ctx.Attr("dim_t"); int64_t dim_in = x->dims()[1]; @@ -322,10 +325,10 @@ template class CPUMatchMatrixTensorOPGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); + auto* x = ctx.Input("X"); + auto* y = 
ctx.Input("Y"); auto* w = ctx.Input("W"); - auto* tmp = ctx.Input("Tmp"); + auto* tmp = ctx.Input("Tmp"); int dim_t = ctx.Attr("dim_t"); int64_t dim_in = x->dims()[1]; @@ -346,9 +349,9 @@ class CPUMatchMatrixTensorOPGradKernel : public framework::OpKernel { auto* bottom_r_data = y->data(); auto* bottom_l_trans_data = tmp->data(); - auto* d_out = ctx.Input(framework::GradVarName("Out")); - auto* d_x = ctx.Output(framework::GradVarName("X")); - auto* d_y = ctx.Output(framework::GradVarName("Y")); + auto* d_out = ctx.Input(framework::GradVarName("Out")); + auto* d_x = ctx.Output(framework::GradVarName("X")); + auto* d_y = ctx.Output(framework::GradVarName("Y")); Tensor tmp_grad; tmp_grad.Resize(tmp->dims()); diff --git a/paddle/fluid/operators/memcpy_d2h_op.cc b/paddle/fluid/operators/memcpy_d2h_op.cc index 80181779ab3477..82feee0f695db5 100644 --- a/paddle/fluid/operators/memcpy_d2h_op.cc +++ b/paddle/fluid/operators/memcpy_d2h_op.cc @@ -83,9 +83,9 @@ class MemcpyD2HKernel { class MemcpyD2HOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "(LoDTensor) The input variable "); + AddInput("X", "(phi::DenseTensor) The input variable "); AddOutput("Out", - "(LoDTensor) The type of output " + "(phi::DenseTensor) The type of output " "is the same as input X."); AddAttr( "dst_place_type", @@ -98,7 +98,7 @@ class MemcpyD2HOpProtoMaker : public framework::OpProtoAndCheckerMaker { MemcpyD2H Operator. By now, it ONLY supports the memcopy between NPUPlace/CUDAPlace <-> CUDAPinnedPlace/CPU. You would have to update it if you want other more capacities. -Out = X, when type in [LoDTensor] +Out = X, when type in [phi::DenseTensor] raise error if the type is not listed above. )DOC"); } diff --git a/paddle/fluid/operators/memcpy_h2d_op.cc b/paddle/fluid/operators/memcpy_h2d_op.cc index 8d2cfcff80768e..1426b23dc1b664 100644 --- a/paddle/fluid/operators/memcpy_h2d_op.cc +++ b/paddle/fluid/operators/memcpy_h2d_op.cc @@ -84,9 +84,9 @@ class MemcpyH2DKernel { class MemcpyH2DOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "(LoDTensor) The input variable "); + AddInput("X", "(phi::DenseTensor) The input variable "); AddOutput("Out", - "(LoDTensor) The type of output " + "(phi::DenseTensor) The type of output " "is the same as input X."); AddAttr("dst_place_type", "Determine the dst place of tensor copy. " @@ -100,7 +100,7 @@ class MemcpyH2DOpProtoMaker : public framework::OpProtoAndCheckerMaker { MemcpyD2H Operator. By now, it ONLY supports the memcopy between CUDAPinnedPlace/CPU <-> NPUPlace/CUDAPlace. You would have to update it if you want other more capacities. -Out = X, when type in [LoDTensor] +Out = X, when type in [phi::DenseTensor] raise error if the type is not listed above. )DOC"); } diff --git a/paddle/fluid/operators/memcpy_op.cc b/paddle/fluid/operators/memcpy_op.cc index caa4164ee5bc0c..66cf6a00b7af43 100644 --- a/paddle/fluid/operators/memcpy_op.cc +++ b/paddle/fluid/operators/memcpy_op.cc @@ -100,9 +100,9 @@ class MemcpyKernel { class MemcpyOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "(LoDTensor) The input variable "); + AddInput("X", "(phi::DenseTensor) The input variable "); AddOutput("Out", - "(LoDTensor) The type of output " + "(phi::DenseTensor) The type of output " "is the same as input X."); AddAttr("dst_place_type", "Determine the dst place of tensor copy. 
" @@ -122,7 +122,7 @@ class MemcpyOpProtoMaker : public framework::OpProtoAndCheckerMaker { NPUPlace <-> CPUPlace, and used as an internal op by Recompute-Offload. You would have to update it if you want other more capacities. -Out = X, when type in [LoDTensor] +Out = X, when type in [phi::DenseTensor] raise error if the type is not listed above. )DOC"); } diff --git a/paddle/fluid/operators/merge_lod_tensor_op.cc b/paddle/fluid/operators/merge_lod_tensor_op.cc index 5bd34010db9227..007f853f3243fe 100644 --- a/paddle/fluid/operators/merge_lod_tensor_op.cc +++ b/paddle/fluid/operators/merge_lod_tensor_op.cc @@ -104,7 +104,7 @@ class MergeLoDTensorOp : public framework::OperatorBase { out_lod->clear(); size_t out_offset = 0; - // Build LoDTensor `out` + // Build phi::DenseTensor `out` size_t in_true_idx = 0; size_t in_false_idx = 0; @@ -182,18 +182,18 @@ class MergeLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "The input LoDTensor, contains complete lod information to " + "The input phi::DenseTensor, contains complete lod information to " "construct the output"); AddInput("Mask", "A bool column vector which mask the input"); AddInput("InTrue", "The True branch to be merged"); AddInput("InFalse", "The False branch to be merged"); - AddOutput("Out", "The merged output LoDTensor"); + AddOutput("Out", "The merged output phi::DenseTensor"); AddAttr("level", "(int) the specific lod level to rank.") .SetDefault(0) .EqualGreaterThan(0); AddComment( R"DOC( - Merge True and False branches of LoDTensor into a single Output, + Merge True and False branches of phi::DenseTensor into a single Output, with a mask at certain lod level. X is used to obtain complete lod information. Please refer to SplitLoDTensorOp.)DOC"); } diff --git a/paddle/fluid/operators/nce_op.cc b/paddle/fluid/operators/nce_op.cc index 9badb74988bc4b..b80de062796a05 100644 --- a/paddle/fluid/operators/nce_op.cc +++ b/paddle/fluid/operators/nce_op.cc @@ -300,7 +300,7 @@ class NCEOpGradVarTypeInference : public framework::VarTypeInference { ctx->SetOutputType(weight_grad, framework::proto::VarType::SELECTED_ROWS); } else { VLOG(3) << "nce_op_grad op " << weight_grad << " and " - << " is set to LoDTensor"; + << " is set to phi::DenseTensor"; ctx->SetOutputType(weight_grad, framework::proto::VarType::LOD_TENSOR); } ctx->SetOutputDataType(weight_grad, ctx->GetInputDataType("Input")); diff --git a/paddle/fluid/operators/nce_op.h b/paddle/fluid/operators/nce_op.h index a699f81b827e5b..a4b418b14cc84b 100644 --- a/paddle/fluid/operators/nce_op.h +++ b/paddle/fluid/operators/nce_op.h @@ -32,7 +32,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; using Sampler = math::Sampler; using DDim = framework::DDim; @@ -395,15 +394,15 @@ class NCEGradKernel : public framework::OpKernel { auto *table_var = context.InputVar("Weight"); DDim table_dim; - if (table_var->IsType()) { - table_dim = context.Input("Weight")->dims(); + if (table_var->IsType()) { + table_dim = context.Input("Weight")->dims(); } else if (table_var->IsType()) { auto *table_t = context.Input("Weight"); table_dim = table_t->value().dims(); } else { PADDLE_THROW(platform::errors::InvalidArgument( "The parameter Weight of a NCE_OP " - "must be either LoDTensor or SelectedRows")); + "must be either phi::DenseTensor or SelectedRows")); } auto d_w = diff --git a/paddle/fluid/operators/number_count_op.cu 
b/paddle/fluid/operators/number_count_op.cu index 25541ebdb36217..99623917d59ee3 100644 --- a/paddle/fluid/operators/number_count_op.cu +++ b/paddle/fluid/operators/number_count_op.cu @@ -37,7 +37,6 @@ static inline int GET_BLOCKS(const int N) { return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS; } -using LoDTensor = phi::DenseTensor; using Tensor = phi::DenseTensor; template @@ -86,9 +85,9 @@ template class NumberCountOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto numbers = context.Input("numbers"); + auto numbers = context.Input("numbers"); auto upper_range = context.Attr("upper_range"); - auto number_count = context.Output("Out"); + auto number_count = context.Output("Out"); int64_t batch_size = numbers->numel(); auto place = context.GetPlace(); diff --git a/paddle/fluid/operators/one_hot_op.cc b/paddle/fluid/operators/one_hot_op.cc index 8e1a07975e2dac..0cd6cab49eb11e 100644 --- a/paddle/fluid/operators/one_hot_op.cc +++ b/paddle/fluid/operators/one_hot_op.cc @@ -79,7 +79,8 @@ class OneHotOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "(LoDTensor, LoDTensor) Input variable with rank at least 2. " + "(phi::DenseTensor, phi::DenseTensor) Input variable with " + "rank at least 2. " "The last dimension of X should be 1. Each value of X is an index " "to indicate the position."); AddInput("depth_tensor", "(Tensor, Tensor), Length of one-hot vector") diff --git a/paddle/fluid/operators/one_hot_op.cu b/paddle/fluid/operators/one_hot_op.cu index b36ca97b3e40f9..917fa857e07782 100644 --- a/paddle/fluid/operators/one_hot_op.cu +++ b/paddle/fluid/operators/one_hot_op.cu @@ -60,13 +60,12 @@ struct OneHotOpCUDAFunctor { } }; -using LoDTensor = phi::DenseTensor; template class OneHotCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* out = context.Output("Out"); + auto* in = context.Input("X"); + auto* out = context.Output("Out"); int depth = -1; if (context.HasInput("depth_tensor")) { diff --git a/paddle/fluid/operators/one_hot_op.h b/paddle/fluid/operators/one_hot_op.h index b8eb1c046d59d0..d878fd5a6d44bb 100644 --- a/paddle/fluid/operators/one_hot_op.h +++ b/paddle/fluid/operators/one_hot_op.h @@ -76,14 +76,13 @@ struct OneHotOpFunctor { } }; -using LoDTensor = phi::DenseTensor; using Tensor = phi::DenseTensor; template class OneHotKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* out = context.Output("Out"); + auto* in = context.Input("X"); + auto* out = context.Output("Out"); int depth = context.Attr("depth"); bool allow_out_of_range = context.Attr("allow_out_of_range"); if (context.HasInput("depth_tensor")) { diff --git a/paddle/fluid/operators/one_hot_op_npu.cc b/paddle/fluid/operators/one_hot_op_npu.cc index 2ca74cac0a051d..e2997dc079c61c 100644 --- a/paddle/fluid/operators/one_hot_op_npu.cc +++ b/paddle/fluid/operators/one_hot_op_npu.cc @@ -25,8 +25,8 @@ class OneHotNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* in = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* in = ctx.Input("X"); + auto* out = ctx.Output("Out"); int depth = ctx.Attr("depth"); if (ctx.HasInput("depth_tensor")) { diff --git 
a/paddle/fluid/operators/one_hot_op_xpu.cc b/paddle/fluid/operators/one_hot_op_xpu.cc index afa7104e9175a8..66826cd4ff33a3 100644 --- a/paddle/fluid/operators/one_hot_op_xpu.cc +++ b/paddle/fluid/operators/one_hot_op_xpu.cc @@ -22,15 +22,14 @@ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; using Tensor = phi::DenseTensor; template class OneHotXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - const auto* in = context.Input("X"); - auto* out = context.Output("Out"); + const auto* in = context.Input("X"); + auto* out = context.Output("Out"); // get depth from attr int depth = context.Attr("depth"); diff --git a/paddle/fluid/operators/one_hot_v2_op.cc b/paddle/fluid/operators/one_hot_v2_op.cc index 55cb5d1a53b2fc..f5b55fcf0275a2 100644 --- a/paddle/fluid/operators/one_hot_v2_op.cc +++ b/paddle/fluid/operators/one_hot_v2_op.cc @@ -52,7 +52,8 @@ class OneHotV2OpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "(LoDTensor, LoDTensor) Input variable with rank at least 2. " + "(phi::DenseTensor, phi::DenseTensor) Input variable with " + "rank at least 2. " "The last dimension of X should be 1. Each value of X is an index " "to indicate the position."); AddInput("depth_tensor", "(Tensor, Tensor), Length of one-hot vector") diff --git a/paddle/fluid/operators/one_hot_v2_op_mlu.cc b/paddle/fluid/operators/one_hot_v2_op_mlu.cc index 1b7ce8f243b6f3..f98cbabf58a87a 100644 --- a/paddle/fluid/operators/one_hot_v2_op_mlu.cc +++ b/paddle/fluid/operators/one_hot_v2_op_mlu.cc @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; template class OneHotV2MLUKernel : public framework::OpKernel { @@ -28,8 +27,8 @@ class OneHotV2MLUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* in = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* in = ctx.Input("X"); + auto* out = ctx.Output("Out"); int depth = ctx.Attr("depth"); if (ctx.HasInput("depth_tensor")) { std::vector depth_data; diff --git a/paddle/fluid/operators/one_hot_v2_op_npu.cc b/paddle/fluid/operators/one_hot_v2_op_npu.cc index 01ab76ab5ccd3e..8cc97b417ca78e 100644 --- a/paddle/fluid/operators/one_hot_v2_op_npu.cc +++ b/paddle/fluid/operators/one_hot_v2_op_npu.cc @@ -18,7 +18,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; template class OneHotV2NPUKernel : public framework::OpKernel { @@ -26,8 +25,8 @@ class OneHotV2NPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* in = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* in = ctx.Input("X"); + auto* out = ctx.Output("Out"); int depth = ctx.Attr("depth"); if (ctx.HasInput("depth_tensor")) { diff --git a/paddle/fluid/operators/partial_concat_op.cu b/paddle/fluid/operators/partial_concat_op.cu index 01bf11d2ea7055..ef52bbad525a4a 100644 --- a/paddle/fluid/operators/partial_concat_op.cu +++ b/paddle/fluid/operators/partial_concat_op.cu @@ -23,7 +23,6 @@ namespace operators { #define CEIL_DIV(x, y) (((x) + (y)-1) / (y)) -using LoDTensor = phi::DenseTensor; using Tensor = phi::DenseTensor; template @@ -154,8 +153,8 @@ class PartialConcatGradOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto *out_grad = ctx.Input(framework::GradVarName("Out")); - auto ins = ctx.MultiInput("X"); - auto outs = ctx.MultiOutput(framework::GradVarName("X")); + auto ins = ctx.MultiInput("X"); + auto outs = ctx.MultiOutput(framework::GradVarName("X")); PADDLE_ENFORCE_EQ(ins[0] != nullptr, true, diff --git a/paddle/fluid/operators/partial_sum_op.cu b/paddle/fluid/operators/partial_sum_op.cu index f427ab4e3f2d24..c92e9618bfce02 100644 --- a/paddle/fluid/operators/partial_sum_op.cu +++ b/paddle/fluid/operators/partial_sum_op.cu @@ -23,7 +23,6 @@ namespace operators { #define CEIL_DIV(x, y) (((x) + (y)-1) / (y)) -using LoDTensor = phi::DenseTensor; using Tensor = phi::DenseTensor; template @@ -153,8 +152,8 @@ class PartialSumGradOpCUDAKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &ctx) const override { const Tensor *out_grad = ctx.Input(framework::GradVarName("Out")); - auto ins = ctx.MultiInput("X"); - auto outs = ctx.MultiOutput(framework::GradVarName("X")); + auto ins = ctx.MultiInput("X"); + auto outs = ctx.MultiOutput(framework::GradVarName("X")); PADDLE_ENFORCE_EQ( ins[0] != nullptr, diff --git a/paddle/fluid/operators/positive_negative_pair_op.h b/paddle/fluid/operators/positive_negative_pair_op.h index 439a02e37d6cfb..1cc89cda21bc79 100644 --- a/paddle/fluid/operators/positive_negative_pair_op.h +++ b/paddle/fluid/operators/positive_negative_pair_op.h @@ -20,7 +20,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; template class PositiveNegativePairKernel : public framework::OpKernel { diff --git a/paddle/fluid/operators/prroi_pool_op.cc b/paddle/fluid/operators/prroi_pool_op.cc index 57c80fc1fa4326..9b3146c3b84875 100644 --- a/paddle/fluid/operators/prroi_pool_op.cc +++ b/paddle/fluid/operators/prroi_pool_op.cc @@ -20,7 +20,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; class PRROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { public: @@ -33,9 +32,9 @@ class PRROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { "H is the height of the input feature map, and " "W is the width."); AddInput("ROIs", - "(LoDTensor), " + "(phi::DenseTensor), " "ROIs (Regions of Interest) to pool over. 
" - "should be a 2-D LoDTensor of shape (num_rois, 4) " + "should be a 2-D phi::DenseTensor of shape (num_rois, 4) " "given as [(x1, y1, x2, y2), ...]. " "where (x1, y1) is the top left coordinates, and " "(x2, y2) is the bottom right coordinates. " @@ -95,13 +94,13 @@ class PRROIPoolOp : public framework::OperatorWithKernel { rois_dims.size(), 2, platform::errors::InvalidArgument( - "ROIs should be a 2-D LoDTensor of shape (num_rois, 4) " + "ROIs should be a 2-D phi::DenseTensor of shape (num_rois, 4) " "given as [(x1, y1, x2, y2), ...]")); PADDLE_ENFORCE_EQ( rois_dims[1], 4, platform::errors::InvalidArgument( - "ROIs should be a 2-D LoDTensor of shape (num_rois, 4) " + "ROIs should be a 2-D phi::DenseTensor of shape (num_rois, 4) " "given as [(x1, y1, x2, y2), ...]")); int pooled_height = ctx->Attrs().Get("pooled_height"); int pooled_width = ctx->Attrs().Get("pooled_width"); diff --git a/paddle/fluid/operators/prroi_pool_op.cu b/paddle/fluid/operators/prroi_pool_op.cu index eaa0526174317a..b24ded79dd0501 100644 --- a/paddle/fluid/operators/prroi_pool_op.cu +++ b/paddle/fluid/operators/prroi_pool_op.cu @@ -18,7 +18,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; static constexpr int kNumCUDAThreads = 512; static constexpr int kNumMaximumNumBlocks = 4096; @@ -219,7 +218,7 @@ class GPUPRROIPoolOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* in = ctx.Input("X"); - auto* rois = ctx.Input("ROIs"); + auto* rois = ctx.Input("ROIs"); auto* out = ctx.Output("Out"); auto pooled_height = ctx.Attr("pooled_height"); @@ -322,7 +321,7 @@ class GPUPRROIPoolGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* in = ctx.Input("X"); - auto* rois = ctx.Input("ROIs"); + auto* rois = ctx.Input("ROIs"); auto* out = ctx.Input("Out"); auto* output_grad = @@ -330,7 +329,7 @@ class GPUPRROIPoolGradOpKernel : public framework::OpKernel { auto* input_grad = ctx.Output(framework::GradVarName("X")); auto* input_roi_grad = - ctx.Output(framework::GradVarName("ROIs")); + ctx.Output(framework::GradVarName("ROIs")); auto pooled_height = ctx.Attr("pooled_height"); auto pooled_width = ctx.Attr("pooled_width"); diff --git a/paddle/fluid/operators/prune_gate_by_capacity_op.cu b/paddle/fluid/operators/prune_gate_by_capacity_op.cu index 9f038002cfbe6e..cec8b15c414462 100644 --- a/paddle/fluid/operators/prune_gate_by_capacity_op.cu +++ b/paddle/fluid/operators/prune_gate_by_capacity_op.cu @@ -30,7 +30,6 @@ DECLARE_bool(avoid_op_randomness); namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; static constexpr int kNumCUDAThreads = 512; static constexpr int kNumMaxinumNumBlocks = 4096; @@ -111,10 +110,11 @@ template class PruneGateByCapacityCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* gate_idx = context.Input("GateIdx"); - auto* expert_count = context.Input("ExpertCount"); - // auto* expert_count_out = context.Output("ExpertCountOut"); - auto* new_gate_idx = context.Output("NewGateIdx"); + auto* gate_idx = context.Input("GateIdx"); + auto* expert_count = context.Input("ExpertCount"); + // auto* expert_count_out = + // context.Output("ExpertCountOut"); + auto* new_gate_idx = context.Output("NewGateIdx"); auto* new_gate_idx_data = new_gate_idx->mutable_data(context.GetPlace()); phi::DenseTensor 
expert_count_out; diff --git a/paddle/fluid/operators/psroi_pool_op.cc b/paddle/fluid/operators/psroi_pool_op.cc index e85e51d9ebebef..1222f97c091688 100644 --- a/paddle/fluid/operators/psroi_pool_op.cc +++ b/paddle/fluid/operators/psroi_pool_op.cc @@ -32,9 +32,9 @@ class PSROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { "H is the height of the input feature map, and " "W is the width. The data type can be float32 or float64"); AddInput("ROIs", - "(LoDTensor), " + "(phi::DenseTensor), " "ROIs (Regions of Interest) to pool over. " - "should be a 2-D LoDTensor of shape (num_rois, 4) " + "should be a 2-D phi::DenseTensor of shape (num_rois, 4) " "given as [(x1, y1, x2, y2), ...]. " "where (x1, y1) is the top left coordinates, and " "(x2, y2) is the bottom right coordinates. " diff --git a/paddle/fluid/operators/pull_box_extended_sparse_op.h b/paddle/fluid/operators/pull_box_extended_sparse_op.h index 7da62ca9711a31..eff3bfd2a5f3c3 100644 --- a/paddle/fluid/operators/pull_box_extended_sparse_op.h +++ b/paddle/fluid/operators/pull_box_extended_sparse_op.h @@ -108,7 +108,6 @@ static void PushBoxExtendedSparseFunctor( #endif } -using LoDTensor = phi::DenseTensor; template class PullBoxExtendedSparseCPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/pull_box_sparse_op.h b/paddle/fluid/operators/pull_box_sparse_op.h index c77cb440d08e31..dd41fd6ff0f4f2 100644 --- a/paddle/fluid/operators/pull_box_sparse_op.h +++ b/paddle/fluid/operators/pull_box_sparse_op.h @@ -113,7 +113,6 @@ static void PushBoxSparseFunctor(const framework::ExecutionContext &ctx) { #endif } -using LoDTensor = phi::DenseTensor; template class PullBoxSparseKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/pull_gpups_sparse_op.cu b/paddle/fluid/operators/pull_gpups_sparse_op.cu index d22c632d60dd25..ff68c42c8eb1b1 100644 --- a/paddle/fluid/operators/pull_gpups_sparse_op.cu +++ b/paddle/fluid/operators/pull_gpups_sparse_op.cu @@ -19,7 +19,6 @@ namespace paddle { namespace operators { using phi::PADDLE_CUDA_NUM_THREADS; -using LoDTensor = phi::DenseTensor; template class PullGpuPSSparseCUDAKernel : public framework::OpKernel { diff --git a/paddle/fluid/operators/pull_gpups_sparse_op.h b/paddle/fluid/operators/pull_gpups_sparse_op.h index 7269ddd7d505a0..2d844a4ce2bf09 100644 --- a/paddle/fluid/operators/pull_gpups_sparse_op.h +++ b/paddle/fluid/operators/pull_gpups_sparse_op.h @@ -97,7 +97,6 @@ static void PushGpuPSSparseFunctor(const framework::ExecutionContext &ctx) { #endif } -using LoDTensor = phi::DenseTensor; template class PullGpuPSSparseCPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/py_func_op.cc b/paddle/fluid/operators/py_func_op.cc index 474576f8f6809e..2c5736d3668905 100644 --- a/paddle/fluid/operators/py_func_op.cc +++ b/paddle/fluid/operators/py_func_op.cc @@ -118,8 +118,8 @@ static void CallPythonFunc(py::object *callable, out->ShareDataWith(*py_out_tensor); } catch (py::cast_error &) { PADDLE_THROW(platform::errors::InvalidArgument( - "py::cast to LoDTensor error. The %d-th output expection is " - "LoDTensor", + "py::cast to phi::DenseTensor error. 
The %d-th output expection is " + "phi::DenseTensor", i)); } } diff --git a/paddle/fluid/operators/pyramid_hash_op.cc b/paddle/fluid/operators/pyramid_hash_op.cc index 44767dd7ab9987..5eead81365053f 100644 --- a/paddle/fluid/operators/pyramid_hash_op.cc +++ b/paddle/fluid/operators/pyramid_hash_op.cc @@ -29,7 +29,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using LoD = framework::LoD; class PyramidHashOpMaker : public framework::OpProtoAndCheckerMaker { @@ -275,12 +274,12 @@ class CPUPyramidHashOPKernel : public framework::OpKernel { } void Compute(const framework::ExecutionContext& ctx) const override { - auto* bottom = ctx.Input("X"); + auto* bottom = ctx.Input("X"); auto* _blobs_0 = ctx.Input("W"); auto* _blobs_1 = ctx.Input("WhiteList"); auto* _blobs_2 = ctx.Input("BlackList"); - auto* top = ctx.Output("Out"); - auto* drop_pos = ctx.Output("DropPos"); + auto* top = ctx.Output("Out"); + auto* drop_pos = ctx.Output("DropPos"); int _num_emb = ctx.Attr("num_emb"); bool use_filter = ctx.Attr("use_filter"); @@ -296,7 +295,7 @@ class CPUPyramidHashOPKernel : public framework::OpKernel { const auto& offset = bottom->lod()[0]; const auto* bottom_data_ori = bottom->data(); - auto* buff = ctx.Output("X_Temp_Out"); + auto* buff = ctx.Output("X_Temp_Out"); buff->Resize(phi::make_ddim({bottom->dims()[0], bottom->dims()[1]})); float* bottom_data = buff->mutable_data(ctx.GetPlace()); for (int i = 0; i < bottom->dims()[0]; i++) { @@ -512,10 +511,10 @@ class CPUPyramidHashOPGradKernel : public framework::OpKernel { } void Compute(const framework::ExecutionContext& ctx) const override { - auto* bottom = ctx.Input("X"); + auto* bottom = ctx.Input("X"); auto* _blobs = ctx.Input("W"); - auto* drop_pos = ctx.Input("DropPos"); - auto* top = ctx.Input(framework::GradVarName("Out")); + auto* drop_pos = ctx.Input("DropPos"); + auto* top = ctx.Input(framework::GradVarName("Out")); int _num_emb = ctx.Attr("num_emb"); float _lr = ctx.Attr("lr"); @@ -523,7 +522,7 @@ class CPUPyramidHashOPGradKernel : public framework::OpKernel { int _space_len = ctx.Attr("space_len"); int _pyramid_layer = ctx.Attr("pyramid_layer"); - auto* buff = ctx.Input("X_Temp_Out"); + auto* buff = ctx.Input("X_Temp_Out"); auto* bottom_data = buff->data(); int _slot_len = bottom->dims()[0]; diff --git a/paddle/fluid/operators/random_routing_op.cu b/paddle/fluid/operators/random_routing_op.cu index 1fdb1bf73a3047..f7f111299c73d5 100644 --- a/paddle/fluid/operators/random_routing_op.cu +++ b/paddle/fluid/operators/random_routing_op.cu @@ -29,7 +29,6 @@ static inline int GET_BLOCKS(const int N) { return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS; } -using LoDTensor = phi::DenseTensor; using Tensor = phi::DenseTensor; template @@ -54,10 +53,10 @@ template class RandomRoutingOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto topk_idx = context.Input("TopK_Idx"); - auto topk_value = context.Input("TopK_Value"); - auto prob = context.Input("Prob"); - auto out = context.Output("Out"); + auto topk_idx = context.Input("TopK_Idx"); + auto topk_value = context.Input("TopK_Value"); + auto prob = context.Input("Prob"); + auto out = context.Output("Out"); auto place = context.GetPlace(); const auto& dev_ctx = context.template device_context(); diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc index 4b2ed12b1cf8fd..40c553f41cc678 100644 --- 
a/paddle/fluid/operators/recurrent_op.cc +++ b/paddle/fluid/operators/recurrent_op.cc @@ -153,12 +153,13 @@ int64_t RecurrentBase::GetSequenceLength(const framework::Scope &scope) const { PADDLE_ENFORCE_NOT_NULL(var, platform::errors::InvalidArgument( "RecurrentOp finds var %s is NULL", iname)); - PADDLE_ENFORCE_EQ(var->IsType(), - true, - platform::errors::InvalidArgument( - "RecurrentOp only accepts LoDTensor as input but " - "input var %s is not LoDTensor", - iname)); + PADDLE_ENFORCE_EQ( + var->IsType(), + true, + platform::errors::InvalidArgument( + "RecurrentOp only accepts phi::DenseTensor as input but " + "input var %s is not phi::DenseTensor", + iname)); auto &dim = var->Get().dims(); if (seq_len == -1) { seq_len = dim[0]; diff --git a/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc b/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc index dfafbb7c8a2cb5..dbd424b1fa0e5d 100644 --- a/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc +++ b/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc @@ -37,13 +37,14 @@ class ReorderLoDTensorByRankTableOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", - "(LoDTensor), the input lod tensor to be reordered according to " - "Input(RankTable)."); + AddInput( + "X", + "(phi::DenseTensor), the input lod tensor to be reordered according to " + "Input(RankTable)."); AddInput("RankTable", "(LoDRankTable), the rank table according to which Input(X) is " "reordered."); - AddOutput("Out", "LoDTensor, the reordered lod tensor."); + AddOutput("Out", "phi::DenseTensor, the reordered lod tensor."); AddComment(R"DOC(ReorderLoDTensorByRankTable operator. Input(X) is a batch of sequences. Input(RankTable) stores new orders of the diff --git a/paddle/fluid/operators/reverse_op.cc b/paddle/fluid/operators/reverse_op.cc index 810a73d89d2176..93877aa8251cb7 100644 --- a/paddle/fluid/operators/reverse_op.cc +++ b/paddle/fluid/operators/reverse_op.cc @@ -48,15 +48,15 @@ class ReverseOpVarTypeInference : public framework::VarTypeInference { class ReverseOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "The LoDTensor to be flipped."); - AddOutput("Out", "The LoDTensor after flipping."); + AddInput("X", "The phi::DenseTensor to be flipped."); + AddOutput("Out", "The phi::DenseTensor after flipping."); AddAttr>( "axis", "The axises that along which order of elements is reversed.") .SupportTensor(); AddComment(R"DOC( Reverse Operator. - Reverse the order of elements in the input LoDTensor along given axises. + Reverse the order of elements in the input phi::DenseTensor along given axises. Case 1: Given diff --git a/paddle/fluid/operators/roi_align_op.cc b/paddle/fluid/operators/roi_align_op.cc index 0cb74c50dfc27e..6a7999c56557f0 100644 --- a/paddle/fluid/operators/roi_align_op.cc +++ b/paddle/fluid/operators/roi_align_op.cc @@ -21,7 +21,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; class ROIAlignOp : public framework::OperatorWithKernel { public: @@ -73,9 +72,9 @@ class ROIAlignOpMaker : public framework::OpProtoAndCheckerMaker { "H is the height of the feature, and " "W is the width of the feature."); AddInput("ROIs", - "(LoDTensor), " + "(phi::DenseTensor), " "ROIs (Regions of Interest) to pool over. " - "should be a 2-D LoDTensor of shape (num_rois, 4)" + "should be a 2-D phi::DenseTensor of shape (num_rois, 4)" "given as [[x1, y1, x2, y2], ...]. 
" "(x1, y1) is the top left coordinates, and " "(x2, y2) is the bottom right coordinates."); diff --git a/paddle/fluid/operators/roi_align_op_mlu.cc b/paddle/fluid/operators/roi_align_op_mlu.cc index 15d42db2751e4f..5bde4dd7b6686e 100644 --- a/paddle/fluid/operators/roi_align_op_mlu.cc +++ b/paddle/fluid/operators/roi_align_op_mlu.cc @@ -20,14 +20,13 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; template class ROIAlignOpMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* in = ctx.Input("X"); - auto* rois = ctx.Input("ROIs"); + auto* rois = ctx.Input("ROIs"); auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); out->set_layout(phi::DataLayout::kNHWC); @@ -175,7 +174,7 @@ template class ROIAlignGradOpMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* rois = ctx.Input("ROIs"); + auto* rois = ctx.Input("ROIs"); auto* out_grad = ctx.Input(framework::GradVarName("Out")); auto* in_grad = ctx.Output(framework::GradVarName("X")); diff --git a/paddle/fluid/operators/roi_pool_op.cc b/paddle/fluid/operators/roi_pool_op.cc index 7bba00fb90aa2c..b2e8a6ae58883b 100644 --- a/paddle/fluid/operators/roi_pool_op.cc +++ b/paddle/fluid/operators/roi_pool_op.cc @@ -24,7 +24,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; class ROIPoolOp : public framework::OperatorWithKernel { public: @@ -75,9 +74,9 @@ class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { "H is the height of the feature, and " "W is the width of the feature."); AddInput("ROIs", - "(LoDTensor), " + "(phi::DenseTensor), " "ROIs (Regions of Interest) to pool over. " - "should be a 2-D LoDTensor of shape (num_rois, 4)" + "should be a 2-D phi::DenseTensor of shape (num_rois, 4)" "given as [[x1, y1, x2, y2], ...]. " "Where batch_id is the id of the data, " "(x1, y1) is the top left coordinates, and " diff --git a/paddle/fluid/operators/row_conv_op.cc b/paddle/fluid/operators/row_conv_op.cc index 2e3e6e70dfda55..ae8a34ec312f6c 100644 --- a/paddle/fluid/operators/row_conv_op.cc +++ b/paddle/fluid/operators/row_conv_op.cc @@ -23,8 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; - template @@ -84,7 +82,7 @@ class RowConvOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "the input(X) is a LodTensor or tensor, LodTensor(X) supports " "variable time-length input sequences. The underlying tensor " - "in this LoDTensor is a matrix with shape (T x N), where T " + "in this phi::DenseTensor is a matrix with shape (T x N), where T " "is the total time steps in this mini-batch and N is the input " "data dimension. 
the shape of Tensor input(X) has shape " "(B x T x N), B is batch size;"); @@ -142,9 +140,9 @@ template class RowConvKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *x = context.Input("X"); + auto *x = context.Input("X"); auto *filter = context.Input("Filter"); - auto *out = context.Output("Out"); + auto *out = context.Output("Out"); out->mutable_data(context.GetPlace()); @@ -217,10 +215,11 @@ template class RowConvGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *x = context.Input("X"); + auto *x = context.Input("X"); auto *filter = context.Input("Filter"); - auto *d_out = context.Input(framework::GradVarName("Out")); - auto *dx = context.Output(framework::GradVarName("X")); + auto *d_out = + context.Input(framework::GradVarName("Out")); + auto *dx = context.Output(framework::GradVarName("X")); auto *d_filter = context.Output(framework::GradVarName("Filter")); diff --git a/paddle/fluid/operators/row_conv_op.cu b/paddle/fluid/operators/row_conv_op.cu index 34595180c9d721..81f140b36fce4a 100644 --- a/paddle/fluid/operators/row_conv_op.cu +++ b/paddle/fluid/operators/row_conv_op.cu @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; - namespace { inline int DivUp(int x, int y) { return (x + y - 1) / y; } @@ -325,9 +323,9 @@ template class RowConvKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *X = context.Input("X"); + auto *X = context.Input("X"); auto *Filter = context.Input("Filter"); - auto *Out = context.Output("Out"); + auto *Out = context.Output("Out"); const T *in = X->data(); const T *weight = Filter->data(); @@ -379,15 +377,15 @@ template class RowConvGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *X = context.Input("X"); + auto *X = context.Input("X"); auto *Filter = context.Input("Filter"); - auto *dOut = context.Input(framework::GradVarName("Out")); + auto *dOut = context.Input(framework::GradVarName("Out")); const T *in = X->data(); const T *weights = Filter->data(); const T *dout = dOut->data(); phi::DenseTensor *dX = - context.Output(framework::GradVarName("X")); + context.Output(framework::GradVarName("X")); phi::DenseTensor *dFilter = context.Output(framework::GradVarName("Filter")); int batch_size = 0; diff --git a/paddle/fluid/operators/run_program_op.cc b/paddle/fluid/operators/run_program_op.cc index 64afb3a2b91e96..52e35c343063ac 100644 --- a/paddle/fluid/operators/run_program_op.cc +++ b/paddle/fluid/operators/run_program_op.cc @@ -65,18 +65,18 @@ class RunProgramOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "(vector)" + "(vector)" "The input tensors of RunProgram operator, also the feed targets " "of loaded program.") .AsDuplicable(); AddInput("Params", - "(vector)" + "(vector)" "The input parameter of RunProgram operator, also the parameters " "of the loaded program.") .AsDuplicable() .AsDispensable(); AddOutput("Out", - "(vector)" + "(vector)" "The output tensors of RunProgram operator, also the fetch " "targets of the loaded program.") .AsDuplicable(); @@ -87,7 +87,7 @@ class RunProgramOpMaker : public framework::OpProtoAndCheckerMaker { "NOTE: Do not use Scope directly because Scope output is not " "currently 
supported."); AddOutput("DOut", - "(vector)" + "(vector)" "The output tensors for GRAD Tensors in RunProgram forward " "operator, the forward operator contains GRAD Tensors when it " "computes double grad.") diff --git a/paddle/fluid/operators/run_program_op.h b/paddle/fluid/operators/run_program_op.h index 93c222cfb8f060..f7d3630c019d84 100644 --- a/paddle/fluid/operators/run_program_op.h +++ b/paddle/fluid/operators/run_program_op.h @@ -48,25 +48,24 @@ using BlockDesc = framework::BlockDesc; using ProgramDesc = framework::ProgramDesc; using Variable = framework::Variable; -using LoDTensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; namespace details { -// all input vars should be LoDTensor & is initialized +// all input vars should be phi::DenseTensor & is initialized static void CheckInputVarStatus(const Variable &var, const std::string &var_name) { + PADDLE_ENFORCE_EQ(var.IsType(), + true, + platform::errors::InvalidArgument( + "The input variable %s of " + "RunProgram(Grad)Op holds " + "wrong type. Expect type is phi::DenseTensor, but " + "receive type is %s.", + var_name, + platform::demangle(framework::ToTypeName(var.Type())))); PADDLE_ENFORCE_EQ( - var.IsType(), - true, - platform::errors::InvalidArgument( - "The input variable %s of " - "RunProgram(Grad)Op holds " - "wrong type. Expect type is LoDTensor, but receive type is %s.", - var_name, - platform::demangle(framework::ToTypeName(var.Type())))); - PADDLE_ENFORCE_EQ( - var.Get().IsInitialized(), + var.Get().IsInitialized(), true, platform::errors::InvalidArgument("The tensor in input variable %s of " "RunProgram(Grad)Op " @@ -77,17 +76,18 @@ static void CheckInputVarStatus(const Variable &var, static void CheckOutputVarStatus(const Variable &src_var, const Variable &dst_var, const std::string &var_name) { - if (dst_var.IsType()) { + if (dst_var.IsType()) { PADDLE_ENFORCE_EQ( - src_var.IsType(), + src_var.IsType(), true, platform::errors::InvalidArgument( "The output variable %s get from " "RunProgram(Grad)Op's internal scope holds " - "wrong type. Expect type is LoDTensor, but receive type is %s.", + "wrong type. Expect type is phi::DenseTensor, but receive type is " + "%s.", var_name, platform::demangle(framework::ToTypeName(src_var.Type())))); - PADDLE_ENFORCE_EQ(src_var.Get().IsInitialized(), + PADDLE_ENFORCE_EQ(src_var.Get().IsInitialized(), true, platform::errors::InvalidArgument( "The tensor in output variable %s get from " @@ -115,7 +115,7 @@ static void CheckOutputVarStatus(const Variable &src_var, } else { PADDLE_THROW(platform::errors::InvalidArgument( "The RunProgram(Grad)Op only support output " - "variable of type LoDTensor or SelectedRows, " + "variable of type phi::DenseTensor or SelectedRows, " "but received variable %s's type is %s", var_name, platform::demangle(framework::ToTypeName(dst_var.Type())))); @@ -123,12 +123,12 @@ static void CheckOutputVarStatus(const Variable &src_var, } static void VariableShare(const Variable &src_var, Variable *dst_var) { - // The previous check ensures that the variable type can only be LoDTensor or - // SelectedRows. - if (src_var.IsType()) { - auto *lod_tensor = dst_var->GetMutable(); - lod_tensor->ShareDataWith(src_var.Get()); - lod_tensor->set_lod(src_var.Get().lod()); + // The previous check ensures that the variable type can only be + // phi::DenseTensor or SelectedRows. 
+ if (src_var.IsType()) { + auto *lod_tensor = dst_var->GetMutable(); + lod_tensor->ShareDataWith(src_var.Get()); + lod_tensor->set_lod(src_var.Get().lod()); } else if (src_var.IsType()) { auto *selected_rows = dst_var->GetMutable(); selected_rows->mutable_value()->ShareDataWith( diff --git a/paddle/fluid/operators/save_combine_op.cc b/paddle/fluid/operators/save_combine_op.cc index a25241d368affb..41780561144b1a 100644 --- a/paddle/fluid/operators/save_combine_op.cc +++ b/paddle/fluid/operators/save_combine_op.cc @@ -54,7 +54,7 @@ class SaveCombineOpProtoMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( SaveCombine operator -This operator will serialize and write a list of input LoDTensor variables +This operator will serialize and write a list of input phi::DenseTensor variables to a file on disk. )DOC"); AddAttr("overwrite", @@ -70,7 +70,7 @@ to a file on disk. AddAttr( "file_path", "(string)" - "The \"file_path\" where the LoDTensor variables will be saved.") + "The \"file_path\" where the phi::DenseTensor variables will be saved.") .AddCustomChecker( [](const std::string& path) { return !path.empty(); }); AddAttr("save_to_memory", diff --git a/paddle/fluid/operators/save_combine_op.h b/paddle/fluid/operators/save_combine_op.h index 20baefe5974281..fd54202a75d3fa 100644 --- a/paddle/fluid/operators/save_combine_op.h +++ b/paddle/fluid/operators/save_combine_op.h @@ -72,13 +72,14 @@ class SaveCombineOpKernel : public framework::OpKernel { inp_vars[i], platform::errors::InvalidArgument("Cannot find variable %s to save.", inp_var_names[i])); - PADDLE_ENFORCE_EQ(inp_vars[i]->IsType() || - inp_vars[i]->IsType(), - true, - platform::errors::InvalidArgument( - "SaveCombine operator only supports saving " - "LoDTensor or Vocab variable, %s has wrong type.", - inp_var_names[i])); + PADDLE_ENFORCE_EQ( + inp_vars[i]->IsType() || + inp_vars[i]->IsType(), + true, + platform::errors::InvalidArgument( + "SaveCombine operator only supports saving " + "phi::DenseTensor or Vocab variable, %s has wrong type.", + inp_var_names[i])); if (inp_vars[i]->IsType()) { auto &tensor = inp_vars[i]->Get(); diff --git a/paddle/fluid/operators/save_op.cc b/paddle/fluid/operators/save_op.cc index 586482ff7d7726..179a18ba8d7cee 100644 --- a/paddle/fluid/operators/save_op.cc +++ b/paddle/fluid/operators/save_op.cc @@ -40,11 +40,12 @@ class SaveOp : public framework::OperatorWithKernel { class SaveOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "(Tensor ) Input LoDTensor and SelectedRows to be saved"); + AddInput("X", + "(Tensor ) Input phi::DenseTensor and SelectedRows to be saved"); AddComment(R"DOC( Save operator -This operator will serialize and write LoDTensor / SelectedRows variable to file on disk. +This operator will serialize and write phi::DenseTensor / SelectedRows variable to file on disk. 
)DOC"); AddAttr("overwrite", "(boolean, default true)" diff --git a/paddle/fluid/operators/save_op.h b/paddle/fluid/operators/save_op.h index ddb84af69d14a6..7b78ac1ecea876 100644 --- a/paddle/fluid/operators/save_op.h +++ b/paddle/fluid/operators/save_op.h @@ -64,7 +64,8 @@ class SaveOpKernel : public framework::OpKernel { SaveSelectedRows(ctx, place, input_var, filename); } else { PADDLE_THROW(platform::errors::InvalidArgument( - "Save operator only supports saving LoDTensor and SelectedRows " + "Save operator only supports saving phi::DenseTensor and " + "SelectedRows " "variable, %s has wrong type", iname)); } diff --git a/paddle/fluid/operators/search_compute.h b/paddle/fluid/operators/search_compute.h index bbbcce34c4a54b..34728c86c56b6e 100644 --- a/paddle/fluid/operators/search_compute.h +++ b/paddle/fluid/operators/search_compute.h @@ -29,7 +29,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using LoD = framework::LoD; template diff --git a/paddle/fluid/operators/select_input_op.cc b/paddle/fluid/operators/select_input_op.cc index 5af8ba45c496b2..3b00aab8c8e894 100644 --- a/paddle/fluid/operators/select_input_op.cc +++ b/paddle/fluid/operators/select_input_op.cc @@ -73,7 +73,7 @@ class SelectInputOpProtoMaker : public framework::OpProtoAndCheckerMaker { // Because this op is blocking whole control flow. I am implementing MVP // (minimal viable product) here. AddComment(R"DOC( -Merge branches of LoDTensor into a single Output with a mask integer +Merge branches of phi::DenseTensor into a single Output with a mask integer specifying the output branchi. )DOC"); } diff --git a/paddle/fluid/operators/select_output_op.cc b/paddle/fluid/operators/select_output_op.cc index bfc0d4a4c1f18f..f57933bab0c0b8 100644 --- a/paddle/fluid/operators/select_output_op.cc +++ b/paddle/fluid/operators/select_output_op.cc @@ -71,7 +71,9 @@ class SelectOutputOp : public framework::OperatorBase { class SelectOutputOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "The input LoDTensor or LoDTensorArray or SelectedRows."); + AddInput( + "X", + "The input phi::DenseTensor or phi::DenseTensorArray or SelectedRows."); AddInput("Mask", "Tensor with numel 1 specifying which branch to output"); AddOutput("Out", "The output can contains multiple variables. The output of " diff --git a/paddle/fluid/operators/shape_op.cc b/paddle/fluid/operators/shape_op.cc index 3a243f24ff8cf3..6849b4e42721e2 100644 --- a/paddle/fluid/operators/shape_op.cc +++ b/paddle/fluid/operators/shape_op.cc @@ -47,11 +47,11 @@ class ShapeOp : public framework::OperatorWithKernel { class ShapeOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("Input", "(LoDTensor), The input tensor."); - AddOutput( - "Out", - "(LoDTensor), The shape of input tensor, the data type of the shape" - " is int32_t, will be on the same device with the input Tensor."); + AddInput("Input", "(phi::DenseTensor), The input tensor."); + AddOutput("Out", + "(phi::DenseTensor), The shape of input tensor, the data type of " + "the shape" + " is int32_t, will be on the same device with the input Tensor."); AddComment(R"DOC( Shape Operator. 
diff --git a/paddle/fluid/operators/shape_op_mlu.cc b/paddle/fluid/operators/shape_op_mlu.cc index 2863367e97e941..bd51b49851840b 100644 --- a/paddle/fluid/operators/shape_op_mlu.cc +++ b/paddle/fluid/operators/shape_op_mlu.cc @@ -21,7 +21,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; template @@ -33,7 +32,7 @@ class ShapeMLUKernel : public framework::OpKernel { if (in_var->IsType()) { in_dims = in_var->Get().value().dims(); } else { - in_dims = in_var->Get().dims(); + in_dims = in_var->Get().dims(); } auto* out_t = ctx.Output("Out"); out_t->Resize({in_dims.size()}); diff --git a/paddle/fluid/operators/shard_index_op.cc b/paddle/fluid/operators/shard_index_op.cc index e601a50409936b..4c22efc2af2993 100644 --- a/paddle/fluid/operators/shard_index_op.cc +++ b/paddle/fluid/operators/shard_index_op.cc @@ -37,7 +37,8 @@ class ShardIndexOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "(LoDTensor, LoDTensor) Input variable. Each value " + "(phi::DenseTensor, phi::DenseTensor) Input variable. " + "Each value " "of X is an index."); AddOutput( "Out", diff --git a/paddle/fluid/operators/shard_index_op_npu.cc b/paddle/fluid/operators/shard_index_op_npu.cc index 53a352b7fc76ad..3cc025ca9ed64c 100644 --- a/paddle/fluid/operators/shard_index_op_npu.cc +++ b/paddle/fluid/operators/shard_index_op_npu.cc @@ -18,15 +18,14 @@ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; using Tensor = phi::DenseTensor; template class ShardIndexNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { VLOG(4) << "start kernel"; - auto* in = context.Input("X"); - auto* out = context.Output("Out"); + auto* in = context.Input("X"); + auto* out = context.Output("Out"); int index_num = context.Attr("index_num"); int nshards = context.Attr("nshards"); int shard_id = context.Attr("shard_id"); diff --git a/paddle/fluid/operators/share_data_op.cc b/paddle/fluid/operators/share_data_op.cc index 7f903ba3440b5a..69e16bb7ac9c9c 100644 --- a/paddle/fluid/operators/share_data_op.cc +++ b/paddle/fluid/operators/share_data_op.cc @@ -34,7 +34,7 @@ class ShareDataOp : public framework::OperatorWithKernel { in_type == framework::proto::VarType::SELECTED_ROWS, true, platform::errors::InvalidArgument( - "Type of Variable[X] must be LoDTensor or SelectedRows!")); + "Type of Variable[X] must be phi::DenseTensor or SelectedRows!")); PADDLE_ENFORCE_EQ( in_type, out_type, diff --git a/paddle/fluid/operators/shrink_rnn_memory_op.cc b/paddle/fluid/operators/shrink_rnn_memory_op.cc index ad932f8ce22b57..077e51c707cfa1 100644 --- a/paddle/fluid/operators/shrink_rnn_memory_op.cc +++ b/paddle/fluid/operators/shrink_rnn_memory_op.cc @@ -92,12 +92,13 @@ class ShrinkRNNMemoryOp : public ArrayOp { class ShrinkRNNMemoryOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "(LoDTensor) The RNN step memory to be shrank."); + AddInput("X", "(phi::DenseTensor) The RNN step memory to be shrank."); AddInput("RankTable", "(LoDRankTable) The lod_rank_table of dynamic RNN."); - AddInput("I", - "(LoDTensor) The step index. The RNN step memory 'X' will be " - "shrank to match the size of the input of the index'th step."); - AddOutput("Out", "(LoDTensor) The shrank RNN step memory."); + AddInput( + "I", + "(phi::DenseTensor) The step index. 
The RNN step memory 'X' will be " + "shrank to match the size of the input of the index'th step."); + AddOutput("Out", "(phi::DenseTensor) The shrank RNN step memory."); AddComment(R"DOC( This operator is used to shrink output batch of memory defined in dynamic RNN. diff --git a/paddle/fluid/operators/shuffle_batch_op.cc b/paddle/fluid/operators/shuffle_batch_op.cc index 2fe8512b4b1550..6eeec761120b04 100644 --- a/paddle/fluid/operators/shuffle_batch_op.cc +++ b/paddle/fluid/operators/shuffle_batch_op.cc @@ -76,17 +76,18 @@ class ShuffleBatchOp : public framework::OperatorWithKernel { class ShuffleBatchOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "(LoDTensor) The input tensor of shuffle_batch op."); - AddInput("Seed", "(LoDTensor) The input seed tensor."); + AddInput("X", "(phi::DenseTensor) The input tensor of shuffle_batch op."); + AddInput("Seed", "(phi::DenseTensor) The input seed tensor."); AddAttr( "startup_seed", "If input tensor 'Seed' is not initialized, the 'startup_seed' " "will be used to replace it. The seed after shuffle batch will " "be saved in 'SeedOut'. ") .SetDefault(0); - AddOutput("Out", "(LoDTensor) The output tensor of shuffle_batch op."); + AddOutput("Out", + "(phi::DenseTensor) The output tensor of shuffle_batch op."); AddOutput("ShuffleIdx", "(Tensor) Record forword shuffle order"); - AddOutput("SeedOut", "(LoDTensor) Saved new generated seed."); + AddOutput("SeedOut", "(phi::DenseTensor) Saved new generated seed."); AddComment(R"DOC( Shuffle Batch Operator. diff --git a/paddle/fluid/operators/shuffle_batch_op.h b/paddle/fluid/operators/shuffle_batch_op.h index c445648f7569e3..2f1fbee16e3d9a 100644 --- a/paddle/fluid/operators/shuffle_batch_op.h +++ b/paddle/fluid/operators/shuffle_batch_op.h @@ -33,7 +33,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; template using Vector = framework::Vector; @@ -42,11 +41,11 @@ template class ShuffleBatchKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *x = context.Input("X"); - auto *seed = context.Input("Seed"); - auto *out = context.Output("Out"); - auto *shuffleidx = context.Output("ShuffleIdx"); - auto *seed_out = context.Output("SeedOut"); + auto *x = context.Input("X"); + auto *seed = context.Input("Seed"); + auto *out = context.Output("Out"); + auto *shuffleidx = context.Output("ShuffleIdx"); + auto *seed_out = context.Output("SeedOut"); auto x_embed_size = x->dims()[x->dims().size() - 1]; auto elem_size = 1; @@ -128,9 +127,11 @@ template class ShuffleBatchGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *out_grad = context.Input(framework::GradVarName("Out")); - auto *shuffleidx = context.Input("ShuffleIdx"); - auto *x_grad = context.Output(framework::GradVarName("X")); + auto *out_grad = + context.Input(framework::GradVarName("Out")); + auto *shuffleidx = context.Input("ShuffleIdx"); + auto *x_grad = + context.Output(framework::GradVarName("X")); auto embed_size = out_grad->dims()[out_grad->dims().size() - 1]; auto elem_size = 1; diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc index 07867f5070b3c2..d6f48d334759d9 100644 --- a/paddle/fluid/operators/slice_op.cc +++ b/paddle/fluid/operators/slice_op.cc @@ -203,9 +203,9 @@ class SliceOpVarTypeInference : public framework::VarTypeInference { auto 
not_decrease = paddle::get>(decrease_axis).size() == 0; if (not_decrease) { - // The default type of out is LoDTensor. + // The default type of out is phi::DenseTensor. // However, if no axis is decreased and the type of input is not - // LoDTensor, the type of out should be the same as input. + // phi::DenseTensor, the type of out should be the same as input. // For example, input is a LoDTensorArray and no axis is decreased, the // output should be a LoDTensorArray. ctx->SetOutputType(out_name, ctx->GetInputType(x_name)); @@ -369,8 +369,8 @@ class SliceOpGradVarTypeInference : public framework::VarTypeInference { auto d_out = framework::GradVarName("Out"); auto out = framework::GradVarName("Input"); // The types of grad_input and input should always be the same. - // The default type of out is LoDTensor, but the type of input can be - // LoDTensor or LoDTensorArray, + // The default type of out is phi::DenseTensor, but the type of input can be + // phi::DenseTensor or phi::DenseTensorArray, // so set the type of both to be the same. ctx->SetOutputType(out, ctx->GetInputType(x)); ctx->SetOutputDataType(out, ctx->GetInputDataType(d_out)); diff --git a/paddle/fluid/operators/split_lod_tensor_op.cc b/paddle/fluid/operators/split_lod_tensor_op.cc index 2a64319d986653..e648575a1edca1 100644 --- a/paddle/fluid/operators/split_lod_tensor_op.cc +++ b/paddle/fluid/operators/split_lod_tensor_op.cc @@ -145,20 +145,20 @@ class SplitLoDTensorOp : public framework::OperatorBase { class SplitLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "The input LoDTensor"); + AddInput("X", "The input phi::DenseTensor"); AddInput("Mask", "A bool column vector which mask the input"); - AddOutput("OutTrue", "True branch of input LoDTensor"); - AddOutput("OutFalse", "False branch of input LoDTensor"); + AddOutput("OutTrue", "True branch of input phi::DenseTensor"); + AddOutput("OutFalse", "False branch of input phi::DenseTensor"); AddAttr("level", "(int) the specific lod level to split.") .SetDefault(0) .EqualGreaterThan(0); AddComment( R"DOC( - Split a LoDTensor with a Mask at certain level. The input LoDTensor + Split a phi::DenseTensor with a Mask at certain level. The input phi::DenseTensor has 3 sequence at certain lod level. The Mask is a bool column vector, such as [0, 1, 0] at the same level. The first and third sequence will - be send to False Output LoDTensor; whereas the second sequence will - be send to True Output LoDTensor. Please refer to MergeLoDTensorOp.)DOC"); + be send to False Output phi::DenseTensor; whereas the second sequence will + be send to True Output phi::DenseTensor. Please refer to MergeLoDTensorOp.)DOC"); } }; diff --git a/paddle/fluid/operators/split_op.cc b/paddle/fluid/operators/split_op.cc index 0c2d79a664ea82..fc7e8a869e3ef8 100644 --- a/paddle/fluid/operators/split_op.cc +++ b/paddle/fluid/operators/split_op.cc @@ -21,7 +21,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; using framework::Variable; @@ -77,11 +76,11 @@ class SplitOp : public framework::OperatorWithKernel { const paddle::small_vector §ions_varptr_list = ctx->GetInputVarPtrs("SectionsTensorList"); - std::vector sections_from_tensor; + std::vector sections_from_tensor; sections_from_tensor.reserve(sections_tensor_list_size); for (const auto §ion_varptr : sections_varptr_list) { Variable *var = PADDLE_GET_CONST(Variable *, section_varptr); - sections_from_tensor.emplace_back(var->Get()); + sections_from_tensor.emplace_back(var->Get()); } sections_final = std::move(phi::IntArray(sections_from_tensor)); } else if (!ctx->IsRuntime() && ctx->HasInputs("SectionsTensorList")) { diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index a4ec6b6cf6d500..098167cb69d7a8 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -128,11 +128,12 @@ class SumOp : public framework::OperatorWithKernel { class SumOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", - "A Varaible list. The shape and data type of the list elements" - "should be consistent. Variable can be multi-dimensional Tensor" - "or LoDTensor, and data types can be: float32, float64, int32, " - "int64.") + AddInput( + "X", + "A Varaible list. The shape and data type of the list elements" + "should be consistent. Variable can be multi-dimensional Tensor" + "or phi::DenseTensor, and data types can be: float32, float64, int32, " + "int64.") .AsDuplicable(); AddOutput("Out", "the sum of input :code:`x`. its shape and data types are " @@ -145,8 +146,9 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker { "(string, default \"float32\"). Data type of mkldnn kernel") .SetDefault("float32") .InEnum({"float32", "bfloat16"}); - AddComment(R"DOC(This OP is used to sum one or more Tensor or LoDTensor - of the input. If the input is LoDTensor, the output only + AddComment( + R"DOC(This OP is used to sum one or more Tensor or phi::DenseTensor + of the input. 
If the input is phi::DenseTensor, the output only shares LoD information with the first input.)DOC"); } }; diff --git a/paddle/fluid/operators/sum_op_mlu.cc b/paddle/fluid/operators/sum_op_mlu.cc index af1cb524631b3e..aad62e9ce2c333 100644 --- a/paddle/fluid/operators/sum_op_mlu.cc +++ b/paddle/fluid/operators/sum_op_mlu.cc @@ -21,7 +21,6 @@ namespace operators { using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; -using LoDTensor = phi::DenseTensor; template class SumMLUKernel : public framework::OpKernel { diff --git a/paddle/fluid/operators/sum_op_npu.cc b/paddle/fluid/operators/sum_op_npu.cc index b6c4ddb73f20b5..20cc7ec18b8b78 100644 --- a/paddle/fluid/operators/sum_op_npu.cc +++ b/paddle/fluid/operators/sum_op_npu.cc @@ -25,7 +25,6 @@ namespace operators { using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; -using LoDTensor = phi::DenseTensor; template class SumNPUKernel : public framework::OpKernel { diff --git a/paddle/fluid/operators/svd_helper.h b/paddle/fluid/operators/svd_helper.h index a12b70bbdae3ac..d6c306ff2a9f3e 100644 --- a/paddle/fluid/operators/svd_helper.h +++ b/paddle/fluid/operators/svd_helper.h @@ -744,7 +744,7 @@ struct DeviceIndependenceTensorOperations { const framework::AttributeMap& attrs, std::vector out_shape, NameOutTensor out_str = {"Out"}) { - // varialble set dims must be LoDTensor / SelectedRowTensor + // varialble set dims must be phi::DenseTensor / SelectedRowTensor framework::Scope& local_scope = context.scope().NewScope(); framework::VariableNameMap op_outputs; for (auto out_name : out_str) { diff --git a/paddle/fluid/operators/tdm_child_op.h b/paddle/fluid/operators/tdm_child_op.h index af417b169978e2..3f781ab65eeb80 100644 --- a/paddle/fluid/operators/tdm_child_op.h +++ b/paddle/fluid/operators/tdm_child_op.h @@ -29,16 +29,15 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using DDim = framework::DDim; using LoD = framework::LoD; template void TDMChildInner(const framework::ExecutionContext &context, - const LoDTensor &input, - const LoDTensor &tree_info, - LoDTensor *child, - LoDTensor *mask) { + const phi::DenseTensor &input, + const phi::DenseTensor &tree_info, + phi::DenseTensor *child, + phi::DenseTensor *mask) { auto child_nums = context.Attr("child_nums"); auto info_dims = tree_info.dims(); int node_nums = info_dims[0]; @@ -114,7 +113,7 @@ class TDMChildKernel : public framework::OpKernel { auto *input_var = ctx.InputVar("X"); auto *tree_info_var = ctx.InputVar("TreeInfo"); - auto &input_tensor = input_var->Get(); + auto &input_tensor = input_var->Get(); const auto &input_type = framework::TransToProtoVarType(input_tensor.dtype()); bool input_type_match = input_type == framework::proto::VarType::INT32 || @@ -130,7 +129,7 @@ class TDMChildKernel : public framework::OpKernel { paddle::framework::DataTypeToString( framework::proto::VarType::INT64))); - auto &tree_info_tensor = tree_info_var->Get(); + auto &tree_info_tensor = tree_info_var->Get(); const auto &info_type = framework::TransToProtoVarType(tree_info_tensor.dtype()); bool info_type_match = info_type == framework::proto::VarType::INT32 || diff --git a/paddle/fluid/operators/tdm_sampler_op.h b/paddle/fluid/operators/tdm_sampler_op.h index af70476a4e536c..d98680c574154a 100644 --- a/paddle/fluid/operators/tdm_sampler_op.h +++ b/paddle/fluid/operators/tdm_sampler_op.h @@ -33,17 +33,16 @@ using Tensor = phi::DenseTensor; using Sampler = math::Sampler; using DDim = framework::DDim; 
using LoD = framework::LoD; -using LoDTensor = phi::DenseTensor; using LoDAndOffset = std::pair>; template void TDMSamplerInner(const framework::ExecutionContext &context, - const LoDTensor &input_tensor, - const LoDTensor &travel_lod_tensor, - const LoDTensor &layer_lod_tensor, - LoDTensor *out_tensor, - LoDTensor *label_tensor, - LoDTensor *mask_tensor) { + const phi::DenseTensor &input_tensor, + const phi::DenseTensor &travel_lod_tensor, + const phi::DenseTensor &layer_lod_tensor, + phi::DenseTensor *out_tensor, + phi::DenseTensor *label_tensor, + phi::DenseTensor *mask_tensor) { auto neg_samples_num_vec = context.Attr>("neg_samples_num_list"); auto layer_offset_lod = context.Attr>("layer_offset_lod"); diff --git a/paddle/fluid/operators/transfer_layout_op.cc b/paddle/fluid/operators/transfer_layout_op.cc index 84f1948cd64a1e..5bba1c225a5882 100644 --- a/paddle/fluid/operators/transfer_layout_op.cc +++ b/paddle/fluid/operators/transfer_layout_op.cc @@ -94,8 +94,9 @@ class TransferLayoutKernel { class TransferLayoutOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "(LoDTensor) The input Tensor"); - AddOutput("Out", "(LoDTensor) The Output Tensor with desired layout"); + AddInput("X", "(phi::DenseTensor) The input Tensor"); + AddOutput("Out", + "(phi::DenseTensor) The Output Tensor with desired layout"); // NOTE(zhiqiu): in most case, the src_layout is not needed, the op can use // the layout // of input X. However, in some mkldnn kernel, the src layout computed by diff --git a/paddle/fluid/operators/var_conv_2d_op.cc b/paddle/fluid/operators/var_conv_2d_op.cc index 2a795a21d34773..35118ae64876c6 100644 --- a/paddle/fluid/operators/var_conv_2d_op.cc +++ b/paddle/fluid/operators/var_conv_2d_op.cc @@ -25,16 +25,17 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using LoD = framework::LoD; void VarConv2dOpMaker::Make() { AddInput("X", - "X (LoDTensor, default LoDTensor) Input variable which " + "X (phi::DenseTensor, default phi::DenseTensor) Input " + "variable which " "should contain lod information."); - AddInput("ROW", "(LoDTensor) the row variable provides lod information"); + AddInput("ROW", + "(phi::DenseTensor) the row variable provides lod information"); AddInput("COLUMN", - "(LoDTensor) the column variable provides lod information"); + "(phi::DenseTensor) the column variable provides lod information"); AddInput("W", "W (Tensor), the filter."); AddAttr("InputChannel", "the input filter num").SetDefault(1); AddAttr("OutputChannel", "the output filter num").SetDefault(1); @@ -43,9 +44,12 @@ void VarConv2dOpMaker::Make() { AddAttr("KernelH", "the height of Kernel").SetDefault(1); AddAttr("KernelW", "the width of Kernel").SetDefault(1); - AddOutput("Out", "(LoDTensor, default LoDTensor) Output variable"); + AddOutput( + "Out", + "(phi::DenseTensor, default phi::DenseTensor) Output variable"); AddOutput("Col", - "(LoDTensor, default LoDTensor) the intermediate result " + "(phi::DenseTensor, default phi::DenseTensor) the " + "intermediate result " "variable"); AddComment(R"DOC( @@ -125,7 +129,7 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { if (ctx->IsRuntime()) { framework::Variable* x_var = PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("X")[0]); - const auto& x_lod = x_var->Get().lod(); + const auto& x_lod = x_var->Get().lod(); PADDLE_ENFORCE_EQ( !x_lod.empty(), true, @@ -146,7 +150,7 @@ void 
VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { framework::Variable* row_var = PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("ROW")[0]); - const auto& row_lod = row_var->Get().lod(); + const auto& row_lod = row_var->Get().lod(); PADDLE_ENFORCE_EQ(!row_lod.empty(), true, platform::errors::InvalidArgument( @@ -155,7 +159,7 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { framework::Variable* col_var = PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("COLUMN")[0]); - const auto& col_lod = col_var->Get().lod(); + const auto& col_lod = col_var->Get().lod(); PADDLE_ENFORCE_EQ(!col_lod.empty(), true, platform::errors::InvalidArgument( @@ -175,11 +179,11 @@ template class CPUVarConv2dOPKernel : public framework::OpKernel { public: void Im2Col(const framework::ExecutionContext& ctx, - const LoDTensor& input, - LoDTensor* col) const { + const phi::DenseTensor& input, + phi::DenseTensor* col) const { int input_channel = ctx.Attr("InputChannel"); - auto* in_row = ctx.Input("ROW"); - auto* in_col = ctx.Input("COLUMN"); + auto* in_row = ctx.Input("ROW"); + auto* in_col = ctx.Input("COLUMN"); int kernel_h = ctx.Attr("KernelH"); int kernel_w = ctx.Attr("KernelW"); int stride_h = ctx.Attr("StrideH"); @@ -267,12 +271,12 @@ class CPUVarConv2dOPKernel : public framework::OpKernel { } void Compute(const framework::ExecutionContext& ctx) const override { - auto* bottom = ctx.Input("X"); - auto* in_row = ctx.Input("ROW"); - auto* in_col = ctx.Input("COLUMN"); + auto* bottom = ctx.Input("X"); + auto* in_row = ctx.Input("ROW"); + auto* in_col = ctx.Input("COLUMN"); auto* w = ctx.Input("W"); - auto* top = ctx.Output("Out"); - auto* col = ctx.Output("Col"); + auto* top = ctx.Output("Out"); + auto* col = ctx.Output("Col"); int output_channel = ctx.Attr("OutputChannel"); int input_channel = ctx.Attr("InputChannel"); @@ -390,10 +394,10 @@ template class CPUVarConv2dOPGradKernel : public framework::OpKernel { public: void Im2ColGrad(const framework::ExecutionContext& ctx, T* top_diff) const { - auto* x = ctx.Input("X"); - auto* in_row = ctx.Input("ROW"); - auto* in_col = ctx.Input("COLUMN"); - auto* col = ctx.Input("Col"); + auto* x = ctx.Input("X"); + auto* in_row = ctx.Input("ROW"); + auto* in_col = ctx.Input("COLUMN"); + auto* col = ctx.Input("Col"); int input_channel = ctx.Attr("InputChannel"); int kernel_h = ctx.Attr("KernelH"); @@ -401,7 +405,7 @@ class CPUVarConv2dOPGradKernel : public framework::OpKernel { int stride_h = ctx.Attr("StrideH"); int stride_w = ctx.Attr("StrideW"); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto* dx_data = dx->mutable_data(ctx.GetPlace()); memset(dx_data, 0.0, x->dims()[0] * x->dims()[1] * sizeof(T)); @@ -450,18 +454,18 @@ class CPUVarConv2dOPGradKernel : public framework::OpKernel { } void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); + auto* x = ctx.Input("X"); auto* w = ctx.Input("W"); - auto* col = ctx.Input("Col"); - auto* out = ctx.Input("Out"); + auto* col = ctx.Input("Col"); + auto* out = ctx.Input("Out"); int output_channel = ctx.Attr("OutputChannel"); int input_channel = ctx.Attr("InputChannel"); int kernel_h = ctx.Attr("KernelH"); int kernel_w = ctx.Attr("KernelW"); - auto* d_out = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* d_out = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto* d_w = 
ctx.Output(framework::GradVarName("W")); Tensor col_grad; diff --git a/paddle/fluid/operators/var_conv_2d_op.h b/paddle/fluid/operators/var_conv_2d_op.h index 84c766767f1026..1a5fa9de2c7ced 100644 --- a/paddle/fluid/operators/var_conv_2d_op.h +++ b/paddle/fluid/operators/var_conv_2d_op.h @@ -20,7 +20,6 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; using LoD = framework::LoD; class VarConv2dOP : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/warpctc_op.cc b/paddle/fluid/operators/warpctc_op.cc index 99059ffaa28976..3f09b20068975e 100644 --- a/paddle/fluid/operators/warpctc_op.cc +++ b/paddle/fluid/operators/warpctc_op.cc @@ -44,8 +44,8 @@ class WarpCTCOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("Logits", - "(2-D LoDTensor) or (3-D Tensor), the " - "unscaled probabilities of variable-length sequences." + "(2-D phi::DenseTensor) or (3-D phi::DenseTensor), " + "the unscaled probabilities of variable-length sequences." "When is a 2-D Tensor with LoD information, " "it's shape is [Lp, num_classes + 1], " "where Lp is the sum of all input sequences' length " @@ -56,7 +56,7 @@ class WarpCTCOpMaker : public framework::OpProtoAndCheckerMaker { "where max_logit_length is the length of the longest " "logit sequence."); AddInput("Label", - "(2-D LoDTensor) or (2-D Tensor), the " + "(2-D phi::DenseTensor), the " "ground truth of variable-length sequence. " "When it is a 2-D Tensor with LoD information, " "it is of the shape [Lg, 1], where Lg is th sum of " From d86aa4ca0c4f0e8efeb13f5bfe4693f670cc018b Mon Sep 17 00:00:00 2001 From: Paulina Gacek Date: Tue, 29 Nov 2022 12:27:20 +0100 Subject: [PATCH 040/154] [PHI] traspose2 kernel migration (#47748) * traspose2 kernel migrated * Got rid of mutable_data * x modification added * ops added in extra info file * Formatting fix * 2 fuse passes with tanpose2 commented * nr of outs changed in 2 passes, passes uncommented * Changes in passes reverted * transpose chnaged in operator.cc * MKLDNN check in operator.cc * Transpose fixes * Fix deleted from operato * template corrected Co-authored-by: Paulina Gacek --- .../operators/mkldnn/transpose_mkldnn_op.cc | 18 +-- paddle/fluid/operators/ops_extra_info.h | 3 + paddle/fluid/platform/mkldnn_reuse.h | 44 ------ paddle/phi/backends/onednn/onednn_reuse.h | 80 ++++++++++ .../kernels/onednn/transpose_grad_kernel.cc | 2 +- paddle/phi/kernels/onednn/transpose_kernel.cc | 140 ++++++++++++++++++ 6 files changed, 226 insertions(+), 61 deletions(-) create mode 100644 paddle/phi/kernels/onednn/transpose_kernel.cc diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc index e59b901b6a38d5..f7f7e5f6ad8935 100644 --- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc @@ -42,9 +42,6 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel { auto& astream = OneDNNContext::tls().get_stream(); - platform::SetInMemDescWithLogicalLayoutFusesSupport( - ctx, const_cast(x), x->mem_desc()); - if (ndims == 1) { framework::TensorCopy(*x, x->place(), out); out->set_mem_desc(x->mem_desc()); @@ -82,11 +79,8 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel { reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); astream.wait(); - platform::SetOutMemDescWithLogicalLayoutFusesSupport( - ctx, - out, - 
reorder_dst_memory_p->get_desc().permute_axes( - TransposeToPermuteAxis(transpose_axis))); + out->set_mem_desc(reorder_dst_memory_p->get_desc().permute_axes( + TransposeToPermuteAxis(transpose_axis))); } private: @@ -180,11 +174,3 @@ REGISTER_OP_KERNEL(transpose_grad, MKLDNN, ::paddle::platform::CPUPlace, ops::TransposeMKLDNNGradOpKernel); - -REGISTER_OP_KERNEL(transpose2, - MKLDNN, - ::paddle::platform::CPUPlace, - ops::TransposeMKLDNNOpKernel, - ops::TransposeMKLDNNOpKernel, - ops::TransposeMKLDNNOpKernel, - ops::TransposeMKLDNNOpKernel); diff --git a/paddle/fluid/operators/ops_extra_info.h b/paddle/fluid/operators/ops_extra_info.h index 94adfaf3b4500b..12df9f96d6d58d 100644 --- a/paddle/fluid/operators/ops_extra_info.h +++ b/paddle/fluid/operators/ops_extra_info.h @@ -120,6 +120,9 @@ const std::unordered_map {"Scale_weights", ExtraAttrProperty::ONEDNN}, {"x_data_format", ExtraAttrProperty::ONEDNN}, {"y_data_format", ExtraAttrProperty::ONEDNN}, + {"fused_squeeze2_axes", ExtraAttrProperty::ONEDNN}, + {"fused_unsqueeze2_axes", ExtraAttrProperty::ONEDNN}, + {"fused_reshape2_shape", ExtraAttrProperty::ONEDNN}, // ONEDNN pass dedicated attributes {"Activation_scale", ExtraAttrProperty::ONEDNN}, {"Bias_scales", ExtraAttrProperty::ONEDNN}, diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 7a8ef9c939572a..0142fa2afd13de 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -151,50 +151,6 @@ static void SetOutMemDescWithLogicalLayoutFusesSupport( } } -static void SetInMemDescWithSqueeze2FuseSupport( - const framework::ExecutionContext& ctx, - phi::DenseTensor* in, - const dnnl::memory::desc& in_md) { - const std::vector fused_squeeze2_axes = - ctx.Attr>("fused_squeeze2_axes"); - const std::set squeeze2_axes_set(fused_squeeze2_axes.begin(), - fused_squeeze2_axes.end()); - const std::vector& x_vec_dims = in_md.dims(); - std::vector squeezed_op_tz( - x_vec_dims.size() - fused_squeeze2_axes.size(), 0); - - int j = 0; - for (size_t i = 0; i < x_vec_dims.size(); ++i) { - if (squeeze2_axes_set.count(i) || - squeeze2_axes_set.count(i - x_vec_dims.size())) { - PADDLE_ENFORCE_EQ( - x_vec_dims[i], - 1, - platform::errors::InvalidArgument( - "Squeeze2 input dim %d should be equal to one, but get %d.", - i, - x_vec_dims[i])); - continue; - } - squeezed_op_tz[j++] = x_vec_dims[i]; - } - - in->set_mem_desc(in_md.reshape(squeezed_op_tz)); - in->Resize(phi::make_ddim(squeezed_op_tz)); -} - -static void SetInMemDescWithLogicalLayoutFusesSupport( - const framework::ExecutionContext& ctx, - phi::DenseTensor* in, - const dnnl::memory::desc& in_md) { - if (ctx.HasAttr("fused_squeeze2_axes")) { - SetInMemDescWithSqueeze2FuseSupport(ctx, in, in_md); - } else { - in->set_mem_desc(in_md); - in->Resize(phi::make_ddim(in_md.dims())); - } -} - template class MatMulV2MKLDNNHandler : public phi::funcs::OneDNNHandlerNoCachingT { diff --git a/paddle/phi/backends/onednn/onednn_reuse.h b/paddle/phi/backends/onednn/onednn_reuse.h index 7f64f8668c91bd..dbb70cb07aaeca 100644 --- a/paddle/phi/backends/onednn/onednn_reuse.h +++ b/paddle/phi/backends/onednn/onednn_reuse.h @@ -15,6 +15,7 @@ limitations under the License. 
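A note on the var_conv_2d and warpctc hunks earlier in this series: the template arguments of ctx.Input / ctx.Output / ctx.Attr (and Variable::Get) did not survive extraction, which is why several -/+ pairs look identical. Judging from the removed alias "using LoDTensor = phi::DenseTensor;", the substantive change is presumably only the type spelled inside those template arguments. The following is a minimal sketch of that pattern, assuming the usual fluid ExecutionContext API; the function and the "KernelH" attribute are illustrative only, not a restoration of the exact diff.

    // Sketch only: how a fluid CPU kernel fetches inputs/outputs before and
    // after the LoDTensor -> phi::DenseTensor cleanup. LoDTensor was already
    // an alias of phi::DenseTensor, so only the spelled type changes.
    #include "paddle/fluid/framework/op_registry.h"

    namespace paddle {
    namespace operators {

    void ComputeSketch(const framework::ExecutionContext& ctx) {
      // before: auto* bottom = ctx.Input<LoDTensor>("X");
      auto* bottom = ctx.Input<phi::DenseTensor>("X");
      // before: auto* top = ctx.Output<LoDTensor>("Out");
      auto* top = ctx.Output<phi::DenseTensor>("Out");
      int kernel_h = ctx.Attr<int>("KernelH");  // attribute reads are untouched
      const auto& row_lod = bottom->lod();      // LoD still lives on the tensor
      (void)top;
      (void)kernel_h;
      (void)row_lod;
    }

    }  // namespace operators
    }  // namespace paddle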
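The helpers shuffled around in this patch (SetInMemDescWithSqueeze2FuseSupport removed from mkldnn_reuse.h above, plus the unsqueeze2 and reshape2 counterparts added to onednn_reuse.h below) all reduce to recomputing the logical dims that the oneDNN memory descriptor is then reshaped to. Below is a standalone sketch of that dimension bookkeeping with plain std::vector in place of dnnl::memory::desc; the function names are made up for illustration, and negative axes are normalized up front rather than via the modular check used in the real code.

    #include <cassert>
    #include <cstdint>
    #include <functional>
    #include <numeric>
    #include <set>
    #include <vector>

    // Drop the size-1 axes listed in fused_squeeze2_axes (negative axes allowed).
    std::vector<int64_t> SqueezeDims(const std::vector<int64_t>& dims,
                                     const std::vector<int>& axes) {
      std::set<int> axes_set;
      for (int a : axes)
        axes_set.insert(a < 0 ? a + static_cast<int>(dims.size()) : a);
      std::vector<int64_t> out;
      for (size_t i = 0; i < dims.size(); ++i) {
        if (axes_set.count(static_cast<int>(i))) {
          assert(dims[i] == 1);  // the real code raises InvalidArgument here
          continue;
        }
        out.push_back(dims[i]);
      }
      return out;
    }

    // Insert size-1 axes at the positions listed in fused_unsqueeze2_axes.
    std::vector<int64_t> UnsqueezeDims(const std::vector<int64_t>& dims,
                                       const std::vector<int>& axes) {
      std::vector<int64_t> out(dims.size() + axes.size(), 0);
      for (int a : axes) {
        int pos = a < 0 ? a + static_cast<int>(out.size()) : a;
        out[pos] = 1;
      }
      size_t j = 0;
      for (auto& d : out)
        if (d == 0) d = dims[j++];
      return out;
    }

    // Resolve the single -1 entry of fused_reshape2_shape from the element count.
    std::vector<int64_t> InferReshape(const std::vector<int64_t>& shape,
                                      int64_t numel) {
      std::vector<int64_t> out = shape;
      const int64_t known = std::accumulate(
          out.begin(), out.end(), int64_t{1}, std::multiplies<int64_t>());
      for (auto& d : out)
        if (d == -1) {
          d = -numel / known;  // known is negative: it still contains the -1
          break;
        }
      return out;
    }

    int main() {
      assert((SqueezeDims({2, 1, 3, 1}, {1, -1}) == std::vector<int64_t>{2, 3}));
      assert((UnsqueezeDims({2, 3}, {0, 3}) == std::vector<int64_t>{1, 2, 3, 1}));
      assert((InferReshape({4, -1}, 12) == std::vector<int64_t>{4, 3}));
      return 0;
    }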
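The phi transpose kernel introduced further down (transpose_kernel.cc) performs the whole transpose with a single reorder: the destination descriptor keeps the source dims but is given "fake" strides, so copying each element to its strided offset already lays the data out as the transposed tensor, and the descriptor is then "untransposed" with permute_axes using the inverse of Paddle's axis list (permute_axes answers "where does source axis j end up", while Paddle's axis list answers "which source axis feeds output axis i"). A standalone sketch of that arithmetic follows; variable names mirror the kernel, no oneDNN calls are involved, and the 2x3x4 example with axes {0, 2, 1} is arbitrary.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    int main() {
      const std::vector<int64_t> dims = {2, 3, 4};  // source dims
      const std::vector<int> axis = {0, 2, 1};      // Paddle transpose axes
      const int n = static_cast<int>(dims.size());

      // Fake strides, computed as in TransposeKernel: walk the output axes
      // from last to first and assign their C-contiguous strides back to the
      // corresponding source axis.
      std::vector<int64_t> fake_strides(n);
      int64_t total_stride = 1;
      for (int i = n - 1; i >= 0; --i) {
        fake_strides[axis[i]] = total_stride;
        total_stride *= dims[axis[i]];
      }
      assert((fake_strides == std::vector<int64_t>{12, 1, 3}));

      // Inverse permutation handed to permute_axes; this particular
      // permutation happens to be its own inverse.
      std::vector<int> permute_axis(n);
      for (int i = 0; i < n; ++i) permute_axis[axis[i]] = i;
      assert((permute_axis == std::vector<int>{0, 2, 1}));

      // Scattering src[i][j][k] to offset i*12 + j*1 + k*3 is the same as
      // writing the transposed tensor (shape 2x4x3) in plain row-major order.
      std::vector<float> src(2 * 3 * 4), dst(src.size());
      for (size_t v = 0; v < src.size(); ++v) src[v] = static_cast<float>(v);
      for (int64_t i = 0; i < dims[0]; ++i)
        for (int64_t j = 0; j < dims[1]; ++j)
          for (int64_t k = 0; k < dims[2]; ++k)
            dst[i * fake_strides[0] + j * fake_strides[1] + k * fake_strides[2]] =
                src[(i * dims[1] + j) * dims[2] + k];
      // Reading dst as a 2x4x3 row-major array gives dst[i][k][j] == src[i][j][k].
      assert(dst[(0 * 4 + 2) * 3 + 1] == src[(0 * 3 + 1) * 4 + 2]);
      return 0;
    }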
*/ #include #include +#include #include #include #include @@ -1660,6 +1661,85 @@ class PoolingOneDNNHandler } }; +static void SetOutMemDescWithUnsqueeze2FuseSupport( + const std::vector fused_unsqueeze2_axes, + phi::DenseTensor* out, + const dnnl::memory::desc& out_md) { + const std::vector& op_tz = out_md.dims(); + std::vector unsqueezed_op_tz( + op_tz.size() + fused_unsqueeze2_axes.size(), 0); + + for (const auto& axis : fused_unsqueeze2_axes) { + int positive_axis = axis < 0 ? unsqueezed_op_tz.size() + axis : axis; + unsqueezed_op_tz[positive_axis] = 1; + } + + int j = 0; + for (size_t i = 0; i < unsqueezed_op_tz.size(); ++i) { + if (unsqueezed_op_tz[i] == 0) { + unsqueezed_op_tz[i] = op_tz[j++]; + } + } + out->set_mem_desc(out_md.reshape(unsqueezed_op_tz)); + out->Resize(make_ddim(unsqueezed_op_tz)); +} + +static void SetOutMemDescWithReshape2FuseSupport( + const std::vector fused_reshape2_shape_, + phi::DenseTensor* out, + const dnnl::memory::desc& out_md) { + std::vector fused_reshape2_shape(fused_reshape2_shape_.begin(), + fused_reshape2_shape_.end()); + + const int out_shape_numel = out->numel(); + const int new_shape_numel = std::accumulate(fused_reshape2_shape.begin(), + fused_reshape2_shape.end(), + 1, + std::multiplies()); + + for (size_t i = 0; i < fused_reshape2_shape.size(); ++i) { + if (fused_reshape2_shape[i] == -1) { + fused_reshape2_shape[i] = -out_shape_numel / new_shape_numel; + break; + } + } + + out->set_mem_desc(out_md.reshape(fused_reshape2_shape)); + out->Resize(phi::make_ddim(fused_reshape2_shape)); +} + +static void SetOutMemDescWithLogicalLayoutFusesSupport( + const OneDNNContext& dev_ctx, + phi::DenseTensor* out, + const dnnl::memory::desc& out_md) { + const auto fused_unsqueeze2_axes = + dev_ctx.HasDnnAttr("fused_unsqueeze2_axes") + ? PADDLE_GET_CONST(std::vector, + dev_ctx.GetDnnAttr("fused_unsqueeze2_axes")) + : std::vector(); + const auto fused_reshape2_shape = + dev_ctx.HasDnnAttr("fused_reshape2_shape") + ? PADDLE_GET_CONST(std::vector, + dev_ctx.GetDnnAttr("fused_reshape2_shape")) + : std::vector(); + const auto fused_squeeze2_axes = + dev_ctx.HasDnnAttr("fused_squeeze2_axes") + ? PADDLE_GET_CONST(std::vector, + dev_ctx.GetDnnAttr("fused_squeeze2_axes")) + : std::vector(); + + if (!fused_unsqueeze2_axes.empty()) { + SetOutMemDescWithUnsqueeze2FuseSupport(fused_unsqueeze2_axes, out, out_md); + } else if (!fused_reshape2_shape.empty()) { + SetOutMemDescWithReshape2FuseSupport(fused_reshape2_shape, out, out_md); + } else if (!fused_squeeze2_axes.empty()) { + out->set_mem_desc(out_md); + out->Resize(make_ddim(out_md.dims())); + } else { + out->set_mem_desc(out_md); + } +} + static DDim RowMatrixDimsFromVector(const DDim& x_dim) { return x_dim.size() > 1 ? 
x_dim : make_ddim({1, x_dim[0]}); } diff --git a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc index a754cdffed14db..64f1f9f610861b 100644 --- a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc @@ -63,4 +63,4 @@ void TransposeGradKernel(const Context& dev_ctx, } // namespace phi PD_REGISTER_KERNEL( - transpose_grad, OneDNN, ALL_LAYOUT, phi::TransposeGradKernel, float) {} + transpose_grad, OneDNN, ONEDNN, phi::TransposeGradKernel, float) {} diff --git a/paddle/phi/kernels/onednn/transpose_kernel.cc b/paddle/phi/kernels/onednn/transpose_kernel.cc new file mode 100644 index 00000000000000..26c89197e0d7f4 --- /dev/null +++ b/paddle/phi/kernels/onednn/transpose_kernel.cc @@ -0,0 +1,140 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/transpose_kernel.h" +#include "paddle/fluid/framework/tensor_util.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { + +void SetInMemDescWithSqueeze2FuseSupport( + const std::vector fused_squeeze2_axes, + DenseTensor* in, + const dnnl::memory::desc& in_md) { + const std::set squeeze2_axes_set(fused_squeeze2_axes.begin(), + fused_squeeze2_axes.end()); + const std::vector& x_vec_dims = in_md.dims(); + std::vector squeezed_op_tz( + x_vec_dims.size() - fused_squeeze2_axes.size(), 0); + + int j = 0; + for (size_t i = 0; i < x_vec_dims.size(); ++i) { + if (squeeze2_axes_set.count(i) || + squeeze2_axes_set.count(i - x_vec_dims.size())) { + PADDLE_ENFORCE_EQ( + x_vec_dims[i], + 1, + errors::InvalidArgument( + "Squeeze2 input dim %d should be equal to one, but get %d.", + i, + x_vec_dims[i])); + continue; + } + squeezed_op_tz[j++] = x_vec_dims[i]; + } + + in->set_mem_desc(in_md.reshape(squeezed_op_tz)); + in->Resize(make_ddim(squeezed_op_tz)); +} + +void SetInMemDescWithLogicalLayoutFusesSupport( + const OneDNNContext& dev_ctx, + DenseTensor* in, + const dnnl::memory::desc& in_md) { + const auto fused_squeeze2_axes = + dev_ctx.HasDnnAttr("fused_squeeze2_axes") + ? 
PADDLE_GET_CONST(std::vector, + dev_ctx.GetDnnAttr("fused_squeeze2_axes")) + : std::vector(); + if (fused_squeeze2_axes.empty()) { + in->set_mem_desc(in_md); + in->Resize(make_ddim(in_md.dims())); + } else { + SetInMemDescWithSqueeze2FuseSupport(fused_squeeze2_axes, in, in_md); + } +} + +template +void TransposeKernel(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& axis, + DenseTensor* out) { + PADDLE_ENFORCE_EQ( + dev_ctx.GetPlace().GetType() == AllocationType::CPU, + true, + errors::PreconditionNotMet("oneDNN Transpose kernel must use CPUPlace")); + + SetInMemDescWithLogicalLayoutFusesSupport( + dev_ctx, const_cast(&x), x.mem_desc()); + + if (axis.size() == 1) { + paddle::framework::TensorCopy(x, x.place(), out); + out->set_mem_desc(x.mem_desc()); + return; + } + + auto x_vec_dims = vectorize(x.dims()); + auto x_type = funcs::ToOneDNNDataType(x.dtype()); + funcs::ReorderOneDNNHandler reorder_handler( + x_vec_dims, x.dtype(), x_type, dev_ctx.GetEngine()); + auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( + x.mem_desc(), funcs::to_void_cast(x.data())); + auto dst_md = + dnnl::memory::desc(x_vec_dims, + x.mem_desc().data_type(), + funcs::GetPlainOneDNNFormat(x_vec_dims.size())); + + // a trick is used here to fake transpose of out_md, so later it will be + // "untransposed", leaving output data in plain format tag + std::vector fake_strides(axis.size()); + auto dims = dst_md.dims(); + int total_stride = 1; + for (int i = static_cast(dims.size()) - 1; i >= 0; --i) { + fake_strides[axis[i]] = total_stride; + total_stride *= dims[axis[i]]; + } + dst_md = + dnnl::memory::desc(x_vec_dims, x.mem_desc().data_type(), fake_strides); + auto dst_data = dev_ctx.template Alloc(out); + auto reorder_dst_memory_p = + std::make_shared(dst_md, dev_ctx.GetEngine(), dst_data); + auto reorder_p = reorder_handler.AcquireReorder(reorder_dst_memory_p, + reorder_src_memory_p); + + auto& astream = OneDNNContext::tls().get_stream(); + reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); + astream.wait(); + + // it is needed because oneDNN's permute axis understand axes order in + // different way PaddlePaddle's transpose + std::vector permute_axis(axis.size()); + for (size_t i = 0; i < axis.size(); ++i) { + permute_axis[axis[i]] = i; + } + funcs::SetOutMemDescWithLogicalLayoutFusesSupport( + dev_ctx, + out, + reorder_dst_memory_p->get_desc().permute_axes(permute_axis)); +} +} // namespace phi + +PD_REGISTER_KERNEL(transpose, + OneDNN, + ONEDNN, + phi::TransposeKernel, + float, + uint8_t, + int8_t, + phi::dtype::bfloat16) {} From a559a664437d2d29953f35e8d5d0bdb4917d0538 Mon Sep 17 00:00:00 2001 From: Nyakku Shigure Date: Tue, 29 Nov 2022 19:39:02 +0800 Subject: [PATCH 041/154] [CodeStyle][isort] introduce isort (part3) (#48401) --- .../dygraph_to_static/bert_dygraph_model.py | 4 +-- .../unittests/dygraph_to_static/darknet.py | 3 +- .../unittests/dygraph_to_static/decos.py | 4 +-- .../dygraph_to_static/predictor_utils.py | 5 ++-- .../seq2seq_dygraph_model.py | 8 ++--- .../dygraph_to_static/simnet_dygraph_model.py | 6 ++-- .../simnet_dygraph_model_v2.py | 1 + .../dygraph_to_static/test_assert.py | 3 +- .../dygraph_to_static/test_ast_util.py | 15 +++++----- .../test_basic_api_transformation.py | 10 +++---- .../unittests/dygraph_to_static/test_bert.py | 13 ++++---- .../unittests/dygraph_to_static/test_bmn.py | 14 ++++----- .../dygraph_to_static/test_break_continue.py | 8 ++--- .../dygraph_to_static/test_build_strategy.py | 7 +++-- 
.../dygraph_to_static/test_cache_program.py | 10 +++---- .../unittests/dygraph_to_static/test_cast.py | 2 ++ .../test_closure_analysis.py | 5 ++-- .../dygraph_to_static/test_container.py | 6 ++-- .../test_convert_operators.py | 4 ++- .../test_cpu_cuda_to_tensor.py | 4 ++- .../dygraph_to_static/test_cycle_gan.py | 10 +++---- .../dygraph_to_static/test_declarative.py | 18 +++++------ .../test_decorator_transform.py | 10 ++++--- .../dygraph_to_static/test_deepcopy.py | 11 ++++--- .../unittests/dygraph_to_static/test_dict.py | 7 ++--- .../dygraph_to_static/test_drop_path.py | 1 + .../test_duplicate_output.py | 3 +- .../dygraph_to_static/test_fetch_feed.py | 6 ++-- .../dygraph_to_static/test_for_enumerate.py | 5 ++-- .../dygraph_to_static/test_full_name_usage.py | 4 ++- .../dygraph_to_static/test_function_spec.py | 8 ++--- .../unittests/dygraph_to_static/test_grad.py | 8 +++-- .../test_gradient_aggregation.py | 3 +- .../dygraph_to_static/test_grid_generator.py | 7 +++-- .../dygraph_to_static/test_ifelse.py | 16 +++++----- .../dygraph_to_static/test_isinstance.py | 3 +- .../test_jit_property_save.py | 1 + .../unittests/dygraph_to_static/test_lac.py | 15 +++++----- .../dygraph_to_static/test_lambda.py | 5 ++-- .../dygraph_to_static/test_layer_hook.py | 8 +++-- .../unittests/dygraph_to_static/test_list.py | 5 ++-- .../dygraph_to_static/test_logging_utils.py | 5 ++-- .../dygraph_to_static/test_logical.py | 4 +-- .../unittests/dygraph_to_static/test_lstm.py | 8 +++-- .../unittests/dygraph_to_static/test_mnist.py | 11 ++++--- .../dygraph_to_static/test_mnist_amp.py | 8 +++-- .../dygraph_to_static/test_mnist_pure_fp16.py | 6 ++-- .../dygraph_to_static/test_mobile_net.py | 14 ++++----- .../dygraph_to_static/test_multi_forward.py | 3 +- .../dygraph_to_static/test_op_attr.py | 2 +- .../dygraph_to_static/test_origin_info.py | 6 ++-- .../dygraph_to_static/test_param_guard.py | 7 +++-- .../dygraph_to_static/test_params_no_grad.py | 6 ++-- .../dygraph_to_static/test_partial_program.py | 10 +++---- .../unittests/dygraph_to_static/test_place.py | 3 +- .../unittests/dygraph_to_static/test_print.py | 3 +- .../test_program_translator.py | 20 ++++++------- .../dygraph_to_static/test_ptb_lm.py | 5 ++-- .../dygraph_to_static/test_ptb_lm_v2.py | 1 + .../test_reinforcement_learning.py | 13 ++++---- .../dygraph_to_static/test_resnet.py | 11 ++++--- .../dygraph_to_static/test_resnet_amp.py | 2 +- .../test_resnet_pure_fp16.py | 2 +- .../dygraph_to_static/test_resnet_v2.py | 5 ++-- .../dygraph_to_static/test_return.py | 13 ++++---- .../dygraph_to_static/test_rollback.py | 8 ++--- .../dygraph_to_static/test_se_resnet.py | 12 ++++---- .../dygraph_to_static/test_sentiment.py | 6 ++-- .../dygraph_to_static/test_setter_helper.py | 1 + .../dygraph_to_static/test_simnet.py | 9 +++--- .../dygraph_to_static/test_simnet_v2.py | 5 ++-- .../unittests/dygraph_to_static/test_slice.py | 1 + .../dygraph_to_static/test_spec_names.py | 3 +- .../dygraph_to_static/test_static_analysis.py | 7 +++-- .../dygraph_to_static/test_tensor_methods.py | 4 ++- .../dygraph_to_static/test_tensor_shape.py | 3 +- .../dygraph_to_static/test_to_tensor.py | 6 ++-- .../dygraph_to_static/test_transformer.py | 8 ++--- .../unittests/dygraph_to_static/test_tsm.py | 10 ++++--- .../dygraph_to_static/test_typehint.py | 5 ++-- .../dygraph_to_static/test_typing.py | 6 ++-- .../unittests/dygraph_to_static/test_utils.py | 6 ++-- .../dygraph_to_static/test_warning.py | 3 +- .../dygraph_to_static/test_yolov3.py | 8 ++--- .../transformer_dygraph_model.py | 6 ++-- 
.../dygraph_to_static/transformer_util.py | 3 +- .../unittests/dygraph_to_static/yolov3.py | 8 ++--- .../tests/unittests/ipu/test_dy2static_ipu.py | 7 ++--- .../fluid/tests/unittests/test_Tensor_type.py | 2 ++ .../fluid/tests/unittests/test_manual_seed.py | 3 +- .../unittests/test_margin_cross_entropy_op.py | 5 ++-- .../unittests/test_margin_rank_loss_op.py | 4 ++- .../fluid/tests/unittests/test_marker_op.py | 2 ++ .../tests/unittests/test_masked_select_op.py | 2 ++ .../unittests/test_match_matrix_tensor_op.py | 2 ++ .../tests/unittests/test_math_op_patch.py | 6 ++-- .../unittests/test_math_op_patch_var_base.py | 6 ++-- .../fluid/tests/unittests/test_matmul_op.py | 2 ++ .../unittests/test_matmul_op_with_head.py | 1 + .../tests/unittests/test_matmul_v2_op.py | 5 ++-- .../tests/unittests/test_matrix_nms_op.py | 6 ++-- .../tests/unittests/test_matrix_power_op.py | 6 ++-- .../tests/unittests/test_matrix_rank_op.py | 2 +- .../unittests/test_max_min_amax_amin_op.py | 2 ++ .../fluid/tests/unittests/test_max_op.py | 6 ++-- .../fluid/tests/unittests/test_maximum_op.py | 2 ++ .../fluid/tests/unittests/test_maxout_op.py | 4 ++- .../fluid/tests/unittests/test_mean_iou.py | 1 + .../fluid/tests/unittests/test_mean_op.py | 12 ++++---- .../fluid/tests/unittests/test_median.py | 2 ++ .../fluid/tests/unittests/test_memcpy_op.py | 6 ++-- .../tests/unittests/test_memory_analysis.py | 6 ++-- .../test_memory_reuse_exclude_feed_var.py | 6 ++-- .../tests/unittests/test_memory_usage.py | 5 ++-- .../unittests/test_merge_selectedrows_op.py | 4 ++- .../tests/unittests/test_merged_adam_op.py | 4 ++- .../unittests/test_merged_momentum_op.py | 6 ++-- .../fluid/tests/unittests/test_meshgrid_op.py | 4 ++- .../fluid/tests/unittests/test_min_op.py | 4 ++- .../unittests/test_mine_hard_examples_op.py | 1 + .../fluid/tests/unittests/test_minimum_op.py | 2 ++ .../fluid/tests/unittests/test_minus_op.py | 2 ++ .../test_mix_precision_all_reduce_fuse.py | 7 +++-- .../fluid/tests/unittests/test_mode_op.py | 2 ++ .../tests/unittests/test_modelaverage.py | 4 ++- .../unittests/test_modified_huber_loss_op.py | 1 + .../fluid/tests/unittests/test_momentum_op.py | 8 +++-- .../fluid/tests/unittests/test_monitor.py | 7 +++-- .../fluid/tests/unittests/test_mse_loss.py | 4 ++- .../fluid/tests/unittests/test_mul_nn_grad.py | 7 +++-- .../fluid/tests/unittests/test_mul_op.py | 5 +++- .../tests/unittests/test_multi_dot_op.py | 3 +- .../test_multi_label_soft_margin_loss.py | 6 ++-- .../tests/unittests/test_multiclass_nms_op.py | 10 ++++--- .../unittests/test_multihead_attention.py | 4 ++- .../tests/unittests/test_multimarginloss.py | 6 ++-- .../tests/unittests/test_multinomial_op.py | 10 ++++--- .../tests/unittests/test_multiplex_op.py | 2 ++ .../fluid/tests/unittests/test_multiply.py | 2 +- .../test_multiprocess_dataloader_dataset.py | 9 +++--- .../test_multiprocess_dataloader_dynamic.py | 22 +++++++------- .../test_multiprocess_dataloader_exception.py | 5 ++-- ...ess_dataloader_iterable_dataset_dynamic.py | 22 +++++++------- ...ocess_dataloader_iterable_dataset_split.py | 1 + ...cess_dataloader_iterable_dataset_static.py | 1 + .../test_multiprocess_dataloader_static.py | 1 + .../test_multiprocess_reader_exception.py | 6 ++-- .../fluid/tests/unittests/test_mv_op.py | 6 ++-- .../test_naive_best_fit_gpu_memory_limit.py | 4 ++- .../fluid/tests/unittests/test_name_scope.py | 3 +- .../fluid/tests/unittests/test_nan_inf.py | 5 ++-- .../tests/unittests/test_nan_to_num_op.py | 2 ++ .../fluid/tests/unittests/test_nanmean_api.py | 2 ++ 
.../fluid/tests/unittests/test_nanmedian.py | 2 ++ .../fluid/tests/unittests/test_nansum_api.py | 2 ++ .../paddle/fluid/tests/unittests/test_nce.py | 7 +++-- .../tests/unittests/test_nearest_interp_op.py | 4 ++- .../unittests/test_nearest_interp_v2_op.py | 6 ++-- .../fluid/tests/unittests/test_neg_op.py | 2 ++ .../fluid/tests/unittests/test_newprofiler.py | 12 ++++---- .../fluid/tests/unittests/test_nll_loss.py | 8 +++-- .../fluid/tests/unittests/test_nms_op.py | 2 ++ .../test_nn_functional_embedding_dygraph.py | 3 +- .../test_nn_functional_embedding_static.py | 2 ++ .../unittests/test_nn_functional_hot_op.py | 2 ++ .../fluid/tests/unittests/test_nn_grad.py | 7 +++-- .../unittests/test_nn_margin_rank_loss.py | 2 ++ .../tests/unittests/test_nn_matmul_v2_grad.py | 5 ++-- .../test_nn_quant_functional_layers.py | 2 ++ .../tests/unittests/test_nn_sigmoid_op.py | 5 ++-- .../fluid/tests/unittests/test_nonzero_api.py | 2 ++ .../fluid/tests/unittests/test_norm_all.py | 4 ++- .../tests/unittests/test_norm_nn_grad.py | 10 +++---- .../fluid/tests/unittests/test_norm_op.py | 4 ++- .../fluid/tests/unittests/test_normal.py | 4 ++- .../unittests/test_normalization_wrapper.py | 4 ++- .../fluid/tests/unittests/test_normalize.py | 6 ++-- .../tests/unittests/test_npair_loss_op.py | 6 ++-- .../tests/unittests/test_npu_identity_op.py | 2 ++ .../tests/unittests/test_number_count_op.py | 6 ++-- .../fluid/tests/unittests/test_numel_op.py | 4 ++- .../fluid/tests/unittests/test_one_hot_op.py | 2 ++ .../tests/unittests/test_one_hot_v2_op.py | 4 ++- .../fluid/tests/unittests/test_ones_like.py | 7 +++-- .../fluid/tests/unittests/test_ones_op.py | 2 +- .../fluid/tests/unittests/test_onnx_export.py | 3 +- .../unittests/test_op_function_generator.py | 8 +++-- .../tests/unittests/test_op_name_conflict.py | 6 ++-- .../tests/unittests/test_op_support_gpu.py | 1 + .../fluid/tests/unittests/test_op_version.py | 2 +- .../tests/unittests/test_operator_desc.py | 1 - .../fluid/tests/unittests/test_ops_nms.py | 6 ++-- .../fluid/tests/unittests/test_optimizer.py | 13 ++++---- .../unittests/test_optimizer_for_varbase.py | 5 ++-- .../tests/unittests/test_optimizer_grad.py | 6 ++-- .../test_optimizer_in_control_flow.py | 3 +- .../fluid/tests/unittests/test_outer.py | 2 +- .../tests/unittests/test_overlap_add_op.py | 5 ++-- .../fluid/tests/unittests/test_pad2d_op.py | 3 +- .../fluid/tests/unittests/test_pad3d_op.py | 7 +++-- .../tests/unittests/test_pad_constant_like.py | 1 + .../fluid/tests/unittests/test_pad_op.py | 7 +++-- .../test_paddle_imperative_double_grad.py | 8 +++-- .../unittests/test_paddle_multiprocessing.py | 5 ++-- .../unittests/test_paddle_save_load_binary.py | 9 +++--- .../tests/unittests/test_pairwise_distance.py | 6 ++-- .../test_parallel_dygraph_dataparallel.py | 10 +++---- ...t_parallel_dygraph_dataparallel_cpuonly.py | 8 ++--- .../test_parallel_dygraph_transformer_gloo.py | 3 +- .../unittests/test_parallel_executor_crf.py | 9 +++--- .../test_parallel_executor_drop_scope.py | 6 ++-- .../test_parallel_executor_dry_run.py | 7 +++-- ..._parallel_executor_feed_persistable_var.py | 10 ++++--- .../test_parallel_executor_fetch_feed.py | 10 ++++--- ...st_parallel_executor_fetch_isolated_var.py | 6 ++-- ...test_parallel_executor_fix_op_run_order.py | 8 +++-- ...el_executor_inference_feed_partial_data.py | 6 ++-- .../unittests/test_parallel_executor_mnist.py | 8 ++--- .../unittests/test_parallel_executor_pg.py | 10 ++++--- .../test_parallel_executor_profiler.py | 3 +- .../test_parallel_executor_run_cinn.py | 6 ++-- 
...arallel_executor_run_load_infer_program.py | 3 +- ...st_parallel_executor_seresnext_base_cpu.py | 5 ++-- ...st_parallel_executor_seresnext_base_gpu.py | 5 ++-- ...utor_seresnext_with_fuse_all_reduce_cpu.py | 5 ++-- ...utor_seresnext_with_fuse_all_reduce_gpu.py | 5 ++-- ...llel_executor_seresnext_with_reduce_cpu.py | 4 ++- ...llel_executor_seresnext_with_reduce_gpu.py | 3 +- ...test_parallel_executor_test_while_train.py | 14 +++++---- .../test_parallel_executor_transformer.py | 14 +++++---- ...l_ssa_graph_inference_feed_partial_data.py | 3 +- .../fluid/tests/unittests/test_parameter.py | 16 +++++----- .../tests/unittests/test_partial_concat_op.py | 3 +- ...test_partial_eager_deletion_transformer.py | 3 +- .../tests/unittests/test_partial_sum_op.py | 3 +- .../tests/unittests/test_pass_builder.py | 14 +++++---- .../tests/unittests/test_pixel_shuffle.py | 7 +++-- .../tests/unittests/test_pixel_unshuffle.py | 7 +++-- .../fluid/tests/unittests/test_poisson_op.py | 6 ++-- .../unittests/test_polygon_box_transform.py | 4 ++- .../fluid/tests/unittests/test_pool1d_api.py | 6 ++-- .../fluid/tests/unittests/test_pool2d_api.py | 12 ++++---- .../fluid/tests/unittests/test_pool2d_op.py | 5 ++-- .../fluid/tests/unittests/test_pool3d_api.py | 13 ++++---- .../fluid/tests/unittests/test_pool_max_op.py | 1 + .../test_positive_negative_pair_op.py | 3 +- .../paddle/fluid/tests/unittests/test_pow.py | 2 +- .../test_pow2_decay_with_linear_warmup_op.py | 6 ++-- .../unittests/test_precision_recall_op.py | 1 + .../fluid/tests/unittests/test_prelu_op.py | 8 +++-- .../fluid/tests/unittests/test_print_op.py | 2 +- .../tests/unittests/test_prior_box_op.py | 4 ++- .../fluid/tests/unittests/test_prod_op.py | 4 ++- .../fluid/tests/unittests/test_profiler.py | 11 +++---- .../fluid/tests/unittests/test_program.py | 4 +-- .../unittests/test_program_prune_backward.py | 15 +++++----- .../tests/unittests/test_program_to_string.py | 3 +- .../fluid/tests/unittests/test_protobuf.py | 3 +- .../tests/unittests/test_protobuf_descs.py | 1 + .../unittests/test_proximal_adagrad_op.py | 1 + .../tests/unittests/test_proximal_gd_op.py | 1 + .../fluid/tests/unittests/test_prune.py | 7 +++-- .../test_prune_gate_by_capacity_op.py | 4 ++- .../tests/unittests/test_ps_dispatcher.py | 3 +- .../tests/unittests/test_psroi_pool_op.py | 6 ++-- .../unittests/test_pull_gpups_sparse_op.py | 2 ++ .../tests/unittests/test_put_along_axis_op.py | 4 ++- .../fluid/tests/unittests/test_py_func_op.py | 8 +++-- .../unittests/test_py_reader_combination.py | 6 ++-- .../unittests/test_py_reader_error_msg.py | 4 ++- .../test_py_reader_lod_level_share.py | 3 +- .../unittests/test_py_reader_pin_memory.py | 4 ++- .../unittests/test_py_reader_push_pop.py | 6 ++-- .../unittests/test_py_reader_return_list.py | 6 ++-- .../test_py_reader_sample_generator.py | 8 +++-- .../test_py_reader_using_executor.py | 14 +++++---- .../tests/unittests/test_pybind_interface.py | 1 + .../fluid/tests/unittests/test_pylayer_op.py | 3 +- .../tests/unittests/test_pyramid_hash_op.py | 2 ++ .../test_python_bf16_numpy_datatype.py | 3 +- .../fluid/tests/unittests/test_qr_op.py | 6 ++-- .../test_quantile_and_nanquantile.py | 2 ++ .../fluid/tests/unittests/test_query_op.py | 1 + .../fluid/tests/unittests/test_queue.py | 5 ++-- .../fluid/tests/unittests/test_rad2deg.py | 2 ++ .../fluid/tests/unittests/test_rand_op.py | 5 ++-- .../tests/unittests/test_randint_like.py | 4 ++- .../fluid/tests/unittests/test_randint_op.py | 8 +++-- .../fluid/tests/unittests/test_randn_op.py | 4 ++- 
.../tests/unittests/test_random_crop_op.py | 1 + .../tests/unittests/test_random_routing_op.py | 4 ++- .../fluid/tests/unittests/test_random_seed.py | 5 ++-- .../fluid/tests/unittests/test_randperm_op.py | 4 ++- .../fluid/tests/unittests/test_range.py | 6 ++-- .../tests/unittests/test_rank_attention_op.py | 5 ++-- .../tests/unittests/test_rank_loss_op.py | 1 + .../unittests/test_raw_program_optimizer.py | 7 +++-- .../tests/unittests/test_reader_reset.py | 8 +++-- .../tests/unittests/test_real_imag_op.py | 3 +- .../tests/unittests/test_recurrent_op.py | 11 +++---- .../fluid/tests/unittests/test_reduce_op.py | 6 ++-- .../tests/unittests/test_reducescatter.py | 3 +- .../tests/unittests/test_reducescatter_api.py | 5 ++-- .../fluid/tests/unittests/test_registry.py | 5 ++-- .../fluid/tests/unittests/test_regularizer.py | 8 +++-- .../tests/unittests/test_regularizer_api.py | 8 +++-- .../fluid/tests/unittests/test_renorm_op.py | 4 ++- .../unittests/test_reorder_lod_tensor.py | 8 +++-- .../unittests/test_repeat_interleave_op.py | 4 ++- .../tests/unittests/test_require_version.py | 5 ++-- .../test_reset_grad_inplace_version.py | 4 ++- .../fluid/tests/unittests/test_reshape_op.py | 3 +- .../unittests/test_resnet50_with_cinn.py | 4 ++- .../tests/unittests/test_retain_graph.py | 4 ++- .../test_retinanet_detection_output.py | 6 ++-- .../fluid/tests/unittests/test_reverse_op.py | 13 ++++---- .../fluid/tests/unittests/test_rmsprop_op.py | 5 ++-- .../tests/unittests/test_rnn_cell_api.py | 19 +++++------- .../tests/unittests/test_rnn_decode_api.py | 13 ++++---- .../unittests/test_rnn_memory_helper_op.py | 5 ++-- .../fluid/tests/unittests/test_rnn_op.py | 11 +++---- .../tests/unittests/test_roi_align_op.py | 6 ++-- .../test_roi_perspective_transform_op.py | 5 ++-- .../fluid/tests/unittests/test_roll_op.py | 4 ++- .../fluid/tests/unittests/test_rot90_op.py | 2 ++ .../fluid/tests/unittests/test_row_conv_op.py | 2 ++ .../unittests/test_rpn_target_assign_op.py | 13 +++++--- .../fluid/tests/unittests/test_rrelu_op.py | 6 ++-- .../paddle/fluid/tests/unittests/test_run.py | 9 +++--- ...est_run_fluid_by_module_or_command_line.py | 2 +- .../tests/unittests/test_run_program_op.py | 11 +++---- .../test_runtime_and_compiletime_exception.py | 2 ++ .../tests/unittests/test_sample_logits_op.py | 3 +- .../tests/unittests/test_sampling_id_op.py | 3 +- ...est_save_inference_model_conditional_op.py | 2 +- .../unittests/test_save_model_without_var.py | 3 +- .../unittests/test_saved_tensors_hooks.py | 1 + .../fluid/tests/unittests/test_scale_op.py | 8 +++-- .../test_scaled_dot_product_attention.py | 2 ++ .../tests/unittests/test_scatter_nd_op.py | 4 ++- .../fluid/tests/unittests/test_scatter_op.py | 6 ++-- .../fluid/tests/unittests/test_scope.py | 3 +- .../tests/unittests/test_searchsorted_op.py | 2 ++ .../fluid/tests/unittests/test_seed_op.py | 2 ++ .../fluid/tests/unittests/test_segment_ops.py | 4 +-- .../unittests/test_select_input_output_op.py | 2 ++ .../tests/unittests/test_selected_rows.py | 4 ++- .../fluid/tests/unittests/test_selu_op.py | 4 ++- .../tests/unittests/test_set_bool_attr.py | 3 +- .../tests/unittests/test_set_value_op.py | 5 ++-- .../fluid/tests/unittests/test_sgd_op.py | 8 +++-- .../fluid/tests/unittests/test_sgd_op_bf16.py | 12 ++++---- .../paddle/fluid/tests/unittests/test_sgn.py | 2 ++ .../fluid/tests/unittests/test_shape_op.py | 2 ++ .../tests/unittests/test_shard_index_op.py | 1 + .../tests/unittests/test_share_data_op.py | 2 ++ .../tests/unittests/test_shrink_rnn_memory.py | 13 ++++---- 
.../tests/unittests/test_shuffle_batch_op.py | 6 ++-- .../unittests/test_shuffle_channel_op.py | 1 + .../unittests/test_sigmoid_focal_loss.py | 6 ++-- .../unittests/test_sigmoid_focal_loss_op.py | 9 +++--- .../fluid/tests/unittests/test_sign_op.py | 8 +++-- .../fluid/tests/unittests/test_signal.py | 3 +- .../tests/unittests/test_simple_rnn_op.py | 9 +++--- .../fluid/tests/unittests/test_slice_op.py | 13 ++++---- .../fluid/tests/unittests/test_slice_var.py | 5 ++-- .../tests/unittests/test_smooth_l1_loss.py | 6 ++-- .../tests/unittests/test_smooth_l1_loss_op.py | 2 ++ .../tests/unittests/test_soft_margin_loss.py | 6 ++-- .../fluid/tests/unittests/test_softmax2d.py | 4 ++- .../unittests/test_softmax_mask_fuse_op.py | 4 ++- ...est_softmax_mask_fuse_upper_triangle_op.py | 4 ++- .../fluid/tests/unittests/test_softmax_op.py | 4 ++- .../test_softmax_with_cross_entropy_op.py | 7 +++-- .../fluid/tests/unittests/test_solve_op.py | 5 +++- .../fluid/tests/unittests/test_sort_op.py | 4 ++- .../tests/unittests/test_space_to_depth_op.py | 4 ++- .../tests/unittests/test_sparse_addmm_op.py | 8 +++-- .../unittests/test_sparse_attention_op.py | 10 ++++--- .../tests/unittests/test_sparse_conv_op.py | 4 ++- .../tests/unittests/test_sparse_copy_op.py | 2 ++ .../unittests/test_sparse_elementwise_op.py | 3 +- .../test_sparse_fused_attention_op.py | 6 ++-- .../tests/unittests/test_sparse_matmul_op.py | 10 ++++--- .../tests/unittests/test_sparse_model.py | 4 ++- .../unittests/test_sparse_momentum_op.py | 1 + .../tests/unittests/test_sparse_mv_op.py | 11 +++---- .../tests/unittests/test_sparse_norm_op.py | 8 +++-- .../tests/unittests/test_sparse_pooling_op.py | 4 ++- .../tests/unittests/test_sparse_reshape_op.py | 6 ++-- .../tests/unittests/test_sparse_softmax_op.py | 7 +++-- .../unittests/test_sparse_transpose_op.py | 6 ++-- .../tests/unittests/test_sparse_unary_op.py | 2 ++ .../tests/unittests/test_sparse_utils_op.py | 2 ++ .../test_spawn_and_init_parallel_env.py | 5 ++-- .../tests/unittests/test_spectral_norm_op.py | 5 ++-- .../test_split_and_merge_lod_tensor_op.py | 13 ++++---- .../fluid/tests/unittests/test_split_op.py | 4 ++- .../tests/unittests/test_split_program.py | 10 ++++--- .../fluid/tests/unittests/test_splits_api.py | 2 ++ .../fluid/tests/unittests/test_spp_op.py | 4 +-- .../tests/unittests/test_square_error_cost.py | 4 ++- .../unittests/test_squared_l2_distance_op.py | 1 + .../unittests/test_squared_l2_norm_op.py | 4 ++- .../fluid/tests/unittests/test_squeeze2_op.py | 8 ++--- .../fluid/tests/unittests/test_squeeze_op.py | 8 ++--- .../fluid/tests/unittests/test_stack_op.py | 6 ++-- .../unittests/test_state_dict_convert.py | 6 ++-- ...t_static_model_parallel_fused_attention.py | 3 +- ...static_model_parallel_fused_feedforward.py | 3 +- ..._model_parallel_fused_multi_transformer.py | 3 +- .../unittests/test_static_save_load_bf16.py | 10 ++++--- .../unittests/test_static_save_load_large.py | 11 +++---- ...tatic_shape_inferrence_for_shape_tensor.py | 3 +- .../fluid/tests/unittests/test_std_layer.py | 2 ++ .../fluid/tests/unittests/test_stft_op.py | 7 +++-- .../tests/unittests/test_strided_slice_op.py | 8 +++-- .../fluid/tests/unittests/test_subtract_op.py | 2 ++ .../fluid/tests/unittests/test_sum_op.py | 16 +++++----- .../fluid/tests/unittests/test_svd_op.py | 4 ++- .../fluid/tests/unittests/test_switch.py | 2 +- .../tests/unittests/test_switch_autotune.py | 12 ++++---- .../fluid/tests/unittests/test_switch_case.py | 5 ++-- .../unittests/test_sync_batch_norm_op.py | 14 ++++----- 
.../paddle/fluid/tests/unittests/test_take.py | 2 ++ .../unittests/test_take_along_axis_op.py | 2 ++ .../tests/unittests/test_target_assign_op.py | 3 +- .../tests/unittests/test_tdm_child_op.py | 2 ++ .../tests/unittests/test_tdm_sampler_op.py | 4 ++- .../test_teacher_student_sigmoid_loss_op.py | 4 +-- .../tests/unittests/test_temporal_shift_op.py | 1 + .../fluid/tests/unittests/test_tensor.py | 8 +++-- .../unittests/test_tensor_array_to_tensor.py | 2 ++ .../tests/unittests/test_tensor_copy_from.py | 4 ++- .../tests/unittests/test_tensor_data_ptr.py | 1 + .../tests/unittests/test_tensor_fill_.py | 4 ++- .../unittests/test_tensor_fill_diagonal_.py | 4 ++- .../test_tensor_fill_diagonal_tensor.py | 6 ++-- .../test_tensor_fill_diagonal_tensor_.py | 6 ++-- .../unittests/test_tensor_register_hook.py | 5 ++-- ...st_tensor_scalar_type_promotion_dynamic.py | 1 + ...est_tensor_scalar_type_promotion_static.py | 4 +-- .../tests/unittests/test_tensor_to_list.py | 4 ++- .../tests/unittests/test_tensor_to_numpy.py | 4 ++- .../unittests/test_tensor_type_promotion.py | 1 + .../fluid/tests/unittests/test_tensor_uva.py | 6 ++-- .../tests/unittests/test_tensor_zero_.py | 4 ++- .../fluid/tests/unittests/test_tensordot.py | 3 +- .../fluid/tests/unittests/test_tf32_cublas.py | 2 ++ .../fluid/tests/unittests/test_tf32_cudnn.py | 1 + .../fluid/tests/unittests/test_tile_op.py | 8 +++-- .../fluid/tests/unittests/test_top_k_op.py | 2 ++ .../fluid/tests/unittests/test_top_k_v2_op.py | 2 ++ .../fluid/tests/unittests/test_trace_op.py | 4 ++- .../unittests/test_traced_layer_err_msg.py | 3 +- .../fluid/tests/unittests/test_trainable.py | 6 ++-- .../tests/unittests/test_trainer_desc.py | 3 +- .../tests/unittests/test_transfer_dtype_op.py | 3 +- .../unittests/test_transfer_layout_op.py | 5 ++-- .../tests/unittests/test_transformer_api.py | 11 +++---- .../tests/unittests/test_translated_layer.py | 6 ++-- .../tests/unittests/test_transpose_op.py | 10 ++++--- .../tests/unittests/test_tree_conv_op.py | 6 ++-- .../unittests/test_triangular_solve_op.py | 9 +++--- .../tests/unittests/test_tril_indices_op.py | 2 ++ .../tests/unittests/test_tril_triu_op.py | 2 ++ .../unittests/test_trilinear_interp_op.py | 4 ++- .../unittests/test_trilinear_interp_v2_op.py | 6 ++-- .../unittests/test_triplet_margin_loss.py | 6 ++-- .../test_triplet_margin_with_distance_loss.py | 6 ++-- .../tests/unittests/test_triu_indices_op.py | 2 ++ .../fluid/tests/unittests/test_trunc_op.py | 2 ++ .../test_truncated_gaussian_random_op.py | 1 + .../fluid/tests/unittests/test_unbind_op.py | 2 ++ .../fluid/tests/unittests/test_unfold_op.py | 4 ++- .../unittests/test_uniform_random_bf16_op.py | 4 ++- .../test_uniform_random_inplace_op.py | 6 ++-- .../tests/unittests/test_uniform_random_op.py | 10 +++---- .../fluid/tests/unittests/test_unique.py | 2 ++ .../unittests/test_unique_consecutive_op.py | 3 +- .../fluid/tests/unittests/test_unique_name.py | 1 + .../unittests/test_unique_with_counts.py | 2 ++ .../fluid/tests/unittests/test_unpool1d_op.py | 2 ++ .../fluid/tests/unittests/test_unpool3d_op.py | 2 ++ .../fluid/tests/unittests/test_unpool_op.py | 30 +++++++++++-------- .../tests/unittests/test_unsqueeze2_op.py | 2 +- .../tests/unittests/test_unsqueeze_op.py | 6 ++-- .../fluid/tests/unittests/test_unstack_op.py | 6 ++-- .../unittests/test_update_loss_scaling_op.py | 2 ++ .../fluid/tests/unittests/test_var_base.py | 5 ++-- .../fluid/tests/unittests/test_var_conv_2d.py | 1 + .../fluid/tests/unittests/test_var_info.py | 6 ++-- 
.../fluid/tests/unittests/test_variable.py | 8 ++--- .../tests/unittests/test_variance_layer.py | 2 ++ .../fluid/tests/unittests/test_version.py | 2 +- .../tests/unittests/test_viterbi_decode_op.py | 6 ++-- .../fluid/tests/unittests/test_warpctc_op.py | 8 +++-- .../tests/unittests/test_weight_decay.py | 6 ++-- .../unittests/test_weight_normalization.py | 4 ++- .../fluid/tests/unittests/test_where_index.py | 6 ++-- .../fluid/tests/unittests/test_where_op.py | 4 ++- .../tests/unittests/test_while_loop_op.py | 5 ++-- .../fluid/tests/unittests/test_while_op.py | 10 ++++--- .../fluid/tests/unittests/test_yolo_box_op.py | 2 ++ .../tests/unittests/test_yolov3_loss_op.py | 4 +-- .../tests/unittests/test_zero_dim_tensor.py | 8 +++-- .../fluid/tests/unittests/test_zeropad2d.py | 4 ++- .../tests/unittests/test_zeros_like_op.py | 10 +++---- .../fluid/tests/unittests/test_zeros_op.py | 2 ++ .../unittests/tokenizer/bert_tokenizer.py | 8 ++--- .../tests/unittests/transformer_model.py | 1 + .../xpu/collective_allgather_op_xpu.py | 5 ++-- .../xpu/collective_allreduce_op_xpu.py | 6 ++-- .../xpu/collective_identity_op_xpu.py | 5 ++-- .../unittests/xpu/get_test_cover_info.py | 3 +- ...allel_dygraph_dataparallel_with_pylayer.py | 3 +- .../xpu/parallel_dygraph_gradient_check.py | 3 +- ...el_dygraph_gradient_check_in_eager_mode.py | 5 ++-- .../tests/unittests/xpu/process_group_bkcl.py | 7 +++-- .../unittests/xpu/test_accuracy_op_xpu.py | 9 +++--- .../unittests/xpu/test_activation_op_xpu.py | 9 +++--- .../unittests/xpu/test_adadelta_op_xpu.py | 10 ++++--- .../tests/unittests/xpu/test_adam_op_xpu.py | 11 +++---- .../tests/unittests/xpu/test_adamw_op_xpu.py | 10 +++---- .../test_amp_check_finite_and_scale_op_xpu.py | 6 ++-- .../unittests/xpu/test_arg_max_op_xpu.py | 8 +++-- .../unittests/xpu/test_argsort_op_xpu.py | 10 ++++--- .../tests/unittests/xpu/test_assign_op_xpu.py | 1 + .../unittests/xpu/test_assign_value_op_xpu.py | 12 ++++---- .../unittests/xpu/test_batch_norm_op_xpu.py | 12 ++++---- .../unittests/xpu/test_bce_loss_op_xpu.py | 8 +++-- .../xpu/test_bilinear_interp_op_xpu.py | 4 +-- .../xpu/test_bilinear_interp_v2_op_xpu.py | 11 ++++--- .../unittests/xpu/test_bitwise_op_xpu.py | 8 +++-- .../tests/unittests/xpu/test_bmm_op_xpu.py | 6 ++-- .../unittests/xpu/test_c_embedding_op_xpu.py | 2 +- .../tests/unittests/xpu/test_cast_op_xpu.py | 13 ++++---- .../unittests/xpu/test_clip_by_norm_op_xpu.py | 6 ++-- .../tests/unittests/xpu/test_clip_op_xpu.py | 12 ++++---- .../xpu/test_coalesce_tensor_op_xpu.py | 9 ++++-- .../xpu/test_collective_allgather_xpu.py | 8 ++--- .../xpu/test_collective_allreduce_xpu.py | 8 ++--- .../unittests/xpu/test_collective_base_xpu.py | 12 ++++---- .../xpu/test_collective_identity_xpu.py | 8 ++--- .../xpu/test_collective_process_group.py | 1 + .../unittests/xpu/test_compare_op_xpu.py | 9 ++++-- .../tests/unittests/xpu/test_concat_op_xpu.py | 7 +++-- .../tests/unittests/xpu/test_conv2d_op_xpu.py | 9 +++--- .../xpu/test_conv2d_transpose_op_xpu.py | 5 ++-- .../tests/unittests/xpu/test_conv3d_op_xpu.py | 10 +++---- .../tests/unittests/xpu/test_cumsum_op_xpu.py | 11 +++---- .../xpu/test_deformable_conv_op_xpu.py | 9 +++--- .../xpu/test_depthwise_conv2d_op_xpu.py | 3 +- .../unittests/xpu/test_device_guard_xpu.py | 5 ++-- .../unittests/xpu/test_dropout_op_xpu.py | 8 +++-- .../xpu/test_elementwise_add_op_xpu.py | 15 ++++++---- .../xpu/test_elementwise_add_op_xpu_kp.py | 9 ++++-- .../xpu/test_elementwise_div_op_xpu.py | 8 +++-- .../xpu/test_elementwise_floordiv_op_xpu.py | 6 ++-- 
.../xpu/test_elementwise_max_op_xpu.py | 6 ++-- .../xpu/test_elementwise_min_op_xpu.py | 6 ++-- .../xpu/test_elementwise_mod_op_xpu.py | 9 +++--- .../xpu/test_elementwise_mul_op_xpu.py | 10 ++++--- .../xpu/test_elementwise_pow_op_xpu.py | 6 ++-- .../xpu/test_elementwise_sub_op_xpu.py | 11 ++++--- .../tests/unittests/xpu/test_empty_op_xpu.py | 8 +++-- .../unittests/xpu/test_expand_as_v2_op_xpu.py | 12 ++++---- .../unittests/xpu/test_expand_v2_op_xpu.py | 10 ++++--- .../xpu/test_fill_any_like_op_xpu.py | 6 ++-- .../xpu/test_fill_constant_op_xpu.py | 6 ++-- .../unittests/xpu/test_flatten2_op_xpu.py | 7 +++-- .../test_flatten_contiguous_range_op_xpu.py | 10 ++++--- .../unittests/xpu/test_flatten_op_xpu.py | 7 +++-- .../xpu/test_fleet_exe_dist_model_run_xpu.py | 8 +++-- .../xpu/test_fused_attention_op_xpu.py | 26 ++++++++-------- .../xpu/test_fused_feedforward_op_xpu.py | 23 +++++++------- .../test_fused_gemm_epilogue_grad_op_xpu.py | 8 +++-- .../xpu/test_fused_gemm_epilogue_op_xpu.py | 10 ++++--- .../test_fused_resnet_basic_block_op_xpu.py | 14 +++++---- .../unittests/xpu/test_gather_nd_op_xpu.py | 8 +++-- .../tests/unittests/xpu/test_gather_op_xpu.py | 8 ++--- .../xpu/test_gaussian_random_op_xpu.py | 7 +++-- .../unittests/xpu/test_gen_bkcl_id_op.py | 5 ++-- .../xpu/test_generate_proposals_v2_op_xpu.py | 12 ++++---- .../unittests/xpu/test_grid_sampler_op_xpu.py | 9 +++--- .../unittests/xpu/test_huber_loss_op_xpu.py | 9 +++--- .../unittests/xpu/test_index_select_op_xpu.py | 9 +++--- .../xpu/test_instance_norm_op_xpu.py | 8 +++-- .../xpu/test_iou_similarity_op_xpu.py | 6 ++-- .../unittests/xpu/test_kldiv_loss_op_xpu.py | 8 +++-- .../unittests/xpu/test_label_smooth_op_xpu.py | 8 +++-- .../tests/unittests/xpu/test_lamb_op_xpu.py | 7 +++-- .../unittests/xpu/test_layer_norm_op_xpu.py | 11 ++++--- .../unittests/xpu/test_log_loss_op_xpu.py | 2 ++ .../unittests/xpu/test_log_softmax_op_xpu.py | 11 +++---- .../unittests/xpu/test_logical_op_xpu.py | 8 +++-- .../unittests/xpu/test_logsumexp_op_xpu.py | 5 ++-- .../xpu/test_lookup_table_v2_op_xpu.py | 9 +++--- .../xpu/test_masked_select_op_xpu.py | 10 ++++--- .../tests/unittests/xpu/test_matmul_op_xpu.py | 11 +++---- .../unittests/xpu/test_matmul_v2_op_xpu.py | 8 ++--- .../tests/unittests/xpu/test_mean_op_xpu.py | 6 ++-- .../xpu/test_merged_momentum_op_xpu.py | 8 ++--- .../xpu/test_merged_momentum_op_xpu_base.py | 6 ++-- .../unittests/xpu/test_momentum_op_xpu.py | 11 +++---- .../tests/unittests/xpu/test_mul_op_xpu.py | 7 +++-- .../xpu/test_nearest_interp_op_xpu.py | 3 +- .../xpu/test_nearest_interp_v2_op_xpu.py | 9 +++--- .../unittests/xpu/test_one_hot_op_xpu.py | 10 ++++--- .../unittests/xpu/test_one_hot_v2_op_xpu.py | 8 +++-- .../tests/unittests/xpu/test_p_norm_op_xpu.py | 8 +++-- .../tests/unittests/xpu/test_pad3d_op_xpu.py | 13 ++++---- .../xpu/test_parallel_dygraph_dataparallel.py | 12 ++++---- .../tests/unittests/xpu/test_pool2d_op_xpu.py | 7 +++-- ...st_pow2_decay_with_linear_warmup_op_xpu.py | 8 ++--- .../unittests/xpu/test_prior_box_op_xpu.py | 9 +++--- .../tests/unittests/xpu/test_range_xpu.py | 8 +++-- .../unittests/xpu/test_reduce_all_op_xpu.py | 8 +++-- .../unittests/xpu/test_reduce_amax_op_xpu.py | 8 +++-- .../unittests/xpu/test_reduce_amin_op_xpu.py | 8 +++-- .../unittests/xpu/test_reduce_any_op_xpu.py | 8 +++-- .../unittests/xpu/test_reduce_max_op_xpu.py | 8 +++-- .../unittests/xpu/test_reduce_mean_op_xpu.py | 6 ++-- .../unittests/xpu/test_reduce_min_op_xpu.py | 8 +++-- .../unittests/xpu/test_reduce_prod_op_xpu.py | 9 +++--- 
.../unittests/xpu/test_reduce_sum_op_xpu.py | 8 +++-- .../unittests/xpu/test_refactor_op_xpu.py | 11 +++---- .../unittests/xpu/test_reshape2_op_xpu.py | 9 +++--- .../unittests/xpu/test_rmsprop_op_xpu.py | 11 +++---- .../tests/unittests/xpu/test_rnn_op_xpu.py | 13 ++++---- .../unittests/xpu/test_roi_align_op_xpu.py | 10 ++++--- .../tests/unittests/xpu/test_roll_op_xpu.py | 8 +++-- .../unittests/xpu/test_sampling_id_op_xpu.py | 5 ++-- .../tests/unittests/xpu/test_scale_op_xpu.py | 11 +++---- .../unittests/xpu/test_scatter_op_xpu.py | 10 ++++--- .../xpu/test_sequence_conv_op_xpu.py | 13 +++++--- .../xpu/test_sequence_unpad_op_xpu.py | 11 ++++--- .../tests/unittests/xpu/test_sgd_op_xpu.py | 15 +++++----- .../tests/unittests/xpu/test_shape_op_xpu.py | 6 ++-- ...igmoid_cross_entropy_with_logits_op_xpu.py | 11 ++++--- .../tests/unittests/xpu/test_sign_op_xpu.py | 9 +++--- .../tests/unittests/xpu/test_slice_op_xpu.py | 8 +++-- .../unittests/xpu/test_softmax_op_xpu.py | 8 +++-- .../test_softmax_with_cross_entropy_op_xpu.py | 11 +++---- .../tests/unittests/xpu/test_split_op_xpu.py | 7 +++-- .../unittests/xpu/test_squeeze2_op_xpu.py | 6 ++-- .../unittests/xpu/test_squeeze_op_xpu.py | 12 ++++---- .../tests/unittests/xpu/test_stack_op_xpu.py | 7 +++-- .../xpu/test_strided_slice_op_xpu.py | 8 +++-- .../tests/unittests/xpu/test_sum_op_xpu.py | 11 +++---- .../xpu/test_temporal_shift_op_xpu.py | 11 +++---- .../tests/unittests/xpu/test_tile_op_xpu.py | 10 ++++--- .../tests/unittests/xpu/test_top_k_op_xpu.py | 8 +++-- .../unittests/xpu/test_top_k_v2_op_xpu.py | 8 +++-- .../unittests/xpu/test_transpose_op_xpu.py | 4 ++- .../unittests/xpu/test_tril_triu_op_xpu.py | 10 ++++--- .../test_truncated_gaussian_random_op_xpu.py | 10 ++++--- .../xpu/test_uniform_random_op_xpu.py | 4 ++- .../unittests/xpu/test_unsqueeze2_op_xpu.py | 8 ++--- .../unittests/xpu/test_unsqueeze_op_xpu.py | 8 ++--- .../xpu/test_update_loss_scaling_op_xpu.py | 13 ++++---- .../unittests/xpu/test_warpctc_op_xpu.py | 14 +++++---- .../unittests/xpu/test_where_index_xpu.py | 15 +++++----- .../tests/unittests/xpu/test_where_op_xpu.py | 15 +++++----- .../tests/unittests/xpu/test_while_op_xpu.py | 8 +++-- .../tests/unittests/xpu/test_xpu_place.py | 5 ++-- .../unittests/xpu/test_xpu_stream_event.py | 6 ++-- .../unittests/xpu/test_zero_dim_tensor_xpu.py | 6 ++-- 668 files changed, 2496 insertions(+), 1609 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py index a2c1a73864bbb5..b41e7614ce83b7 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from transformer_dygraph_model import MultiHeadAttention, PrePostProcessLayer + import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import Embedding, Layer, Linear from paddle.jit.api import declarative -from transformer_dygraph_model import MultiHeadAttention, PrePostProcessLayer - class PositionwiseFeedForwardLayer(Layer): def __init__( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py index b532a368214452..db05875d2314f6 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py @@ -14,11 +14,10 @@ import paddle import paddle.fluid as fluid +from paddle.fluid.dygraph.nn import BatchNorm from paddle.fluid.param_attr import ParamAttr from paddle.fluid.regularizer import L2Decay -from paddle.fluid.dygraph.nn import BatchNorm - class ConvBNLayer(fluid.dygraph.Layer): def __init__( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/decos.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/decos.py index 73bbfffcabf8c7..dcea6e82fe339b 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/decos.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/decos.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle - from functools import wraps +import paddle + def deco1(fun): @wraps(fun) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/predictor_utils.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/predictor_utils.py index 2e49f3778ac27c..ef3575e04aefb6 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/predictor_utils.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/predictor_utils.py @@ -15,10 +15,9 @@ import os import numpy as np -import paddle.fluid as fluid -from paddle.fluid.core import AnalysisConfig -from paddle.fluid.core import create_paddle_predictor +import paddle.fluid as fluid +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor class PredictorTools: diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index c34c780c549ffa..bfc23a71fe571f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -14,15 +14,15 @@ # limitations under the License. 
import numpy as np +from seq2seq_utils import Seq2SeqModelHyperParams as args + import paddle import paddle.fluid as fluid -from paddle.fluid import ParamAttr -from paddle.fluid import layers +from paddle.fluid import ParamAttr, layers from paddle.fluid.dygraph import Layer from paddle.fluid.dygraph.base import to_variable -from paddle.jit.api import declarative from paddle.fluid.dygraph.nn import Embedding -from seq2seq_utils import Seq2SeqModelHyperParams as args +from paddle.jit.api import declarative INF = 1.0 * 1e5 alpha = 0.6 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py index 63d1229da705a1..d6589a53a0bdd3 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from functools import reduce + import paddle import paddle.fluid as fluid import paddle.fluid.param_attr as attr - -from functools import reduce -from paddle.jit.api import declarative from paddle.fluid.dygraph import Embedding, Layer, Linear +from paddle.jit.api import declarative from paddle.static import Variable diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py index 70fb0726c09c04..03cd5e699e3369 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py @@ -13,6 +13,7 @@ # limitations under the License. from functools import reduce + import paddle from paddle.static import Variable diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_assert.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_assert.py index c9399236b72b78..e6f821cdb45a98 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_assert.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_assert.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy import unittest +import numpy + import paddle import paddle.fluid as fluid from paddle.jit import ProgramTranslator diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ast_util.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ast_util.py index 8315caf3e32c56..b81ed5f0b4360a 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ast_util.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ast_util.py @@ -12,21 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest -import textwrap -from paddle.utils import gast import inspect -import numpy as np -import paddle -import paddle.fluid as fluid -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_func +import textwrap +import unittest +import numpy as np from ifelse_simple_func import ( dyfunc_with_if_else, dyfunc_with_if_else2, nested_if_else, ) +import paddle +import paddle.fluid as fluid +from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_func +from paddle.utils import gast + class TestAST2Func(unittest.TestCase): """ diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py index 2d0dd5ff5666a8..69765c1b80f225 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py @@ -12,19 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import unittest import inspect -from paddle.utils import gast +import unittest + +import numpy as np import paddle import paddle.fluid as fluid import paddle.fluid.dygraph as dygraph - from paddle import to_tensor from paddle.fluid.dygraph import to_variable -from paddle.jit.api import dygraph_to_static_func from paddle.fluid.dygraph.dygraph_to_static.utils import is_dygraph_api +from paddle.jit.api import dygraph_to_static_func +from paddle.utils import gast SEED = 2020 np.random.seed(SEED) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py index 3ce9b78e083ab5..65b571e79cf76c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py @@ -13,20 +13,19 @@ # limitations under the License. import os -import time import tempfile +import time import unittest + import numpy as np +from bert_dygraph_model import PretrainModelLayer +from bert_utils import get_bert_config, get_feed_data_reader +from predictor_utils import PredictorTools import paddle import paddle.fluid as fluid -from paddle.jit import ProgramTranslator from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX - -from bert_dygraph_model import PretrainModelLayer -from bert_utils import get_bert_config, get_feed_data_reader - -from predictor_utils import PredictorTools +from paddle.jit import ProgramTranslator program_translator = ProgramTranslator() place = ( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py index 5949903fc9936e..eaf37e7ea7a753 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py @@ -12,20 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os import math -import numpy as np +import os +import tempfile import unittest + +import numpy as np +from predictor_utils import PredictorTools + import paddle -import tempfile -from paddle.jit import to_static import paddle.fluid as fluid from paddle.fluid import ParamAttr from paddle.fluid.dygraph import to_variable -from paddle.jit import ProgramTranslator from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX - -from predictor_utils import PredictorTools +from paddle.jit import ProgramTranslator, to_static SEED = 2000 DATATYPE = 'float32' diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_break_continue.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_break_continue.py index 5c826def66355f..b3d1b5b2cb4f63 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_break_continue.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_break_continue.py @@ -13,14 +13,14 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid -from paddle.jit.api import declarative -from paddle.jit.dy2static.program_translator import ( - ProgramTranslator, -) from paddle.fluid.dygraph.dygraph_to_static.utils import Dygraph2StaticException +from paddle.jit.api import declarative +from paddle.jit.dy2static.program_translator import ProgramTranslator SEED = 2020 np.random.seed(SEED) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_build_strategy.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_build_strategy.py index 439ca7720c16c9..96ae39c0cc0d19 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_build_strategy.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_build_strategy.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest -import numpy as np -from paddle.jit import ProgramTranslator +import numpy as np from test_resnet import ResNetHelper +import paddle +from paddle.jit import ProgramTranslator + program_translator = ProgramTranslator() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cache_program.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cache_program.py index 45383c739232ee..968dbcb4c15273 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cache_program.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cache_program.py @@ -13,17 +13,17 @@ # limitations under the License. import unittest -import numpy as np from collections import Counter + +import numpy as np +from test_fetch_feed import Linear, Pool2D + import paddle import paddle.fluid as fluid - -from paddle.jit.api import declarative from paddle.jit import ProgramTranslator +from paddle.jit.api import declarative from paddle.jit.dy2static import convert_to_static -from test_fetch_feed import Pool2D, Linear - class TestCacheProgram(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cast.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cast.py index 43df75ae152156..cb7685444913d4 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cast.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cast.py @@ -13,7 +13,9 @@ # limitations under the License. 
import unittest + import numpy as np + import paddle.fluid as fluid from paddle.jit.api import declarative diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_closure_analysis.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_closure_analysis.py index 0210e260f8238a..b0156699161e7f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_closure_analysis.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_closure_analysis.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import inspect import unittest +from numpy import append + import paddle from paddle.fluid.dygraph.dygraph_to_static.utils import ( FunctionNameLivenessAnalysis, ) from paddle.utils import gast -import inspect -from numpy import append global_a = [] diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_container.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_container.py index 526e397bb861c3..f18b4093ebfeca 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_container.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_container.py @@ -13,10 +13,12 @@ # limitations under the License. import os -import paddle +import tempfile import unittest + import numpy as np -import tempfile + +import paddle class BufferLayers(paddle.nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_operators.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_operators.py index 4b2e815f88a520..812abb18ff3560 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_operators.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_operators.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np + import paddle -import unittest class CallNotExist(paddle.nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cpu_cuda_to_tensor.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cpu_cuda_to_tensor.py index 60f7f70ad4bd7e..24f0bd84556a2d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cpu_cuda_to_tensor.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cpu_cuda_to_tensor.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest -import paddle + import numpy as np +import paddle + class TestCpuCuda(unittest.TestCase): def test_cpu_cuda(self): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py index 92009c09bfbac2..17972d7798c534 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py @@ -23,23 +23,23 @@ # See the License for the specific language governing permissions and # limitations under the License -import time +import os import random +import time import unittest + import numpy as np from PIL import Image, ImageOps -import os - # Use GPU:0 to elimate the influence of other tasks. 
os.environ["CUDA_VISIBLE_DEVICES"] = "1" import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import to_variable -from paddle.jit.api import declarative +from paddle.fluid.dygraph.nn import BatchNorm, Conv2DTranspose from paddle.jit import ProgramTranslator -from paddle.fluid.dygraph.nn import Conv2DTranspose, BatchNorm +from paddle.jit.api import declarative # Note: Set True to eliminate randomness. # 1. For one operation, cuDNN has several algorithms, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py index 02c9ab3695803b..b9ae4c5759d761 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py @@ -12,25 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import unittest import os import tempfile +import unittest + +import numpy as np +from test_basic_api_transformation import dyfunc_to_variable + import paddle import paddle.fluid as fluid -from paddle.static import InputSpec -from paddle.fluid.dygraph import ( - to_variable, - Layer, -) -from paddle.jit.api import declarative +from paddle.fluid.dygraph import Layer, to_variable from paddle.jit import ProgramTranslator +from paddle.jit.api import declarative from paddle.jit.dy2static.program_translator import ( ConcreteProgram, StaticFunction, ) - -from test_basic_api_transformation import dyfunc_to_variable +from paddle.static import InputSpec program_trans = ProgramTranslator() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_decorator_transform.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_decorator_transform.py index 13fd569c920b48..946718da74c32f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_decorator_transform.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_decorator_transform.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest -import numpy as np -import decos import warnings -from functools import wraps from contextlib import contextmanager +from functools import wraps + +import decos +import numpy as np + +import paddle def deco1(func): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_deepcopy.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_deepcopy.py index 7a8593066654fa..798dd434b2cd93 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_deepcopy.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_deepcopy.py @@ -13,14 +13,13 @@ # limitations under the License. 
import unittest -import paddle -import numpy as np -from paddle.jit.dy2static.program_translator import ( - StaticFunction, -) +from copy import deepcopy +import numpy as np from test_rollback import Net, foo -from copy import deepcopy + +import paddle +from paddle.jit.dy2static.program_translator import StaticFunction class TestDeepCopy(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py index 40f0c5085a0456..2f049581ecec9b 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py @@ -12,15 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest +import numpy as np + import paddle import paddle.fluid as fluid from paddle.jit import to_static -from paddle.jit.dy2static.program_translator import ( - ProgramTranslator, -) +from paddle.jit.dy2static.program_translator import ProgramTranslator PLACE = ( fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_drop_path.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_drop_path.py index f42fb02fcbc8c7..30242eb1cd7c88 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_drop_path.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_drop_path.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np import paddle diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_duplicate_output.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_duplicate_output.py index 625051d9364e5c..cbd4fd85de39cf 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_duplicate_output.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_duplicate_output.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest +import numpy as np + import paddle np.random.seed(1) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_fetch_feed.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_fetch_feed.py index d58d24ea073cb5..b09ce1eab4e439 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_fetch_feed.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_fetch_feed.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -from paddle.jit.api import declarative from paddle.jit import ProgramTranslator +from paddle.jit.api import declarative SEED = 2020 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_for_enumerate.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_for_enumerate.py index 1003d1f025a957..ed2480ab85232d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_for_enumerate.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_for_enumerate.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np -import unittest import os import tempfile +import unittest + +import numpy as np import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_full_name_usage.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_full_name_usage.py index 01b2a38c36ad48..0777279942ee0e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_full_name_usage.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_full_name_usage.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np + import paddle import paddle.fluid as fluid -import unittest from paddle.jit import to_static diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_function_spec.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_function_spec.py index 2f0672f7185d1f..b7a26169f3a2d8 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_function_spec.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_function_spec.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -from paddle.static import InputSpec -from paddle.fluid.dygraph.dygraph_to_static.function_spec import FunctionSpec +import unittest from test_declarative import foo_func -import unittest +import paddle +from paddle.fluid.dygraph.dygraph_to_static.function_spec import FunctionSpec +from paddle.static import InputSpec paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grad.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grad.py index c56a710d443de2..e359514e4c8d69 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grad.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grad.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import paddle -import unittest import os import tempfile +import unittest + +import numpy as np + +import paddle class GradLayer(paddle.nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_gradient_aggregation.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_gradient_aggregation.py index aced4b35f1d5f9..5de1eee84c7dc5 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_gradient_aggregation.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_gradient_aggregation.py @@ -14,9 +14,10 @@ import unittest -import paddle import numpy as np +import paddle + SEED = 2020 np.random.seed(SEED) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grid_generator.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grid_generator.py index d7b9d3869728de..f46ae0eb6bfa48 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grid_generator.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_grid_generator.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -from paddle import nn, ParamAttr +import unittest import numpy as np -import unittest + +import paddle +from paddle import ParamAttr, nn np.random.seed(2020) paddle.seed(2020) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py index 78273da028a2a0..89b9e871564f9f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py @@ -12,17 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest -import paddle -from paddle.jit.api import declarative -from paddle.jit.dy2static.program_translator import ( - ProgramTranslator, -) -from paddle.fluid.dygraph.dygraph_to_static.utils import Dygraph2StaticException -import paddle.fluid.core as core - +import numpy as np from ifelse_simple_func import ( NetWithControlFlowIf, add_fn, @@ -49,6 +41,12 @@ nested_if_else_3, ) +import paddle +import paddle.fluid.core as core +from paddle.fluid.dygraph.dygraph_to_static.utils import Dygraph2StaticException +from paddle.jit.api import declarative +from paddle.jit.dy2static.program_translator import ProgramTranslator + np.random.seed(1) if fluid.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py index 3c4c330e0b1c11..490dda5bc40493 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py @@ -23,9 +23,10 @@ # See the License for the specific language governing permissions and # limitations under the License -import numpy as np import unittest +import numpy as np + import paddle import paddle.nn as nn diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_jit_property_save.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_jit_property_save.py index 0d26ab51e1a51a..911eea6f7602a1 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_jit_property_save.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_jit_property_save.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py index 3f8c9ad251fa31..1d4b5850e4fd33 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py @@ -13,24 +13,23 @@ # limitations under the License. 
import math +import os +import tempfile import time -import numpy as np import unittest -import os -import tempfile +import numpy as np os.environ["CUDA_VISIBLE_DEVICES"] = "2" import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph import to_variable -from paddle.fluid.dygraph import Embedding, Linear, GRUUnit -from paddle.jit.api import declarative -from paddle.jit import ProgramTranslator +from paddle import _legacy_C_ops +from paddle.fluid.dygraph import Embedding, GRUUnit, Linear, to_variable from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.fluid.framework import _non_static_mode -from paddle import _legacy_C_ops +from paddle.jit import ProgramTranslator +from paddle.jit.api import declarative SEED = 2020 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lambda.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lambda.py index 078322d15bd318..ce6ca928430df1 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lambda.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lambda.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest + +import numpy as np + import paddle import paddle.fluid as fluid - from paddle.jit.api import declarative diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_layer_hook.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_layer_hook.py index f0df076a4bb593..ad9a1c0f4c150b 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_layer_hook.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_layer_hook.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest -import paddle import os -import numpy as np import tempfile +import unittest + +import numpy as np + +import paddle def forward_post_hook1(layer, input, output): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py index f5aa8ffffbadb1..54762c793ff435 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py @@ -15,11 +15,12 @@ import unittest -import paddle import numpy as np + +import paddle import paddle.fluid as fluid -from paddle.jit.api import declarative from paddle.fluid.layers.utils import map_structure +from paddle.jit.api import declarative SEED = 2020 np.random.seed(SEED) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py index cdc63ca353ee28..93e5fae00d410f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py @@ -17,12 +17,11 @@ import os import sys import unittest - -from paddle.utils import gast +from unittest import mock import paddle from paddle.fluid.dygraph.dygraph_to_static import logging_utils -from unittest import mock +from paddle.utils import gast class TestLoggingUtils(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py index 847635bbc632bd..97213982ba6f78 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py @@ -17,15 +17,15 @@ import unittest -from paddle.utils import gast import numpy as np import paddle import paddle.fluid as fluid -from paddle.jit import ProgramTranslator from paddle.fluid.dygraph.dygraph_to_static.logical_transformer import ( cmpop_node_to_str, ) +from paddle.jit import ProgramTranslator +from paddle.utils import gast program_translator = ProgramTranslator() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lstm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lstm.py index b092d65da2e084..447488c5ef90ba 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lstm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lstm.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import tempfile +import unittest + import numpy as np + import paddle -import unittest from paddle import nn -import os -import tempfile class LSTMLayer(nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py index f8fcbff1f4519e..af942cdb8d70d2 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py @@ -12,23 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import os import tempfile +import unittest from time import time import numpy as np +from predictor_utils import PredictorTools import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph.base import switch_to_static_graph from paddle.fluid.dygraph import to_variable -from paddle.fluid.dygraph.nn import Linear, Pool2D -from paddle.fluid.optimizer import AdamOptimizer +from paddle.fluid.dygraph.base import switch_to_static_graph from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX +from paddle.fluid.dygraph.nn import Linear, Pool2D from paddle.fluid.framework import _test_eager_guard - -from predictor_utils import PredictorTools +from paddle.fluid.optimizer import AdamOptimizer SEED = 2020 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_amp.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_amp.py index 3610e72b9539b9..014062152903e3 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_amp.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_amp.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest -import numpy as np from time import time -from test_mnist import MNIST, TestMNIST, SEED + +import numpy as np +from test_mnist import MNIST, SEED, TestMNIST + +import paddle from paddle.fluid.optimizer import AdamOptimizer if paddle.fluid.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_pure_fp16.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_pure_fp16.py index 8bdf81537429bf..683d097cc9e19e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_pure_fp16.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist_pure_fp16.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest -import numpy as np from time import time + +import numpy as np from test_mnist import MNIST, SEED, TestMNIST +import paddle + if paddle.fluid.is_compiled_with_cuda(): paddle.fluid.set_flags({'FLAGS_cudnn_deterministic': True}) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index 18308dbd85b41f..8b74bd7e9848be 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -15,19 +15,19 @@ import os import tempfile import time +import unittest + import numpy as np +from predictor_utils import PredictorTools + import paddle import paddle.fluid as fluid +from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX +from paddle.fluid.dygraph.nn import BatchNorm, Linear, Pool2D from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.dygraph.nn import Pool2D, BatchNorm, Linear -from paddle.jit.api import declarative from paddle.jit import ProgramTranslator -from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX - -import unittest - -from predictor_utils import PredictorTools +from paddle.jit.api import declarative # Note: Set True to eliminate randomness. # 1. 
For one operation, cuDNN has several algorithms, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_multi_forward.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_multi_forward.py index f320e9f010cfc3..17a9a661dd44ab 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_multi_forward.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_multi_forward.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import paddle import paddle.fluid as fluid -import unittest class MyLayer(paddle.nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py index 15c5d92772e4b3..d474d80b63e60d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_op_attr.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest +import paddle from paddle.static import InputSpec diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_origin_info.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_origin_info.py index eedbed04f0b53e..65c8fb4a1fd704 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_origin_info.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_origin_info.py @@ -15,12 +15,9 @@ import sys import unittest -from paddle.jit.dy2static import ( - DygraphToStaticAst, -) from paddle.fluid.dygraph.dygraph_to_static.origin_info import ( - Location, ORIGI_INFO, + Location, OriginInfo, attach_origin_info, create_and_update_origin_info_map, @@ -30,6 +27,7 @@ ) from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_func from paddle.jit.api import declarative +from paddle.jit.dy2static import DygraphToStaticAst def simple_func(x): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_param_guard.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_param_guard.py index cb133e31f65dac..9270b50b28d339 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_param_guard.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_param_guard.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import numpy as np import unittest -from paddle.jit import to_static, ProgramTranslator +import numpy as np + +import paddle +from paddle.jit import ProgramTranslator, to_static class NetWithParameterList(paddle.nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_params_no_grad.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_params_no_grad.py index 68a74875342f89..ec712c90ab46bc 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_params_no_grad.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_params_no_grad.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + import paddle -import paddle.nn as nn import paddle.distributed as dist - -import unittest +import paddle.nn as nn class Net(nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py index 2b69ebacd6f9f5..132b6c555de580 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_partial_program.py @@ -12,16 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np +from test_fetch_feed import Linear + import paddle import paddle.fluid as fluid from paddle.fluid.layers.utils import flatten -from paddle.jit.api import declarative from paddle.jit import ProgramTranslator - -from test_fetch_feed import Linear - -import unittest +from paddle.jit.api import declarative SEED = 2020 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_place.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_place.py index bd414685604730..60a5fd7bb14b4b 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_place.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_place.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest +import paddle + class TestPlace(unittest.TestCase): def test_place(self): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py index c6d290c6d0f6cc..a593bd37a9174c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy import unittest +import numpy + import paddle.fluid as fluid from paddle.jit import ProgramTranslator from paddle.jit.api import declarative diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py index 8b9234bd0f8ebd..86f5626f344c51 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py @@ -12,26 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import astor -from paddle.utils import gast import inspect -import numpy as np import textwrap import unittest -import paddle -import paddle.fluid as fluid -from paddle.jit import ProgramTranslator -from paddle.jit.api import declarative -from paddle.fluid.dygraph.dygraph_to_static.utils import func_to_source_code -import paddle.jit.dy2static as _jst - +import astor +import numpy as np from ifelse_simple_func import ( dyfunc_with_if_else, dyfunc_with_if_else_early_return1, dyfunc_with_if_else_early_return2, ) +import paddle +import paddle.fluid as fluid +import paddle.jit.dy2static as _jst +from paddle.fluid.dygraph.dygraph_to_static.utils import func_to_source_code +from paddle.jit import ProgramTranslator +from paddle.jit.api import declarative +from paddle.utils import gast + np.random.seed(0) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py index 1c537528bb7cba..1dc77a658bbd00 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py @@ -17,13 +17,14 @@ import unittest import numpy as np + import paddle import paddle.fluid as fluid -from paddle.jit import ProgramTranslator from paddle.fluid.dygraph.base import to_variable -from paddle.jit.api import declarative from paddle.fluid.dygraph.nn import Embedding from paddle.fluid.optimizer import SGDOptimizer +from paddle.jit import ProgramTranslator +from paddle.jit.api import declarative PRINT_STEP = 20 SEED = 2020 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py index 2a57e0b9857f67..f589c37c2fbfd1 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py @@ -17,6 +17,7 @@ import unittest import numpy as np + import paddle PRINT_STEP = 20 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py index a634cec0da5605..f812cfef165f13 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py @@ -12,18 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import gym -import math import itertools +import math +import unittest + +import gym import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.dygraph.nn as nn -from paddle.fluid.dygraph import to_variable, Layer -from paddle.jit.api import declarative +from paddle.fluid.dygraph import Layer, to_variable from paddle.jit import ProgramTranslator - -import unittest +from paddle.jit.api import declarative SEED = 2020 program_translator = ProgramTranslator() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py index 72f78be55a5604..0dca14c462044f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py @@ -12,21 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os import math -import time +import os import tempfile +import time import unittest import numpy as np +from predictor_utils import PredictorTools import paddle import paddle.fluid as fluid -from paddle.jit import ProgramTranslator -from paddle.fluid.dygraph.nn import BatchNorm, Linear, Pool2D from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX - -from predictor_utils import PredictorTools +from paddle.fluid.dygraph.nn import BatchNorm, Linear, Pool2D +from paddle.jit import ProgramTranslator SEED = 2020 IMAGENET1000 = 1281167 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py index dbd98f2e000a60..ae7368ac9857b2 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py @@ -16,11 +16,11 @@ import unittest import numpy as np +from test_resnet import SEED, ResNet, optimizer_setting import paddle import paddle.fluid as fluid from paddle.jit import ProgramTranslator -from test_resnet import ResNet, optimizer_setting, SEED # NOTE: Reduce batch_size from 8 to 2 to avoid unittest timeout. batch_size = 2 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_pure_fp16.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_pure_fp16.py index 2162e44917bcd5..058be1b07afb52 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_pure_fp16.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_pure_fp16.py @@ -16,11 +16,11 @@ import unittest import numpy as np +from test_resnet import SEED, ResNet, optimizer_setting import paddle import paddle.fluid as fluid from paddle.jit import ProgramTranslator -from test_resnet import ResNet, optimizer_setting, SEED # NOTE: Reduce batch_size from 8 to 2 to avoid unittest timeout. batch_size = 2 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py index 23fffaa066ada6..112ff2f1d0f95a 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py @@ -16,16 +16,15 @@ os.environ["FLAGS_enable_eager_mode"] = "0" import math +import tempfile import time import unittest -import tempfile import numpy as np +from predictor_utils import PredictorTools import paddle -from predictor_utils import PredictorTools - SEED = 2020 IMAGENET1000 = 1281167 base_lr = 0.001 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_return.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_return.py index a194cede9c0dfa..76e3de2a5ec870 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_return.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_return.py @@ -12,17 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + +import numpy as np +from ifelse_simple_func import dyfunc_with_if_else + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.jit import to_static -from paddle.jit import ProgramTranslator from paddle.fluid.dygraph.dygraph_to_static.utils import Dygraph2StaticException - -import unittest -import numpy as np - -from ifelse_simple_func import dyfunc_with_if_else +from paddle.jit import ProgramTranslator, to_static SEED = 2020 np.random.seed(SEED) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_rollback.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_rollback.py index da80237b27cd13..e7dabd9af31ea4 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_rollback.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_rollback.py @@ -13,12 +13,12 @@ # limitations under the License. import unittest -import paddle + import numpy as np + +import paddle from paddle.fluid.dygraph.dygraph_to_static.utils import func_to_source_code -from paddle.jit.dy2static.program_translator import ( - StaticFunction, -) +from paddle.jit.dy2static.program_translator import StaticFunction class Net(paddle.nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py index 49d8f2df3aff3d..e58555003e9e04 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py @@ -14,21 +14,21 @@ import logging import math -import time -import unittest import os import tempfile +import time +import unittest + import numpy as np +from predictor_utils import PredictorTools import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable +from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.fluid.dygraph.nn import BatchNorm, Linear, Pool2D -from paddle.jit.api import declarative from paddle.jit import ProgramTranslator -from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX - -from predictor_utils import PredictorTools +from paddle.jit.api import declarative SEED = 2020 np.random.seed(SEED) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py index f81c9a40c94692..98ce0ca7780504 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py @@ -13,17 +13,17 @@ # limitations under the License. 
import time import unittest + import numpy as np +from test_lac import DynamicGRU import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph.nn import Linear, Embedding from paddle.fluid.dygraph import to_variable +from paddle.fluid.dygraph.nn import Embedding, Linear from paddle.jit import ProgramTranslator from paddle.jit.api import declarative -from test_lac import DynamicGRU - SEED = 2020 program_translator = ProgramTranslator() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_setter_helper.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_setter_helper.py index 405eb089d0c51b..725dc032d5e201 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_setter_helper.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_setter_helper.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + from paddle.fluid.dygraph.dygraph_to_static.utils import GetterSetterHelper vars = [1, 2, 3, 4, 5] diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet.py index e4fb982166a326..394783565309d0 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet.py @@ -13,15 +13,16 @@ # limitations under the License. import argparse -import numpy as np -import paddle -import paddle.fluid as fluid import random import unittest -from paddle.jit import ProgramTranslator +import numpy as np from simnet_dygraph_model import BOW, HingeLoss +import paddle +import paddle.fluid as fluid +from paddle.jit import ProgramTranslator + SEED = 102 random.seed(SEED) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet_v2.py index b1ed858f85a125..46afc02f641e78 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet_v2.py @@ -13,13 +13,14 @@ # limitations under the License. import argparse -import numpy as np -import paddle import random import unittest +import numpy as np from simnet_dygraph_model_v2 import BOW, HingeLoss +import paddle + SEED = 102 random.seed(SEED) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_slice.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_slice.py index 658d628d4bb208..87eb6e51e74a50 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_slice.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_slice.py @@ -15,6 +15,7 @@ import os import tempfile import unittest + import numpy as np import paddle diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_spec_names.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_spec_names.py index c34635d7d7296f..2725030835402d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_spec_names.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_spec_names.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + import paddle from paddle.nn import Layer -import unittest class Net(Layer): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_static_analysis.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_static_analysis.py index 7a2e0e09b21455..22598ede71b74f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_static_analysis.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_static_analysis.py @@ -12,17 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.utils import gast import inspect +import unittest + import numpy as np + import paddle import paddle.fluid as fluid -import unittest - from paddle.fluid.dygraph.dygraph_to_static import ( NodeVarType, StaticAnalysisVisitor, ) +from paddle.utils import gast def func_to_test1(a, b): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_methods.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_methods.py index 67ea0a28bc0dd9..307e8f422de6e6 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_methods.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_methods.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np + import paddle -import unittest @paddle.jit.to_static diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_shape.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_shape.py index cb6ec59ee6bc88..4e29f2bf6b44a6 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_shape.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_shape.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np -import unittest import paddle import paddle.fluid as fluid from paddle.jit.api import declarative diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_to_tensor.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_to_tensor.py index 2e8e9f06a1b05a..b0131263c4e69f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_to_tensor.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_to_tensor.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + import numpy + import paddle -import unittest -from paddle.fluid.framework import program_guard, Program from paddle.fluid import core +from paddle.fluid.framework import Program, program_guard def case0(x): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_transformer.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_transformer.py index d9eb890394f800..8d62d17537dc22 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_transformer.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_transformer.py @@ -14,14 +14,11 @@ import logging import os -import time import tempfile +import time import unittest import numpy as np -import paddle -import paddle.fluid as fluid - import transformer_util as util from transformer_dygraph_model import ( CrossEntropyCriterion, @@ -29,6 +26,9 @@ position_encoding_init, ) +import paddle +import paddle.fluid as fluid + trainer_count = 1 place = ( fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py index 064ab0c3a502cb..e8d4bcd9fd27bd 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py @@ -13,18 +13,20 @@ # limitations under the License. import argparse -import numpy as np import os import random import sys import unittest + +import numpy as np +from tsm_config_utils import merge_configs, parse_config, print_configs + import paddle import paddle.fluid as fluid -from paddle.jit.api import declarative -from paddle.jit import ProgramTranslator from paddle.fluid.dygraph import to_variable from paddle.fluid.dygraph.nn import BatchNorm, Linear, Pool2D -from tsm_config_utils import merge_configs, parse_config, print_configs +from paddle.jit import ProgramTranslator +from paddle.jit.api import declarative random.seed(0) np.random.seed(0) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_typehint.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_typehint.py index 3096eff3406265..fa141482b5413f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_typehint.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_typehint.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import paddle.fluid as fluid import unittest +import numpy as np + +import paddle.fluid as fluid from paddle.jit.api import declarative SEED = 2020 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_typing.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_typing.py index e123187c274cf3..1053c64dfd0a32 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_typing.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_typing.py @@ -13,11 +13,13 @@ # limitations under the License. 
import os import tempfile -import paddle import unittest -import numpy as np from typing import Dict, List, Tuple +import numpy as np + +import paddle + class BaseLayer(paddle.nn.Layer): def __init__(self, in_size, out_size): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_utils.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_utils.py index 4edb81504458c4..a310ab65307c58 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_utils.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_utils.py @@ -15,8 +15,10 @@ import types import unittest -from paddle.fluid.dygraph.dygraph_to_static.utils import index_in_list -from paddle.fluid.dygraph.dygraph_to_static.utils import is_paddle_func +from paddle.fluid.dygraph.dygraph_to_static.utils import ( + index_in_list, + is_paddle_func, +) class TestIndexInList(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_warning.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_warning.py index a232c70fa71728..e74e06f8f9b682 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_warning.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_warning.py @@ -13,8 +13,9 @@ # limitations under the License. import unittest -import paddle import warnings + +import paddle from paddle.fluid.layers.control_flow import cond diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py index 50144e74b93c93..7168708f1c739a 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py @@ -12,17 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np import random import time import unittest +import numpy as np +from yolov3 import YOLOv3, cfg + import paddle import paddle.fluid as fluid -from paddle.jit import ProgramTranslator from paddle.fluid.dygraph import to_variable - -from yolov3 import cfg, YOLOv3 +from paddle.jit import ProgramTranslator paddle.enable_static() random.seed(0) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index ea9394001a7d51..c996c54d053a69 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -14,8 +14,10 @@ import numpy as np +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers +import paddle.nn.functional as F from paddle.fluid.dygraph import ( Embedding, Layer, @@ -23,10 +25,8 @@ Linear, to_variable, ) -from paddle.jit.api import dygraph_to_static_func from paddle.fluid.layers.utils import map_structure -import paddle -import paddle.nn.functional as F +from paddle.jit.api import dygraph_to_static_func def position_encoding_init(n_position, d_pos_vec): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py index bb0011de6bc9ed..5922f492a84af5 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py @@ -15,11 +15,12 @@ import pickle import warnings from functools import partial + import numpy as np import paddle -import paddle.fluid as fluid import paddle.dataset.wmt16 as wmt16 +import paddle.fluid as fluid def get_input_descs(args, mode="train"): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py index c005b9e99be169..5af50594e18bff 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py @@ -15,15 +15,13 @@ import os import sys -import paddle +from darknet import ConvBNLayer, DarkNet53_conv_body +import paddle import paddle.fluid as fluid -from paddle.jit.api import declarative from paddle.fluid.param_attr import ParamAttr from paddle.fluid.regularizer import L2Decay - -from darknet import DarkNet53_conv_body -from darknet import ConvBNLayer +from paddle.jit.api import declarative class AttrDict(dict): diff --git a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py index 90551314d9fb01..77b68a9dee6bd7 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py @@ -14,16 +14,15 @@ import tempfile import unittest +from functools import partial import numpy as np + import paddle -from paddle.jit.dy2static.program_translator import ( - ProgramCache, -) from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUD2STest from paddle.jit import to_static +from paddle.jit.dy2static.program_translator import ProgramCache from paddle.optimizer.lr import LRScheduler -from functools import partial class SimpleLayer(paddle.nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_Tensor_type.py b/python/paddle/fluid/tests/unittests/test_Tensor_type.py 
index f18818b8ef5d9e..35efdf7ed48e02 100644 --- a/python/paddle/fluid/tests/unittests/test_Tensor_type.py +++ b/python/paddle/fluid/tests/unittests/test_Tensor_type.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_manual_seed.py b/python/paddle/fluid/tests/unittests/test_manual_seed.py index ac697a49dbd669..419ba5dba888df 100644 --- a/python/paddle/fluid/tests/unittests/test_manual_seed.py +++ b/python/paddle/fluid/tests/unittests/test_manual_seed.py @@ -14,9 +14,10 @@ import unittest +import numpy as np + import paddle import paddle.fluid as fluid -import numpy as np class TestManualSeed(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_margin_cross_entropy_op.py b/python/paddle/fluid/tests/unittests/test_margin_cross_entropy_op.py index d8a5f653a9ae66..8b2136db7ccd45 100644 --- a/python/paddle/fluid/tests/unittests/test_margin_cross_entropy_op.py +++ b/python/paddle/fluid/tests/unittests/test_margin_cross_entropy_op.py @@ -13,11 +13,12 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle -from paddle.fluid import core -from paddle.fluid import Program, program_guard +from paddle.fluid import Program, core, program_guard def stable_softmax_comm(x): diff --git a/python/paddle/fluid/tests/unittests/test_margin_rank_loss_op.py b/python/paddle/fluid/tests/unittests/test_margin_rank_loss_op.py index a931296fa364d7..bf08137b100b72 100644 --- a/python/paddle/fluid/tests/unittests/test_margin_rank_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_margin_rank_loss_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -from paddle import fluid + import paddle +from paddle import fluid class TestMarginRankLossOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_marker_op.py b/python/paddle/fluid/tests/unittests/test_marker_op.py index ed19915c41e008..7b6293def5287a 100644 --- a/python/paddle/fluid/tests/unittests/test_marker_op.py +++ b/python/paddle/fluid/tests/unittests/test_marker_op.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. import unittest + from op_test import OpTest + from paddle.distributed.fleet.meta_optimizers.common import OpRole diff --git a/python/paddle/fluid/tests/unittests/test_masked_select_op.py b/python/paddle/fluid/tests/unittests/test_masked_select_op.py index 7774eb23dc1712..14d06a3d36b825 100644 --- a/python/paddle/fluid/tests/unittests/test_masked_select_op.py +++ b/python/paddle/fluid/tests/unittests/test_masked_select_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_match_matrix_tensor_op.py b/python/paddle/fluid/tests/unittests/test_match_matrix_tensor_op.py index 8b46676704e055..1fab1030e8d648 100644 --- a/python/paddle/fluid/tests/unittests/test_match_matrix_tensor_op.py +++ b/python/paddle/fluid/tests/unittests/test_match_matrix_tensor_op.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_math_op_patch.py b/python/paddle/fluid/tests/unittests/test_math_op_patch.py index 61c843e9780c7b..4ad708df5da7f7 100644 --- a/python/paddle/fluid/tests/unittests/test_math_op_patch.py +++ b/python/paddle/fluid/tests/unittests/test_math_op_patch.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + +import numpy +import numpy as np from decorator_helper import prog_scope + import paddle import paddle.fluid as fluid -import numpy -import numpy as np class TestMathOpPatches(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py index c4c8c72faaa441..972ce32ca6fcf2 100644 --- a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import inspect import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -import numpy as np -import inspect from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_matmul_op.py b/python/paddle/fluid/tests/unittests/test_matmul_op.py index 004ac2459641c9..a5835fd266e096 100644 --- a/python/paddle/fluid/tests/unittests/test_matmul_op.py +++ b/python/paddle/fluid/tests/unittests/test_matmul_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_matmul_op_with_head.py b/python/paddle/fluid/tests/unittests/test_matmul_op_with_head.py index a36d6149b221a7..c1cc6e1402f138 100644 --- a/python/paddle/fluid/tests/unittests/test_matmul_op_with_head.py +++ b/python/paddle/fluid/tests/unittests/test_matmul_op_with_head.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py index a7c199bb4b3fd0..868cec1d592b72 100644 --- a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py @@ -13,14 +13,15 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest, convert_float_to_uint16, get_numeric_gradient -from paddle.fluid.tests.unittests.testsuite import create_op -import paddle.fluid.core as core import paddle import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.tests.unittests.testsuite import create_op def reference_matmul(X, Y, transpose_X=False, transpose_Y=False): diff --git a/python/paddle/fluid/tests/unittests/test_matrix_nms_op.py b/python/paddle/fluid/tests/unittests/test_matrix_nms_op.py index fe43d42cb45bf3..980c0342facafa 100644 --- a/python/paddle/fluid/tests/unittests/test_matrix_nms_op.py +++ b/python/paddle/fluid/tests/unittests/test_matrix_nms_op.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import copy import unittest + import numpy as np -import copy from op_test import OpTest + +import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard -import paddle def python_matrix_nms( diff --git a/python/paddle/fluid/tests/unittests/test_matrix_power_op.py b/python/paddle/fluid/tests/unittests/test_matrix_power_op.py index 0632af3a3a3231..29f82b0350d651 100644 --- a/python/paddle/fluid/tests/unittests/test_matrix_power_op.py +++ b/python/paddle/fluid/tests/unittests/test_matrix_power_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest + +import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle -from op_test import OpTest paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_matrix_rank_op.py b/python/paddle/fluid/tests/unittests/test_matrix_rank_op.py index 671d0831a57bdb..86e751336e6e94 100644 --- a/python/paddle/fluid/tests/unittests/test_matrix_rank_op.py +++ b/python/paddle/fluid/tests/unittests/test_matrix_rank_op.py @@ -16,10 +16,10 @@ import numpy as np -from paddle.fluid.tests.unittests.op_test import OpTest import paddle import paddle.fluid as fluid import paddle.fluid.core as core +from paddle.fluid.tests.unittests.op_test import OpTest paddle.enable_static() SEED = 2049 diff --git a/python/paddle/fluid/tests/unittests/test_max_min_amax_amin_op.py b/python/paddle/fluid/tests/unittests/test_max_min_amax_amin_op.py index 5800cd63141a02..33659356ba97b0 100644 --- a/python/paddle/fluid/tests/unittests/test_max_min_amax_amin_op.py +++ b/python/paddle/fluid/tests/unittests/test_max_min_amax_amin_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_max_op.py b/python/paddle/fluid/tests/unittests/test_max_op.py index 181343bd5f66ca..9cac427d76f3ea 100644 --- a/python/paddle/fluid/tests/unittests/test_max_op.py +++ b/python/paddle/fluid/tests/unittests/test_max_op.py @@ -13,12 +13,14 @@ # limitations under the License. import unittest + import numpy as np from op_test import check_out_dtype +from test_sum_op import TestReduceOPTensorAxisBase + import paddle -from paddle.fluid.framework import _test_eager_guard import paddle.fluid.core as core -from test_sum_op import TestReduceOPTensorAxisBase +from paddle.fluid.framework import _test_eager_guard class ApiMaxTest(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_maximum_op.py b/python/paddle/fluid/tests/unittests/test_maximum_op.py index 0e5ce1c9ff9920..7c58bc63d1d038 100644 --- a/python/paddle/fluid/tests/unittests/test_maximum_op.py +++ b/python/paddle/fluid/tests/unittests/test_maximum_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_maxout_op.py b/python/paddle/fluid/tests/unittests/test_maxout_op.py index 2c34333bd38e5e..1554b246e8a684 100644 --- a/python/paddle/fluid/tests/unittests/test_maxout_op.py +++ b/python/paddle/fluid/tests/unittests/test_maxout_op.py @@ -13,11 +13,13 @@ # limitations under the License. 
import unittest + import numpy as np +from op_test import OpTest + import paddle import paddle.fluid.core as core import paddle.nn.functional as F -from op_test import OpTest from paddle.fluid.framework import _test_eager_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_mean_iou.py b/python/paddle/fluid/tests/unittests/test_mean_iou.py index c37e519c2cf517..7b7162ebd4a23d 100644 --- a/python/paddle/fluid/tests/unittests/test_mean_iou.py +++ b/python/paddle/fluid/tests/unittests/test_mean_iou.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_mean_op.py b/python/paddle/fluid/tests/unittests/test_mean_op.py index a27752f2a9d6e7..5999b3ee0362ab 100644 --- a/python/paddle/fluid/tests/unittests/test_mean_op.py +++ b/python/paddle/fluid/tests/unittests/test_mean_op.py @@ -13,16 +13,18 @@ # limitations under the License. import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope from op_test import OpTest, OpTestTool +from test_sum_op import TestReduceOPTensorAxisBase + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid -from paddle.fluid import Program, program_guard -from test_sum_op import TestReduceOPTensorAxisBase -import gradient_checker -from decorator_helper import prog_scope +import paddle.fluid.core as core import paddle.fluid.layers as layers +from paddle.fluid import Program, program_guard np.random.seed(10) diff --git a/python/paddle/fluid/tests/unittests/test_median.py b/python/paddle/fluid/tests/unittests/test_median.py index c717293b4cd87f..a62e722dd04969 100644 --- a/python/paddle/fluid/tests/unittests/test_median.py +++ b/python/paddle/fluid/tests/unittests/test_median.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle from paddle.static import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_memcpy_op.py b/python/paddle/fluid/tests/unittests/test_memcpy_op.py index 65a246d963624d..395c32bd98a180 100755 --- a/python/paddle/fluid/tests/unittests/test_memcpy_op.py +++ b/python/paddle/fluid/tests/unittests/test_memcpy_op.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest + +import numpy as np + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_memory_analysis.py b/python/paddle/fluid/tests/unittests/test_memory_analysis.py index 8f38433d20c9b9..51140d5c18ef44 100644 --- a/python/paddle/fluid/tests/unittests/test_memory_analysis.py +++ b/python/paddle/fluid/tests/unittests/test_memory_analysis.py @@ -13,12 +13,14 @@ # limitations under the License. 
import unittest + +from simple_nets import simple_fc_net + import paddle from paddle.fluid.memory_analysis import ( - pre_allocate_memory, get_max_memory_info, + pre_allocate_memory, ) -from simple_nets import simple_fc_net class TestMemoryAnalysis(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py b/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py index 839c21b151a266..392559a1b58e2a 100644 --- a/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py +++ b/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid -import numpy as np import unittest +import numpy as np + +import paddle.fluid as fluid + class TestMemoryReuseExcludeFeedVar(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_memory_usage.py b/python/paddle/fluid/tests/unittests/test_memory_usage.py index 8c41d0853fc642..973dba893d4404 100644 --- a/python/paddle/fluid/tests/unittests/test_memory_usage.py +++ b/python/paddle/fluid/tests/unittests/test_memory_usage.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.fluid as fluid import contextlib import unittest +import paddle +import paddle.fluid as fluid + def train_simulator(test_batch_size=10): if test_batch_size <= 0: diff --git a/python/paddle/fluid/tests/unittests/test_merge_selectedrows_op.py b/python/paddle/fluid/tests/unittests/test_merge_selectedrows_op.py index a22571289b28ee..17ab880e6fde55 100644 --- a/python/paddle/fluid/tests/unittests/test_merge_selectedrows_op.py +++ b/python/paddle/fluid/tests/unittests/test_merge_selectedrows_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest -import paddle.fluid.core as core + import numpy as np + +import paddle.fluid.core as core from paddle.fluid.op import Operator diff --git a/python/paddle/fluid/tests/unittests/test_merged_adam_op.py b/python/paddle/fluid/tests/unittests/test_merged_adam_op.py index 49d1cf4bd79a0c..6ff2cb90bc039f 100644 --- a/python/paddle/fluid/tests/unittests/test_merged_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_merged_adam_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest -import paddle + import numpy as np + +import paddle from paddle import _C_ops, _legacy_C_ops from paddle.fluid.framework import in_dygraph_mode diff --git a/python/paddle/fluid/tests/unittests/test_merged_momentum_op.py b/python/paddle/fluid/tests/unittests/test_merged_momentum_op.py index dcba88284fc10e..08d200a911f3cb 100644 --- a/python/paddle/fluid/tests/unittests/test_merged_momentum_op.py +++ b/python/paddle/fluid/tests/unittests/test_merged_momentum_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest -import paddle +from collections import OrderedDict + import numpy as np + +import paddle from paddle.fluid.layer_helper import LayerHelper -from collections import OrderedDict def run_momentum_op( diff --git a/python/paddle/fluid/tests/unittests/test_meshgrid_op.py b/python/paddle/fluid/tests/unittests/test_meshgrid_op.py index 3c3da16c958c4e..521fcf90b89dba 100644 --- a/python/paddle/fluid/tests/unittests/test_meshgrid_op.py +++ b/python/paddle/fluid/tests/unittests/test_meshgrid_op.py @@ -13,10 +13,12 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_min_op.py b/python/paddle/fluid/tests/unittests/test_min_op.py index a81b63dbb586d6..083da6e96ac878 100644 --- a/python/paddle/fluid/tests/unittests/test_min_op.py +++ b/python/paddle/fluid/tests/unittests/test_min_op.py @@ -13,12 +13,14 @@ # limitations under the License. import unittest + import numpy as np from op_test import check_out_dtype +from test_sum_op import TestReduceOPTensorAxisBase + import paddle import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard -from test_sum_op import TestReduceOPTensorAxisBase class ApiMinTest(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_mine_hard_examples_op.py b/python/paddle/fluid/tests/unittests/test_mine_hard_examples_op.py index 2c6417094bc2d6..dacb8b69dab4a9 100644 --- a/python/paddle/fluid/tests/unittests/test_mine_hard_examples_op.py +++ b/python/paddle/fluid/tests/unittests/test_mine_hard_examples_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_minimum_op.py b/python/paddle/fluid/tests/unittests/test_minimum_op.py index 81771950c08233..1ebd40728ca2c6 100644 --- a/python/paddle/fluid/tests/unittests/test_minimum_op.py +++ b/python/paddle/fluid/tests/unittests/test_minimum_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_minus_op.py b/python/paddle/fluid/tests/unittests/test_minus_op.py index 40b4ddd3b579b2..465fca228d958f 100644 --- a/python/paddle/fluid/tests/unittests/test_minus_op.py +++ b/python/paddle/fluid/tests/unittests/test_minus_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py b/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py index 62243890305e5d..9d42b68e144bad 100644 --- a/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py +++ b/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid.core as core import unittest import numpy as np +from parallel_executor_test_base import DeviceType, TestParallelExecutorBase +from simple_nets import init_data + import paddle import paddle.fluid as fluid -from simple_nets import init_data -from parallel_executor_test_base import TestParallelExecutorBase, DeviceType +import paddle.fluid.core as core batch_size = 12 img_shape = [1, 28, 28] diff --git a/python/paddle/fluid/tests/unittests/test_mode_op.py b/python/paddle/fluid/tests/unittests/test_mode_op.py index bec5707ac95650..428a46a2894361 100644 --- a/python/paddle/fluid/tests/unittests/test_mode_op.py +++ b/python/paddle/fluid/tests/unittests/test_mode_op.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_modelaverage.py b/python/paddle/fluid/tests/unittests/test_modelaverage.py index b917733749e7e6..937d293a81dac3 100644 --- a/python/paddle/fluid/tests/unittests/test_modelaverage.py +++ b/python/paddle/fluid/tests/unittests/test_modelaverage.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid import paddle.nn as nn diff --git a/python/paddle/fluid/tests/unittests/test_modified_huber_loss_op.py b/python/paddle/fluid/tests/unittests/test_modified_huber_loss_op.py index 99ce37834b5c38..61eb0c3db25ef0 100644 --- a/python/paddle/fluid/tests/unittests/test_modified_huber_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_modified_huber_loss_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_momentum_op.py b/python/paddle/fluid/tests/unittests/test_momentum_op.py index fd9b8b88016bd7..102ef0a5fc5fde 100644 --- a/python/paddle/fluid/tests/unittests/test_momentum_op.py +++ b/python/paddle/fluid/tests/unittests/test_momentum_op.py @@ -13,14 +13,16 @@ # limitations under the License. import unittest + +import numpy import numpy as np -import paddle.fluid.core as core -from paddle.fluid.op import Operator from op_test import OpTest + import paddle import paddle.fluid as fluid -import numpy +import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.op import Operator def calculate_momentum_by_numpy( diff --git a/python/paddle/fluid/tests/unittests/test_monitor.py b/python/paddle/fluid/tests/unittests/test_monitor.py index dafb23c7d411e6..205414668f9f0c 100644 --- a/python/paddle/fluid/tests/unittests/test_monitor.py +++ b/python/paddle/fluid/tests/unittests/test_monitor.py @@ -19,11 +19,12 @@ paddle.enable_static() -import paddle.fluid as fluid -import paddle.fluid.core as core import os -import unittest import tempfile +import unittest + +import paddle.fluid as fluid +import paddle.fluid.core as core class TestDatasetWithStat(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_mse_loss.py b/python/paddle/fluid/tests/unittests/test_mse_loss.py index 1cf52d4d6742bd..99fbe012953f3d 100644 --- a/python/paddle/fluid/tests/unittests/test_mse_loss.py +++ b/python/paddle/fluid/tests/unittests/test_mse_loss.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid.executor import Executor diff --git a/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py b/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py index 94e12e337ec91a..099280161a97b3 100644 --- a/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py @@ -13,14 +13,15 @@ # limitations under the License. 
import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers import paddle.fluid.core as core -import gradient_checker -from decorator_helper import prog_scope +import paddle.fluid.layers as layers paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_mul_op.py b/python/paddle/fluid/tests/unittests/test_mul_op.py index 5469caed157530..0f093438ed81c4 100644 --- a/python/paddle/fluid/tests/unittests/test_mul_op.py +++ b/python/paddle/fluid/tests/unittests/test_mul_op.py @@ -12,13 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np + import paddle.fluid.core as core -import sys sys.path.append("..") from op_test import OpTest + import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_multi_dot_op.py b/python/paddle/fluid/tests/unittests/test_multi_dot_op.py index 341b34c0d8aadc..e5afa760bd98bf 100644 --- a/python/paddle/fluid/tests/unittests/test_multi_dot_op.py +++ b/python/paddle/fluid/tests/unittests/test_multi_dot_op.py @@ -13,10 +13,11 @@ # limitations under the License. import unittest + import numpy as np -from op_test import OpTest from numpy.linalg import multi_dot from op_test import OpTest + import paddle from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_multi_label_soft_margin_loss.py b/python/paddle/fluid/tests/unittests/test_multi_label_soft_margin_loss.py index b5236553ca665e..af8481df1719c9 100644 --- a/python/paddle/fluid/tests/unittests/test_multi_label_soft_margin_loss.py +++ b/python/paddle/fluid/tests/unittests/test_multi_label_soft_margin_loss.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import numpy as np import unittest +import numpy as np + +import paddle + def call_MultiLabelSoftMarginLoss_layer( input, diff --git a/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py b/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py index 6cc6fdd4311eb5..2b846c0fed8da7 100644 --- a/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py +++ b/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py @@ -12,20 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import unittest + import numpy as np -import copy from op_test import OpTest + import paddle import paddle.fluid as fluid +from paddle import _C_ops, _legacy_C_ops from paddle.fluid import ( Program, - program_guard, - in_dygraph_mode, _non_static_mode, + in_dygraph_mode, + program_guard, ) from paddle.fluid.layer_helper import LayerHelper -from paddle import _C_ops, _legacy_C_ops def multiclass_nms3( diff --git a/python/paddle/fluid/tests/unittests/test_multihead_attention.py b/python/paddle/fluid/tests/unittests/test_multihead_attention.py index 6577953ed355c3..9d94a7713b1f6e 100644 --- a/python/paddle/fluid/tests/unittests/test_multihead_attention.py +++ b/python/paddle/fluid/tests/unittests/test_multihead_attention.py @@ -13,10 +13,12 @@ # limitations under the License. 
import unittest + +import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import numpy as np class TestMultiheadAttention(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_multimarginloss.py b/python/paddle/fluid/tests/unittests/test_multimarginloss.py index 3a54f752eb2fb8..eecd070413a48a 100644 --- a/python/paddle/fluid/tests/unittests/test_multimarginloss.py +++ b/python/paddle/fluid/tests/unittests/test_multimarginloss.py @@ -13,10 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import numpy as np import unittest +import numpy as np + +import paddle + def call_MultiMarginLoss_layer( input, diff --git a/python/paddle/fluid/tests/unittests/test_multinomial_op.py b/python/paddle/fluid/tests/unittests/test_multinomial_op.py index f79dee65250d55..c251dc696cd2df 100644 --- a/python/paddle/fluid/tests/unittests/test_multinomial_op.py +++ b/python/paddle/fluid/tests/unittests/test_multinomial_op.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + +import numpy as np +from op_test import OpTest +from test_attribute_var import UnittestBase + import paddle import paddle.fluid as fluid -from op_test import OpTest -import numpy as np -import os from paddle.fluid import Program, program_guard -from test_attribute_var import UnittestBase def sample_output_one_dimension(out, dim): diff --git a/python/paddle/fluid/tests/unittests/test_multiplex_op.py b/python/paddle/fluid/tests/unittests/test_multiplex_op.py index 75a08f99eccace..3afd01427d5eb5 100644 --- a/python/paddle/fluid/tests/unittests/test_multiplex_op.py +++ b/python/paddle/fluid/tests/unittests/test_multiplex_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_multiply.py b/python/paddle/fluid/tests/unittests/test_multiply.py index a2c213ade9c4cd..ef9cfb2b1dc13b 100755 --- a/python/paddle/fluid/tests/unittests/test_multiply.py +++ b/python/paddle/fluid/tests/unittests/test_multiply.py @@ -18,8 +18,8 @@ import paddle import paddle.tensor as tensor -from paddle.static import Program, program_guard from paddle.fluid.framework import _test_eager_guard +from paddle.static import Program, program_guard class TestMultiplyApi(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dataset.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dataset.py index f3872a388c0616..a09b613a85cc31 100755 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dataset.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dataset.py @@ -13,19 +13,20 @@ # limitations under the License. 
import unittest + import numpy as np import paddle import paddle.fluid as fluid +from paddle.fluid.framework import _test_eager_guard from paddle.io import ( + ChainDataset, + ComposeDataset, + DataLoader, Dataset, IterableDataset, TensorDataset, - ComposeDataset, - ChainDataset, - DataLoader, ) -from paddle.fluid.framework import _test_eager_guard IMAGE_SIZE = 32 diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py index 1c50866020d192..b2b0d32d72a2ca 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py @@ -15,25 +15,23 @@ import sys import time import unittest -import numpy as np - -import paddle.fluid as fluid -from paddle.io import DataLoader -from paddle.fluid.dygraph.nn import Linear +import numpy as np from test_multiprocess_dataloader_static import ( - RandomDataset, - RandomBatchedDataset, - prepare_places, -) -from test_multiprocess_dataloader_static import ( - EPOCH_NUM, BATCH_SIZE, + CLASS_NUM, + EPOCH_NUM, IMAGE_SIZE, SAMPLE_NUM, - CLASS_NUM, + RandomBatchedDataset, + RandomDataset, + prepare_places, ) +import paddle.fluid as fluid +from paddle.fluid.dygraph.nn import Linear +from paddle.io import DataLoader + class SimpleFCNet(fluid.dygraph.Layer): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py index f60c73f45773ac..4a99824578435a 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import multiprocessing +import unittest + import numpy as np import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.io import Dataset, IterableDataset, BatchSampler, DataLoader from paddle.fluid.dataloader.dataloader_iter import _worker_loop +from paddle.io import BatchSampler, DataLoader, Dataset, IterableDataset class RandomDataset(Dataset): diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py index 692d68adebd8e5..1ce77249ea5787 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py @@ -15,25 +15,23 @@ import sys import time import unittest -import numpy as np - -import paddle.fluid as fluid -from paddle.io import DataLoader -from paddle.fluid.dygraph.nn import Linear +import numpy as np from test_multiprocess_dataloader_iterable_dataset_static import ( - RandomDataset, - RandomBatchedDataset, - prepare_places, -) -from test_multiprocess_dataloader_iterable_dataset_static import ( - EPOCH_NUM, BATCH_SIZE, + CLASS_NUM, + EPOCH_NUM, IMAGE_SIZE, SAMPLE_NUM, - CLASS_NUM, + RandomBatchedDataset, + RandomDataset, + prepare_places, ) +import paddle.fluid as fluid +from paddle.fluid.dygraph.nn import Linear +from paddle.io import DataLoader + class SimpleFCNet(fluid.dygraph.Layer): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_split.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_split.py index 57ae8bf46f8634..27068fe74073cb 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_split.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_split.py @@ -14,6 +14,7 @@ import math import unittest + import numpy as np import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py index 49f12ea657e930..8808654e03ed5a 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py @@ -15,6 +15,7 @@ import sys import time import unittest + import numpy as np import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py index 853d7ec1924059..283c68c1a13b86 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py @@ -15,6 +15,7 @@ import sys import time import unittest + import numpy as np import paddle diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py b/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py index 3eae95ef34b057..03257b75fb38c9 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # 
limitations under the License. +import unittest + +import numpy as np + import paddle import paddle.fluid as fluid from paddle.reader import multiprocess_reader -import unittest -import numpy as np class ReaderException(Exception): diff --git a/python/paddle/fluid/tests/unittests/test_mv_op.py b/python/paddle/fluid/tests/unittests/test_mv_op.py index d2381a21e99fd5..45c6a3a48ad261 100644 --- a/python/paddle/fluid/tests/unittests/test_mv_op.py +++ b/python/paddle/fluid/tests/unittests/test_mv_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np -import paddle -from paddle.static import program_guard, Program from op_test import OpTest +import paddle +from paddle.static import Program, program_guard + class TestMVOp(OpTest): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_naive_best_fit_gpu_memory_limit.py b/python/paddle/fluid/tests/unittests/test_naive_best_fit_gpu_memory_limit.py index c6846d7068e0c6..f87e149fc3c63e 100644 --- a/python/paddle/fluid/tests/unittests/test_naive_best_fit_gpu_memory_limit.py +++ b/python/paddle/fluid/tests/unittests/test_naive_best_fit_gpu_memory_limit.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest + import numpy as np +import paddle.fluid as fluid + fluid.core.globals()['FLAGS_allocator_strategy'] = 'naive_best_fit' if fluid.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_name_scope.py b/python/paddle/fluid/tests/unittests/test_name_scope.py index eb61e27e740804..372547722cb187 100644 --- a/python/paddle/fluid/tests/unittests/test_name_scope.py +++ b/python/paddle/fluid/tests/unittests/test_name_scope.py @@ -13,8 +13,9 @@ # limitations under the License. import unittest -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid class TestNameScope(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_nan_inf.py b/python/paddle/fluid/tests/unittests/test_nan_inf.py index 3d6ab7396e7dd1..289b5ec40f9d5f 100644 --- a/python/paddle/fluid/tests/unittests/test_nan_inf.py +++ b/python/paddle/fluid/tests/unittests/test_nan_inf.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import os -import sys import subprocess +import sys +import unittest + import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_nan_to_num_op.py b/python/paddle/fluid/tests/unittests/test_nan_to_num_op.py index a1a5cb1f3881d7..7db79e4e80e553 100644 --- a/python/paddle/fluid/tests/unittests/test_nan_to_num_op.py +++ b/python/paddle/fluid/tests/unittests/test_nan_to_num_op.py @@ -14,7 +14,9 @@ import unittest from typing import Optional + import numpy as np + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_nanmean_api.py b/python/paddle/fluid/tests/unittests/test_nanmean_api.py index bcc9ac5c77a2c2..368251520fe1a1 100644 --- a/python/paddle/fluid/tests/unittests/test_nanmean_api.py +++ b/python/paddle/fluid/tests/unittests/test_nanmean_api.py @@ -13,7 +13,9 @@ # limitations under the License. 
import unittest + import numpy as np + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_nanmedian.py b/python/paddle/fluid/tests/unittests/test_nanmedian.py index 952263ae94b877..aeceadb0ea9b88 100644 --- a/python/paddle/fluid/tests/unittests/test_nanmedian.py +++ b/python/paddle/fluid/tests/unittests/test_nanmedian.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_nansum_api.py b/python/paddle/fluid/tests/unittests/test_nansum_api.py index 16ce26d4dcc4a5..5c56eb21947ab7 100644 --- a/python/paddle/fluid/tests/unittests/test_nansum_api.py +++ b/python/paddle/fluid/tests/unittests/test_nansum_api.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_nce.py b/python/paddle/fluid/tests/unittests/test_nce.py index 2691bf2c98cae7..359cc50fb58ed2 100644 --- a/python/paddle/fluid/tests/unittests/test_nce.py +++ b/python/paddle/fluid/tests/unittests/test_nce.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest + +import numpy as np +from op_test import OpTest + import paddle import paddle.fluid as fluid import paddle.fluid.initializer as initializer from paddle.fluid import Program, program_guard -from op_test import OpTest - def nce( input, weight, bias, sample_weight, labels, num_classes, num_sample_class diff --git a/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py b/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py index f8a997e4fbab6b..4a910e4d83c290 100755 --- a/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -import paddle.fluid.core as core + import paddle.fluid as fluid +import paddle.fluid.core as core def nearest_neighbor_interp_np( diff --git a/python/paddle/fluid/tests/unittests/test_nearest_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_nearest_interp_v2_op.py index fb1e1b08ceab99..e0f62dfa187ca2 100755 --- a/python/paddle/fluid/tests/unittests/test_nearest_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_nearest_interp_v2_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -import paddle.fluid.core as core -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.nn.functional import interpolate paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_neg_op.py b/python/paddle/fluid/tests/unittests/test_neg_op.py index 260d0fa35dc4b5..53f01b94d303c7 100644 --- a/python/paddle/fluid/tests/unittests/test_neg_op.py +++ b/python/paddle/fluid/tests/unittests/test_neg_op.py @@ -13,7 +13,9 @@ # limitations under the License. 
import unittest + import numpy as np + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_newprofiler.py b/python/paddle/fluid/tests/unittests/test_newprofiler.py index 3a6dfddad85f3b..6dfe6250f91b67 100755 --- a/python/paddle/fluid/tests/unittests/test_newprofiler.py +++ b/python/paddle/fluid/tests/unittests/test_newprofiler.py @@ -12,16 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import tempfile import unittest + import numpy as np -import tempfile -import os + import paddle -import paddle.profiler as profiler -import paddle.profiler.utils as utils import paddle.nn as nn import paddle.nn.functional as F -from paddle.io import Dataset, DataLoader +import paddle.profiler as profiler +import paddle.profiler.utils as utils +from paddle.io import DataLoader, Dataset class TestProfiler(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_nll_loss.py b/python/paddle/fluid/tests/unittests/test_nll_loss.py index a598c501ec0526..159c682ce27984 100644 --- a/python/paddle/fluid/tests/unittests/test_nll_loss.py +++ b/python/paddle/fluid/tests/unittests/test_nll_loss.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.fluid as fluid -import numpy as np import unittest + +import numpy as np from op_test import OpTest + +import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_nms_op.py b/python/paddle/fluid/tests/unittests/test_nms_op.py index 80f2da013251d6..19a122f602e3a9 100755 --- a/python/paddle/fluid/tests/unittests/test_nms_op.py +++ b/python/paddle/fluid/tests/unittests/test_nms_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py index a7062e600258e1..a584324c6eb3ff 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py @@ -14,8 +14,9 @@ import unittest -import paddle import numpy as np + +import paddle from paddle.fluid.framework import _test_eager_guard paddle.disable_static() diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py index b5c354c7486f67..1051ef8d4b00db 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle.fluid as fluid import paddle.nn.functional as functional diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py b/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py index f7126a998818f3..0faf226fac4d4f 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_nn_grad.py b/python/paddle/fluid/tests/unittests/test_nn_grad.py index c774351db998f1..657d3f4dfb0829 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_nn_grad.py @@ -13,14 +13,15 @@ # limitations under the License. import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers import paddle.fluid.core as core -import gradient_checker -from decorator_helper import prog_scope +import paddle.fluid.layers as layers paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py b/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py index 2ddfd653a40f34..9ebd1158648ca5 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py +++ b/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_nn_matmul_v2_grad.py b/python/paddle/fluid/tests/unittests/test_nn_matmul_v2_grad.py index 0c3bca6ba0a886..917386bcac5294 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_matmul_v2_grad.py +++ b/python/paddle/fluid/tests/unittests/test_nn_matmul_v2_grad.py @@ -13,13 +13,14 @@ # limitations under the License. import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import gradient_checker -from decorator_helper import prog_scope paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_nn_quant_functional_layers.py b/python/paddle/fluid/tests/unittests/test_nn_quant_functional_layers.py index 6edc691f23a086..7c1c8b54609108 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_quant_functional_layers.py +++ b/python/paddle/fluid/tests/unittests/test_nn_quant_functional_layers.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py b/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py index 45840d20319155..1e8738340bb603 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py +++ b/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py @@ -13,11 +13,12 @@ # limitations under the License. import unittest + import numpy as np -import paddle -import paddle.fluid.core as core + import paddle import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.nn as nn import paddle.nn.functional as functional diff --git a/python/paddle/fluid/tests/unittests/test_nonzero_api.py b/python/paddle/fluid/tests/unittests/test_nonzero_api.py index 77363d1d3e73b7..dce29b96e55596 100644 --- a/python/paddle/fluid/tests/unittests/test_nonzero_api.py +++ b/python/paddle/fluid/tests/unittests/test_nonzero_api.py @@ -13,7 +13,9 @@ # limitations under the License. 
import unittest + import numpy as np + import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_norm_all.py b/python/paddle/fluid/tests/unittests/test_norm_all.py index 17047d349cd162..98367452abbe3e 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_all.py +++ b/python/paddle/fluid/tests/unittests/test_norm_all.py @@ -13,13 +13,15 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest, convert_float_to_uint16 + import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle import _C_ops, _legacy_C_ops -from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph +from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode # hack method for test p_norm final state diff --git a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py index ed6b94432a491c..3142d63afcb1f3 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py @@ -13,15 +13,15 @@ # limitations under the License. import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope +import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers import paddle.fluid.core as core -import gradient_checker -import paddle - -from decorator_helper import prog_scope +import paddle.fluid.layers as layers class TestInstanceNormDoubleGradCheck(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_norm_op.py b/python/paddle/fluid/tests/unittests/test_norm_op.py index d5b360cb729c9c..82e87967716ff5 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_norm_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest, skip_check_grad_ci + import paddle import paddle.fluid as fluid -from op_test import OpTest, skip_check_grad_ci def l2_norm(x, axis, epsilon): diff --git a/python/paddle/fluid/tests/unittests/test_normal.py b/python/paddle/fluid/tests/unittests/test_normal.py index d24f5f02d2bc03..76e9c7a2f328e5 100644 --- a/python/paddle/fluid/tests/unittests/test_normal.py +++ b/python/paddle/fluid/tests/unittests/test_normal.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import unittest + import numpy as np + import paddle -import copy np.random.seed(10) paddle.seed(10) diff --git a/python/paddle/fluid/tests/unittests/test_normalization_wrapper.py b/python/paddle/fluid/tests/unittests/test_normalization_wrapper.py index 72acbee5d51e2a..0ed4be44b6d0f5 100644 --- a/python/paddle/fluid/tests/unittests/test_normalization_wrapper.py +++ b/python/paddle/fluid/tests/unittests/test_normalization_wrapper.py @@ -13,10 +13,12 @@ # limitations under the License. 
import unittest + +import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import numpy as np class TestNormalization(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_normalize.py b/python/paddle/fluid/tests/unittests/test_normalize.py index e4fed2904e977b..bd35cbd998ec40 100644 --- a/python/paddle/fluid/tests/unittests/test_normalize.py +++ b/python/paddle/fluid/tests/unittests/test_normalize.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + +import numpy as np + import paddle -import paddle.nn.functional as F import paddle.fluid as fluid -import numpy as np +import paddle.nn.functional as F from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_npair_loss_op.py b/python/paddle/fluid/tests/unittests/test_npair_loss_op.py index 97a732325aab9e..efbf8a98a87b8d 100644 --- a/python/paddle/fluid/tests/unittests/test_npair_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_npair_loss_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest -import paddle.fluid as fluid + +import numpy as np + import paddle +import paddle.fluid as fluid import paddle.fluid.core as core -import numpy as np from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_npu_identity_op.py b/python/paddle/fluid/tests/unittests/test_npu_identity_op.py index b79811dabe8eee..da87384c4566c3 100644 --- a/python/paddle/fluid/tests/unittests/test_npu_identity_op.py +++ b/python/paddle/fluid/tests/unittests/test_npu_identity_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_number_count_op.py b/python/paddle/fluid/tests/unittests/test_number_count_op.py index a31fb1a5978c12..032d582035dc09 100644 --- a/python/paddle/fluid/tests/unittests/test_number_count_op.py +++ b/python/paddle/fluid/tests/unittests/test_number_count_op.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import op_test -import numpy as np import unittest + +import numpy as np +import op_test + import paddle import paddle.fluid.core as core from paddle.distributed.models.moe import utils diff --git a/python/paddle/fluid/tests/unittests/test_numel_op.py b/python/paddle/fluid/tests/unittests/test_numel_op.py index fbc43bf4b8469f..1878c8409f5a38 100644 --- a/python/paddle/fluid/tests/unittests/test_numel_op.py +++ b/python/paddle/fluid/tests/unittests/test_numel_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid class TestNumelOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_one_hot_op.py b/python/paddle/fluid/tests/unittests/test_one_hot_op.py index 4ca0017052eb94..d0f9c338f92ab4 100644 --- a/python/paddle/fluid/tests/unittests/test_one_hot_op.py +++ b/python/paddle/fluid/tests/unittests/test_one_hot_op.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py b/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py index 23d277c04e59fb..0be5ee13a1b27d 100644 --- a/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py @@ -13,12 +13,14 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.framework import Program, program_guard, _test_eager_guard +from paddle.fluid.framework import Program, _test_eager_guard, program_guard class TestOneHotOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_ones_like.py b/python/paddle/fluid/tests/unittests/test_ones_like.py index fe7a66acecdf4b..0ee8ab313a53b9 100644 --- a/python/paddle/fluid/tests/unittests/test_ones_like.py +++ b/python/paddle/fluid/tests/unittests/test_ones_like.py @@ -13,12 +13,13 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid -from paddle import _C_ops -from paddle import ones_like -from paddle.fluid import core, Program, program_guard +from paddle import _C_ops, ones_like +from paddle.fluid import Program, core, program_guard from paddle.fluid.framework import convert_np_dtype_to_dtype_ diff --git a/python/paddle/fluid/tests/unittests/test_ones_op.py b/python/paddle/fluid/tests/unittests/test_ones_op.py index a82842b3c72aaa..7c93de4b1a446b 100644 --- a/python/paddle/fluid/tests/unittests/test_ones_op.py +++ b/python/paddle/fluid/tests/unittests/test_ones_op.py @@ -13,11 +13,11 @@ # limitations under the License. import unittest + import numpy as np import paddle import paddle.fluid as fluid -import numpy as np class ApiOnesTest(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_onnx_export.py b/python/paddle/fluid/tests/unittests/test_onnx_export.py index ca5ac65d93bbb5..e4e461bdf025fc 100644 --- a/python/paddle/fluid/tests/unittests/test_onnx_export.py +++ b/python/paddle/fluid/tests/unittests/test_onnx_export.py @@ -13,9 +13,10 @@ # limitations under the License. import unittest + import numpy as np -import paddle +import paddle from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_op_function_generator.py b/python/paddle/fluid/tests/unittests/test_op_function_generator.py index 9b261b9eff940d..8e7afd0f1adec2 100644 --- a/python/paddle/fluid/tests/unittests/test_op_function_generator.py +++ b/python/paddle/fluid/tests/unittests/test_op_function_generator.py @@ -13,12 +13,14 @@ # limitations under the License. 
import unittest -from paddle.fluid.framework import in_dygraph_mode + +import numpy as np + import paddle.fluid as fluid import paddle.fluid.layers as layers -from paddle.jit.api import TracedLayer -import numpy as np from paddle import _legacy_C_ops +from paddle.fluid.framework import in_dygraph_mode +from paddle.jit.api import TracedLayer class TestTracedLayer(fluid.dygraph.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_op_name_conflict.py b/python/paddle/fluid/tests/unittests/test_op_name_conflict.py index e255eb7b01f967..a3b96b8f444e9b 100644 --- a/python/paddle/fluid/tests/unittests/test_op_name_conflict.py +++ b/python/paddle/fluid/tests/unittests/test_op_name_conflict.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid -import numpy as np import unittest +import numpy as np + +import paddle.fluid as fluid + class TestOpNameConflict(unittest.TestCase): def test_conflict(self): diff --git a/python/paddle/fluid/tests/unittests/test_op_support_gpu.py b/python/paddle/fluid/tests/unittests/test_op_support_gpu.py index fe7fd6ab3d5f8f..b9209f75a57c98 100644 --- a/python/paddle/fluid/tests/unittests/test_op_support_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_op_support_gpu.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_op_version.py b/python/paddle/fluid/tests/unittests/test_op_version.py index a677a6d3511fc9..2e7230d06bb3f4 100644 --- a/python/paddle/fluid/tests/unittests/test_op_version.py +++ b/python/paddle/fluid/tests/unittests/test_op_version.py @@ -14,8 +14,8 @@ import unittest -import paddle.utils as utils import paddle.fluid as fluid +import paddle.utils as utils class OpLastCheckpointCheckerTest(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_operator_desc.py b/python/paddle/fluid/tests/unittests/test_operator_desc.py index af3eecec826c22..e04b2ee19ba1b0 100644 --- a/python/paddle/fluid/tests/unittests/test_operator_desc.py +++ b/python/paddle/fluid/tests/unittests/test_operator_desc.py @@ -15,7 +15,6 @@ import unittest import paddle.fluid.core as core - from paddle.fluid.framework import Program, default_startup_program main_program = default_startup_program() diff --git a/python/paddle/fluid/tests/unittests/test_ops_nms.py b/python/paddle/fluid/tests/unittests/test_ops_nms.py index be4d5f4921324a..3b44b8ee444ca0 100644 --- a/python/paddle/fluid/tests/unittests/test_ops_nms.py +++ b/python/paddle/fluid/tests/unittests/test_ops_nms.py @@ -13,11 +13,13 @@ # limitations under the License. 
import os +import tempfile import unittest + import numpy as np -import paddle from test_nms_op import nms -import tempfile + +import paddle def _find(condition): diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py index 4cfc6c66a50d5b..b3d6c75d9a12de 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer.py @@ -16,21 +16,22 @@ import tempfile import unittest +import numpy +import numpy as np + +import paddle import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.fluid.framework as framework import paddle.fluid.optimizer as optimizer -import paddle.fluid.core as core -import numpy as np from paddle.fluid.backward import append_backward from paddle.fluid.framework import ( Program, - program_guard, + _test_eager_guard, convert_np_dtype_to_dtype_, + program_guard, ) -from paddle.fluid.framework import _test_eager_guard -import paddle from paddle.io import Dataset -import numpy class TestOptimizer(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_for_varbase.py b/python/paddle/fluid/tests/unittests/test_optimizer_for_varbase.py index fd73c042cb2da7..183423ba9896c8 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_for_varbase.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_for_varbase.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest +import numpy as np + import paddle import paddle.optimizer as optimizer -from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph +from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard class TestOptimizerForVarBase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py index df2277703b2344..d5e5a7a200c93a 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py @@ -13,15 +13,15 @@ # limitations under the License. 
import unittest -import numpy as np from collections import defaultdict +import numpy as np + +import paddle import paddle.fluid as fluid import paddle.fluid.optimizer as optimizer from paddle.fluid.backward import _append_grad_suffix_ -import paddle - paddle.enable_static() np.random.seed(10) diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py index ee3e443a9dfc2f..de9c02f776835d 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py @@ -16,12 +16,13 @@ import unittest import numpy as np + import paddle import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.fluid.layers as layers import paddle.fluid.optimizer as optimizer from paddle.fluid.framework import Program, program_guard -import paddle.fluid.core as core BATCH_SIZE = 1 INPUT_SIZE = 784 diff --git a/python/paddle/fluid/tests/unittests/test_outer.py b/python/paddle/fluid/tests/unittests/test_outer.py index 5625618776c29d..dfd185433a430d 100644 --- a/python/paddle/fluid/tests/unittests/test_outer.py +++ b/python/paddle/fluid/tests/unittests/test_outer.py @@ -17,8 +17,8 @@ import numpy as np import paddle -from paddle.static import Program, program_guard from paddle.fluid.framework import _test_eager_guard +from paddle.static import Program, program_guard class TestMultiplyApi(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_overlap_add_op.py b/python/paddle/fluid/tests/unittests/test_overlap_add_op.py index 430fde53aceed2..50e96e12a1d7c0 100644 --- a/python/paddle/fluid/tests/unittests/test_overlap_add_op.py +++ b/python/paddle/fluid/tests/unittests/test_overlap_add_op.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import paddle import unittest +import numpy as np from op_test import OpTest +import paddle + def overlap_add(x, hop_length, axis=-1): assert axis in [0, -1], 'axis should be 0/-1.' diff --git a/python/paddle/fluid/tests/unittests/test_pad2d_op.py b/python/paddle/fluid/tests/unittests/test_pad2d_op.py index 320d615f63b9fb..3f26961873a439 100644 --- a/python/paddle/fluid/tests/unittests/test_pad2d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pad2d_op.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np from op_test import OpTest -import unittest class TestPad2dOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_pad3d_op.py b/python/paddle/fluid/tests/unittests/test_pad3d_op.py index ee4a9aed670739..cd93f48b7ebfe3 100644 --- a/python/paddle/fluid/tests/unittests/test_pad3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pad3d_op.py @@ -13,14 +13,15 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + import paddle +import paddle.fluid.core as core import paddle.nn as nn import paddle.nn.functional as F -import paddle.fluid.core as core - -from paddle.fluid import Program, program_guard, Executor, default_main_program +from paddle.fluid import Executor, Program, default_main_program, program_guard class TestPad3dOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_pad_constant_like.py b/python/paddle/fluid/tests/unittests/test_pad_constant_like.py index ee957096b5b194..e304bdf29e4c2a 100644 --- a/python/paddle/fluid/tests/unittests/test_pad_constant_like.py +++ b/python/paddle/fluid/tests/unittests/test_pad_constant_like.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_pad_op.py b/python/paddle/fluid/tests/unittests/test_pad_op.py index 916f0399d9f01e..c78692597c1350 100644 --- a/python/paddle/fluid/tests/unittests/test_pad_op.py +++ b/python/paddle/fluid/tests/unittests/test_pad_op.py @@ -14,15 +14,16 @@ import os import unittest + import numpy as np from op_test import OpTest +from test_attribute_var import UnittestBase + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard -from test_attribute_var import UnittestBase - class TestPadOp(OpTest): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py b/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py index 9d7bfc6888f1a8..e992fe1f34ec54 100644 --- a/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py +++ b/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid -from paddle.fluid.wrapped_decorator import wrap_decorator import unittest from unittest import TestCase + import numpy as np + import paddle -from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph +import paddle.fluid as fluid +from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard +from paddle.fluid.wrapped_decorator import wrap_decorator def _dygraph_guard_(func): diff --git a/python/paddle/fluid/tests/unittests/test_paddle_multiprocessing.py b/python/paddle/fluid/tests/unittests/test_paddle_multiprocessing.py index e050cde7ccc690..9117a2d86a09d7 100644 --- a/python/paddle/fluid/tests/unittests/test_paddle_multiprocessing.py +++ b/python/paddle/fluid/tests/unittests/test_paddle_multiprocessing.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import gc -import unittest +import os import time +import unittest + import paddle import paddle.incubate.multiprocessing as mp diff --git a/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py b/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py index 5eca9492ee3961..478570100e03ef 100644 --- a/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py +++ b/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py @@ -12,17 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest -import numpy as np -from io import BytesIO import os import platform import tempfile +import unittest +from io import BytesIO + +import numpy as np +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework -from test_imperative_base import new_program_scope IMAGE_SIZE = 784 diff --git a/python/paddle/fluid/tests/unittests/test_pairwise_distance.py b/python/paddle/fluid/tests/unittests/test_pairwise_distance.py index 675196f57ebb26..8e7463abd9464e 100644 --- a/python/paddle/fluid/tests/unittests/test_pairwise_distance.py +++ b/python/paddle/fluid/tests/unittests/test_pairwise_distance.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -import numpy as np -import unittest def np_pairwise_distance(x, y, p=2.0, epsilon=1e-6, keepdim=False): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py index a452f2b2b15d65..9e4be19dacadea 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py @@ -12,18 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest -import time -import paddle.fluid as fluid import copy import os import subprocess +import time +import unittest +import paddle.fluid as fluid from paddle.distributed.utils.launch_utils import ( + TrainerProc, find_free_ports, - watch_local_trainers, get_cluster, - TrainerProc, + watch_local_trainers, ) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel_cpuonly.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel_cpuonly.py index 540c668a6f8edd..c3f3caa1383c92 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel_cpuonly.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel_cpuonly.py @@ -12,17 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest -import time import copy import os import subprocess +import time +import unittest from paddle.distributed.utils.launch_utils import ( + TrainerProc, find_free_ports, - watch_local_trainers, get_cluster, - TrainerProc, + watch_local_trainers, ) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer_gloo.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer_gloo.py index 1af28e65cd7edb..5fc66fe78e6624 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer_gloo.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer_gloo.py @@ -15,9 +15,10 @@ import os import unittest -import paddle.fluid as fluid from test_dist_base import TestDistBase +import paddle.fluid as fluid + flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py index 7f30c15735ffba..b49ebd663fcbb7 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import unittest + +import paddle import paddle.dataset.conll05 as conll05 import paddle.fluid as fluid -from paddle.fluid import compiler import paddle.fluid.core as core -import unittest -import paddle -import os +from paddle.fluid import compiler word_dict, verb_dict, label_dict = conll05.get_dict() word_dict_len = len(word_dict) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py index e0e545448b5b3b..ea6805a6bf9d86 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + +import numpy + import paddle import paddle.fluid as fluid -import numpy -import os class TestParallelExecutorDropExeScope(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py index 51b234c3719141..69de05ad8c35d7 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging +import os +import unittest + import paddle import paddle.fluid as fluid from paddle.fluid import compiler -import unittest -import logging -import os os.environ['CPU_NUM'] = str(4) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_feed_persistable_var.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_feed_persistable_var.py index 4b5781921e2961..98d0c8df8c184c 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_feed_persistable_var.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_feed_persistable_var.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os +import unittest from functools import partial + import numpy -import unittest +from simple_nets import init_data, simple_fc_net + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid -from simple_nets import init_data, simple_fc_net -import os +import paddle.fluid.core as core class TestFeedPersistableVar(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py index 950ff45e86f39b..f00595b1145e71 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py @@ -13,13 +13,15 @@ # limitations under the License. import math +import os +import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -from paddle.fluid import compiler import paddle.fluid.core as core -import unittest -import numpy as np -import os +from paddle.fluid import compiler def Lenet(data, class_dim): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py index 9cfd502b3a376f..b18525d727bcfd 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + import numpy as np -import paddle.fluid as fluid + import paddle -import os +import paddle.fluid as fluid def enable_parallel_ssa_executor(enabled=True): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_fix_op_run_order.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_fix_op_run_order.py index 4dcf4673a09efd..8c047878ac9c4b 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_fix_op_run_order.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_fix_op_run_order.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.fluid as fluid import unittest + import numpy as np -from paddle.vision.models import resnet18 + +import paddle +import paddle.fluid as fluid from paddle.nn import CrossEntropyLoss +from paddle.vision.models import resnet18 class TestFixOpRunOrder(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py index 37ccb429b6e968..2704352460d41e 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.fluid as fluid -import numpy as np import unittest +import numpy as np + +import paddle.fluid as fluid + class TestInferencePartialFeed(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py index 1341ccaad31af9..4dc0020b91fd46 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py @@ -12,15 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest import numpy as np -import paddle.fluid.core as core +from parallel_executor_test_base import DeviceType, TestParallelExecutorBase + import paddle -import os import paddle.fluid as fluid -from parallel_executor_test_base import TestParallelExecutorBase, DeviceType -from parallel_executor_test_base import DeviceType +import paddle.fluid.core as core def simple_fc_net(use_feed): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py index 8813c962f8b6d0..45a4ecffa8c059 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py @@ -12,16 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest import numpy as np -import os os.environ['FLAGS_enable_parallel_graph'] = str(1) -import paddle.fluid.core as core import os -from parallel_executor_test_base import TestParallelExecutorBase, DeviceType -from simple_nets import simple_fc_net, init_data + +from parallel_executor_test_base import DeviceType, TestParallelExecutorBase +from simple_nets import init_data, simple_fc_net + +import paddle.fluid.core as core class TestMNIST(TestParallelExecutorBase): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_profiler.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_profiler.py index 4d8dc470fa9e46..9b250c35014d48 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_profiler.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_profiler.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.tests.unittests.test_profiler import TestProfiler -import os # NCCL 2.7 decides to use shared memory while NCCL 2.6 didn't, hence causing the error. # include/shm.h:28 NCCL WARN Call to posix_fallocate failed: No space left on device diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py index 3bb546eb917929..135944f145b46d 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py @@ -13,13 +13,15 @@ # limitations under the License. 
import logging -import numpy as np import os -import paddle import shutil import tempfile import unittest +import numpy as np + +import paddle + paddle.enable_static() logging.basicConfig( diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_load_infer_program.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_load_infer_program.py index 461461f7975291..2e92c8189e6704 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_load_infer_program.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_load_infer_program.py @@ -14,8 +14,9 @@ import unittest +from simple_nets import init_data, simple_fc_net + import paddle.fluid as fluid -from simple_nets import simple_fc_net, init_data class TestMNIST(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_cpu.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_cpu.py index 79f5fd4c3ca307..7c9c9968c4a182 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_cpu.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_cpu.py @@ -13,10 +13,11 @@ # limitations under the License. import unittest -import seresnext_net -from seresnext_test_base import TestResnetBase, DeviceType from functools import partial +import seresnext_net +from seresnext_test_base import DeviceType, TestResnetBase + class TestResnetCPU(TestResnetBase): def test_seresnext_with_learning_rate_decay(self): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_gpu.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_gpu.py index 6f7f0f507a4ed3..dd339645f5f06f 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_base_gpu.py @@ -13,10 +13,11 @@ # limitations under the License. 
import unittest -import seresnext_net -from seresnext_test_base import TestResnetBase, DeviceType from functools import partial +import seresnext_net +from seresnext_test_base import DeviceType, TestResnetBase + class TestResnetGPU(TestResnetBase): def test_seresnext_with_learning_rate_decay(self): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_cpu.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_cpu.py index 46b7bb83147bdc..0a8bac2df0ec49 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_cpu.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_cpu.py @@ -18,10 +18,11 @@ fluid.core._set_fuse_parameter_memory_size(131072) import unittest -import seresnext_net -from seresnext_test_base import TestResnetBase, DeviceType from functools import partial +import seresnext_net +from seresnext_test_base import DeviceType, TestResnetBase + class TestResnetWithFuseAllReduceCPU(TestResnetBase): def test_seresnext_with_fused_all_reduce(self): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_gpu.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_gpu.py index ca349fc1100902..151beb96b3c474 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_fuse_all_reduce_gpu.py @@ -18,10 +18,11 @@ fluid.core._set_fuse_parameter_memory_size(131072) import unittest -import seresnext_net -from seresnext_test_base import TestResnetBase, DeviceType from functools import partial +import seresnext_net +from seresnext_test_base import DeviceType, TestResnetBase + class TestResnetWithFuseAllReduceGPU(TestResnetBase): def test_seresnext_with_fused_all_reduce(self): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py index abd650468c5313..e5ab71f2cafec6 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_cpu.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest -from parallel_executor_test_base import TestParallelExecutorBase, DeviceType + import seresnext_net +from parallel_executor_test_base import DeviceType, TestParallelExecutorBase + import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_gpu.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_gpu.py index e4cce04e2a413b..187f837e7e7b1e 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext_with_reduce_gpu.py @@ -13,9 +13,10 @@ # limitations under the License. 
import unittest + from test_parallel_executor_seresnext_with_reduce_cpu import ( - TestResnetWithReduceBase, DeviceType, + TestResnetWithReduceBase, ) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py index 9391424692b43b..9d5d884c27b5d5 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py @@ -12,15 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import math +import os +import sys +import unittest + +import numpy as np from simple_nets import simple_fc_net + import paddle.fluid as fluid -from paddle.fluid import compiler import paddle.fluid.core as core -import numpy as np -import unittest -import os -import sys -import math +from paddle.fluid import compiler class ParallelExecutorTestingDuringTraining(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py index 47f11e380430aa..e7c40e8a7eb2b4 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py @@ -12,15 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import transformer_model -import numpy as np -from parallel_executor_test_base import TestParallelExecutorBase, DeviceType +import os import unittest + +import numpy as np +import transformer_model +from feed_data_reader import FeedDataReader +from parallel_executor_test_base import DeviceType, TestParallelExecutorBase + import paddle -import paddle.fluid.core as core import paddle.dataset.wmt16 as wmt16 -import os -from feed_data_reader import FeedDataReader +import paddle.fluid.core as core os.environ['CPU_NUM'] = str(4) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_ssa_graph_inference_feed_partial_data.py b/python/paddle/fluid/tests/unittests/test_parallel_ssa_graph_inference_feed_partial_data.py index cf6862a4761b02..fe318e772c5c65 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_ssa_graph_inference_feed_partial_data.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_ssa_graph_inference_feed_partial_data.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest +import paddle.fluid as fluid + fluid.core.globals()['FLAGS_enable_parallel_graph'] = 1 if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_parameter.py b/python/paddle/fluid/tests/unittests/test_parameter.py index d077fb98362850..bb4a8bfab7b80c 100644 --- a/python/paddle/fluid/tests/unittests/test_parameter.py +++ b/python/paddle/fluid/tests/unittests/test_parameter.py @@ -12,21 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import copy +import unittest + +import numpy as np + import paddle +import paddle.fluid.core as core +import paddle.fluid.io as io from paddle.fluid.dygraph import guard +from paddle.fluid.executor import Executor from paddle.fluid.framework import ( - default_main_program, + ParamBase, Variable, _test_eager_guard, - ParamBase, + default_main_program, ) -import paddle.fluid.core as core -from paddle.fluid.executor import Executor -import paddle.fluid.io as io from paddle.fluid.initializer import ConstantInitializer -import numpy as np paddle.enable_static() main_program = default_main_program() diff --git a/python/paddle/fluid/tests/unittests/test_partial_concat_op.py b/python/paddle/fluid/tests/unittests/test_partial_concat_op.py index 8046d8fa1d7e5a..61a201970402a7 100644 --- a/python/paddle/fluid/tests/unittests/test_partial_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_partial_concat_op.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random import unittest + import numpy as np from op_test import OpTest -import random def np_partial_concat(inputs, start, length): diff --git a/python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py b/python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py index eb9cae94b73b71..bd3eb6d377645a 100644 --- a/python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py @@ -13,8 +13,9 @@ # limitations under the License. import unittest -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid fluid.core._set_eager_deletion_mode(0.0, 0.55, True) diff --git a/python/paddle/fluid/tests/unittests/test_partial_sum_op.py b/python/paddle/fluid/tests/unittests/test_partial_sum_op.py index 9a5304f7b42aec..86c952008c8518 100644 --- a/python/paddle/fluid/tests/unittests/test_partial_sum_op.py +++ b/python/paddle/fluid/tests/unittests/test_partial_sum_op.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random import unittest + import numpy as np from op_test import OpTest -import random class TestPartialSumOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_pass_builder.py b/python/paddle/fluid/tests/unittests/test_pass_builder.py index d8b29249a0847c..dfdcaac934e020 100644 --- a/python/paddle/fluid/tests/unittests/test_pass_builder.py +++ b/python/paddle/fluid/tests/unittests/test_pass_builder.py @@ -12,16 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import math +import os +import sys +import tempfile +import unittest + +import numpy as np from simple_nets import simple_fc_net + import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid import compiler -import numpy as np -import unittest -import os -import sys -import math -import tempfile class TestPassBuilder(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py b/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py index 2b20ed0fc88fd2..196a4ddbd40056 100644 --- a/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py +++ b/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py @@ -13,13 +13,14 @@ # limitations under the License. 
import unittest -import numpy as np +import numpy as np from op_test import OpTest + import paddle -import paddle.nn.functional as F -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.nn.functional as F def pixel_shuffle_np(x, up_factor, data_format="NCHW"): diff --git a/python/paddle/fluid/tests/unittests/test_pixel_unshuffle.py b/python/paddle/fluid/tests/unittests/test_pixel_unshuffle.py index 8f4de9314d3c6f..2aa064c2dc8699 100644 --- a/python/paddle/fluid/tests/unittests/test_pixel_unshuffle.py +++ b/python/paddle/fluid/tests/unittests/test_pixel_unshuffle.py @@ -13,13 +13,14 @@ # limitations under the License. import unittest -import numpy as np +import numpy as np from op_test import OpTest + import paddle -import paddle.nn.functional as F -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.nn.functional as F def pixel_unshuffle_np(x, down_factor, data_format="NCHW"): diff --git a/python/paddle/fluid/tests/unittests/test_poisson_op.py b/python/paddle/fluid/tests/unittests/test_poisson_op.py index 4e4a20c77cb5fc..3c2fa7c1cbae4a 100644 --- a/python/paddle/fluid/tests/unittests/test_poisson_op.py +++ b/python/paddle/fluid/tests/unittests/test_poisson_op.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import math import unittest -import paddle + import numpy as np from op_test import OpTest -import math + +import paddle from paddle.fluid.framework import _test_eager_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py b/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py index b5deecc47007dd..82390ef1ca5969 100644 --- a/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py +++ b/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid as fluid from op_test import OpTest +import paddle.fluid as fluid + def PolygonBoxRestore(input): shape = input.shape diff --git a/python/paddle/fluid/tests/unittests/test_pool1d_api.py b/python/paddle/fluid/tests/unittests/test_pool1d_api.py index a3aa60e0ebd10a..73d75d63c413bd 100644 --- a/python/paddle/fluid/tests/unittests/test_pool1d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool1d_api.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest + +import numpy as np + +import paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.nn.functional as F -import numpy as np from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_pool2d_api.py b/python/paddle/fluid/tests/unittests/test_pool2d_api.py index 7e100d4c90558f..b7dcbaf9b5d887 100644 --- a/python/paddle/fluid/tests/unittests/test_pool2d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool2d_api.py @@ -13,18 +13,20 @@ # limitations under the License. 
import unittest -import paddle + import numpy as np -import paddle.fluid as fluid -import paddle.fluid.core as core -from paddle.fluid.framework import _test_eager_guard -from paddle.nn.functional import avg_pool2d, max_pool2d from test_pool2d_op import ( avg_pool2D_forward_naive, max_pool2D_forward_naive, pool2D_forward_naive, ) +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.framework import _test_eager_guard +from paddle.nn.functional import avg_pool2d, max_pool2d + class TestPool2D_API(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_pool2d_op.py b/python/paddle/fluid/tests/unittests/test_pool2d_op.py index 3692ef86279ef0..fb802b60b87274 100644 --- a/python/paddle/fluid/tests/unittests/test_pool2d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pool2d_op.py @@ -13,12 +13,13 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid.core as core -from paddle.fluid.tests.unittests.op_test import OpTest import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard +from paddle.fluid.tests.unittests.op_test import OpTest def adaptive_start_index(index, input_size, output_size): diff --git a/python/paddle/fluid/tests/unittests/test_pool3d_api.py b/python/paddle/fluid/tests/unittests/test_pool3d_api.py index 29882a6c8bce81..09fb7210f87c84 100644 --- a/python/paddle/fluid/tests/unittests/test_pool3d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool3d_api.py @@ -12,20 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest + import numpy as np -import paddle.fluid as fluid -import paddle.fluid.core as core -from paddle.fluid.framework import _test_eager_guard -from paddle.nn.functional import avg_pool3d, max_pool3d -from paddle.fluid.framework import _test_eager_guard from test_pool3d_op import ( avg_pool3D_forward_naive, max_pool3D_forward_naive, pool3D_forward_naive, ) +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.framework import _test_eager_guard +from paddle.nn.functional import avg_pool3d, max_pool3d + class TestPool3D_API(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_pool_max_op.py b/python/paddle/fluid/tests/unittests/test_pool_max_op.py index f324adb6c8d544..89822c145401a7 100644 --- a/python/paddle/fluid/tests/unittests/test_pool_max_op.py +++ b/python/paddle/fluid/tests/unittests/test_pool_max_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_positive_negative_pair_op.py b/python/paddle/fluid/tests/unittests/test_positive_negative_pair_op.py index d666157e7d2877..a24e3a507fa5a9 100644 --- a/python/paddle/fluid/tests/unittests/test_positive_negative_pair_op.py +++ b/python/paddle/fluid/tests/unittests/test_positive_negative_pair_op.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import itertools +import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_pow.py b/python/paddle/fluid/tests/unittests/test_pow.py index ea42c6f3e12ee3..77a5a8a7d25ba4 100755 --- a/python/paddle/fluid/tests/unittests/test_pow.py +++ b/python/paddle/fluid/tests/unittests/test_pow.py @@ -17,8 +17,8 @@ import numpy as np import paddle -from paddle.static import Program, program_guard import paddle.fluid.core as core +from paddle.static import Program, program_guard DYNAMIC = 1 STATIC = 2 diff --git a/python/paddle/fluid/tests/unittests/test_pow2_decay_with_linear_warmup_op.py b/python/paddle/fluid/tests/unittests/test_pow2_decay_with_linear_warmup_op.py index 9249f51b04653f..87aa63ef21ef0f 100644 --- a/python/paddle/fluid/tests/unittests/test_pow2_decay_with_linear_warmup_op.py +++ b/python/paddle/fluid/tests/unittests/test_pow2_decay_with_linear_warmup_op.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import paddle from paddle.fluid.contrib.layers.nn import pow2_decay_with_linear_warmup -from paddle.optimizer.lr import LinearWarmup -from paddle.optimizer.lr import PolynomialDecay -import unittest +from paddle.optimizer.lr import LinearWarmup, PolynomialDecay def gen_pow2_warmup_op_lr(warmup_steps, total_steps, base_lr, end_lr, place): diff --git a/python/paddle/fluid/tests/unittests/test_precision_recall_op.py b/python/paddle/fluid/tests/unittests/test_precision_recall_op.py index 8bc80de3429d58..97f3d7e7724a47 100644 --- a/python/paddle/fluid/tests/unittests/test_precision_recall_op.py +++ b/python/paddle/fluid/tests/unittests/test_precision_recall_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_prelu_op.py b/python/paddle/fluid/tests/unittests/test_prelu_op.py index c267286be9dc4f..50b703bc27c725 100644 --- a/python/paddle/fluid/tests/unittests/test_prelu_op.py +++ b/python/paddle/fluid/tests/unittests/test_prelu_op.py @@ -13,13 +13,15 @@ # limitations under the License. 
import unittest + import numpy as np -import paddle.fluid as fluid -import paddle.fluid.core as core -from paddle.fluid import Program from op_test import OpTest, skip_check_grad_ci + import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.nn.functional as F +from paddle.fluid import Program from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_print_op.py b/python/paddle/fluid/tests/unittests/test_print_op.py index 372515f4f025d1..75e8e93987dd5c 100755 --- a/python/paddle/fluid/tests/unittests/test_print_op.py +++ b/python/paddle/fluid/tests/unittests/test_print_op.py @@ -15,13 +15,13 @@ import unittest import numpy as np +from simple_nets import init_data, simple_fc_net import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers from paddle.fluid import core from paddle.fluid.framework import switch_main_program -from simple_nets import simple_fc_net, init_data from paddle.static import Program, program_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_prior_box_op.py b/python/paddle/fluid/tests/unittests/test_prior_box_op.py index 00a5804e76f2ef..493b952d57bf30 100644 --- a/python/paddle/fluid/tests/unittests/test_prior_box_op.py +++ b/python/paddle/fluid/tests/unittests/test_prior_box_op.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import math import unittest + import numpy as np -import math from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_prod_op.py b/python/paddle/fluid/tests/unittests/test_prod_op.py index 24605da11e8a7c..47b41aafc32731 100644 --- a/python/paddle/fluid/tests/unittests/test_prod_op.py +++ b/python/paddle/fluid/tests/unittests/test_prod_op.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest + import numpy as np from test_sum_op import TestReduceOPTensorAxisBase +import paddle + class TestProdOp(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_profiler.py b/python/paddle/fluid/tests/unittests/test_profiler.py index 011d9fc4dfd059..e248a4f7f5b1dd 100644 --- a/python/paddle/fluid/tests/unittests/test_profiler.py +++ b/python/paddle/fluid/tests/unittests/test_profiler.py @@ -12,18 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import os import tempfile +import unittest + import numpy as np + import paddle -import paddle.utils as utils import paddle.fluid as fluid -import paddle.fluid.profiler as profiler -import paddle.fluid.layers as layers import paddle.fluid.core as core +import paddle.fluid.layers as layers +import paddle.fluid.profiler as profiler import paddle.fluid.proto.profiler.profiler_pb2 as profiler_pb2 - +import paddle.utils as utils from paddle.utils.flops import flops diff --git a/python/paddle/fluid/tests/unittests/test_program.py b/python/paddle/fluid/tests/unittests/test_program.py index 834222be8cd1f6..1b38cf4f5fae9a 100644 --- a/python/paddle/fluid/tests/unittests/test_program.py +++ b/python/paddle/fluid/tests/unittests/test_program.py @@ -14,10 +14,10 @@ import unittest -from paddle.fluid.framework import Program, default_main_program, program_guard import paddle -import paddle.fluid.layers as layers import paddle.fluid as fluid +import paddle.fluid.layers as layers +from paddle.fluid.framework import Program, default_main_program, program_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py index 9be70eeca2a508..04c707b320f9aa 100755 --- a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py +++ b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py @@ -12,21 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. +import contextlib import unittest -import contextlib import numpy as np -import paddle.fluid as fluid -import paddle.fluid.core as core -from simple_nets import init_data, simple_fc_net, fc_with_batchnorm import seresnext_net +from fake_reader import fake_imdb_reader +from simple_nets import fc_with_batchnorm, init_data, simple_fc_net from test_parallel_executor_transformer import ( - transformer, - get_feed_data_reader, DeviceType, + get_feed_data_reader, + transformer, ) -from fake_reader import fake_imdb_reader + import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core def lstm_net(use_feed): diff --git a/python/paddle/fluid/tests/unittests/test_program_to_string.py b/python/paddle/fluid/tests/unittests/test_program_to_string.py index fda96d90467701..58edaab5823022 100644 --- a/python/paddle/fluid/tests/unittests/test_program_to_string.py +++ b/python/paddle/fluid/tests/unittests/test_program_to_string.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest +import paddle.fluid as fluid + class TestProgram(unittest.TestCase): def test_program_to_string(self): diff --git a/python/paddle/fluid/tests/unittests/test_protobuf.py b/python/paddle/fluid/tests/unittests/test_protobuf.py index c3f1fa80185bfc..7e5ecf955d061a 100644 --- a/python/paddle/fluid/tests/unittests/test_protobuf.py +++ b/python/paddle/fluid/tests/unittests/test_protobuf.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.fluid.proto.framework_pb2 as framework_pb2 import unittest +import paddle.fluid.proto.framework_pb2 as framework_pb2 + class TestFrameworkProto(unittest.TestCase): def test_all(self): diff --git a/python/paddle/fluid/tests/unittests/test_protobuf_descs.py b/python/paddle/fluid/tests/unittests/test_protobuf_descs.py index 1099855eec7ebe..6efa642373799f 100644 --- a/python/paddle/fluid/tests/unittests/test_protobuf_descs.py +++ b/python/paddle/fluid/tests/unittests/test_protobuf_descs.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle.fluid.core as core from paddle.fluid.framework import Program diff --git a/python/paddle/fluid/tests/unittests/test_proximal_adagrad_op.py b/python/paddle/fluid/tests/unittests/test_proximal_adagrad_op.py index 3c2689585061af..45d25d3a213502 100644 --- a/python/paddle/fluid/tests/unittests/test_proximal_adagrad_op.py +++ b/python/paddle/fluid/tests/unittests/test_proximal_adagrad_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_proximal_gd_op.py b/python/paddle/fluid/tests/unittests/test_proximal_gd_op.py index 137594b9a08e13..d55c1ffcc2d8d7 100644 --- a/python/paddle/fluid/tests/unittests/test_proximal_gd_op.py +++ b/python/paddle/fluid/tests/unittests/test_proximal_gd_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_prune.py b/python/paddle/fluid/tests/unittests/test_prune.py index 5b7596d813d743..2c0c9078285f7c 100644 --- a/python/paddle/fluid/tests/unittests/test_prune.py +++ b/python/paddle/fluid/tests/unittests/test_prune.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import contextlib +import os import unittest +import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework -import numpy as np -import os -import contextlib class TestPrune(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_prune_gate_by_capacity_op.py b/python/paddle/fluid/tests/unittests/test_prune_gate_by_capacity_op.py index 0caab5573470ef..62d9a4ea1cb7f9 100644 --- a/python/paddle/fluid/tests/unittests/test_prune_gate_by_capacity_op.py +++ b/python/paddle/fluid/tests/unittests/test_prune_gate_by_capacity_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest -import paddle + import numpy as np + +import paddle from paddle.distributed.models.moe import utils from paddle.fluid import core from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_ps_dispatcher.py b/python/paddle/fluid/tests/unittests/test_ps_dispatcher.py index 88d6c6f45ac47f..8bb74fe7490c0b 100644 --- a/python/paddle/fluid/tests/unittests/test_ps_dispatcher.py +++ b/python/paddle/fluid/tests/unittests/test_ps_dispatcher.py @@ -13,10 +13,11 @@ # limitations under the License. 
import unittest + from paddle.fluid.incubate.fleet.parameter_server.ir.ps_dispatcher import ( - RoundRobin, HashName, PSDispatcher, + RoundRobin, ) diff --git a/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py b/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py index 0bb1adb4ad65fd..40f3c52d4fc030 100644 --- a/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py +++ b/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import math -import numpy as np import unittest + +import numpy as np from op_test import OpTest +import paddle + def calc_psroi_pool( x, diff --git a/python/paddle/fluid/tests/unittests/test_pull_gpups_sparse_op.py b/python/paddle/fluid/tests/unittests/test_pull_gpups_sparse_op.py index 3b2eb69a549bf8..b537a31b3aae87 100644 --- a/python/paddle/fluid/tests/unittests/test_pull_gpups_sparse_op.py +++ b/python/paddle/fluid/tests/unittests/test_pull_gpups_sparse_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid from paddle.fluid.layers.nn import _pull_gpups_sparse diff --git a/python/paddle/fluid/tests/unittests/test_put_along_axis_op.py b/python/paddle/fluid/tests/unittests/test_put_along_axis_op.py index b5a1cb25e4d58e..3b2cf82fbfd391 100644 --- a/python/paddle/fluid/tests/unittests/test_put_along_axis_op.py +++ b/python/paddle/fluid/tests/unittests/test_put_along_axis_op.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import unittest + import numpy as np -import copy from op_test import OpTest + import paddle from paddle.framework import core diff --git a/python/paddle/fluid/tests/unittests/test_py_func_op.py b/python/paddle/fluid/tests/unittests/test_py_func_op.py index a764dbf4a595bb..51d7af4993009c 100644 --- a/python/paddle/fluid/tests/unittests/test_py_func_op.py +++ b/python/paddle/fluid/tests/unittests/test_py_func_op.py @@ -13,12 +13,14 @@ # limitations under the License. import os -import paddle.fluid as fluid -from paddle.fluid import compiler -import paddle import unittest + import numpy as np +import paddle +import paddle.fluid as fluid +from paddle.fluid import compiler + dev_cnt = 2 if fluid.core.is_compiled_with_cuda(): dev_cnt = fluid.core.get_cuda_device_count() diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_combination.py b/python/paddle/fluid/tests/unittests/test_py_reader_combination.py index 1c399b89cec57b..e238eeaf4cb854 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_combination.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_combination.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -import paddle.fluid as fluid import unittest + import numpy as np +import paddle +import paddle.fluid as fluid + class TestPyReaderCombination(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py b/python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py index 40cdcdcc19f3a5..0a198647dcca1f 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest + import numpy as np + import paddle +import paddle.fluid as fluid class TestPyReaderErrorMsg(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_lod_level_share.py b/python/paddle/fluid/tests/unittests/test_py_reader_lod_level_share.py index ef55a226dea43a..c7fb6a8df59514 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_lod_level_share.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_lod_level_share.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest +import paddle.fluid as fluid + class TestLoDLevelShare(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_pin_memory.py b/python/paddle/fluid/tests/unittests/test_py_reader_pin_memory.py index 4268d6fbcc196e..509d5f65292b4a 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_pin_memory.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_pin_memory.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + +import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import numpy as np def user_reader(inputs): diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py b/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py index 4ccaaf0274669b..7f2dc7817c8df2 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest -import paddle.fluid as fluid -import numpy as np from threading import Thread +import numpy as np + +import paddle.fluid as fluid + def feed_data(feed_queue, inputs): for in_data in inputs: diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_return_list.py b/python/paddle/fluid/tests/unittests/test_py_reader_return_list.py index bfc08245ee33b5..d18a66b82854cb 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_return_list.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_return_list.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -import paddle.fluid as fluid import unittest + import numpy as np +import paddle +import paddle.fluid as fluid + class TestPyReader(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_sample_generator.py b/python/paddle/fluid/tests/unittests/test_py_reader_sample_generator.py index fa1297d46b6b96..19cd4f546acbcd 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_sample_generator.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_sample_generator.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import paddle.fluid as fluid import math +import os import unittest + import numpy as np -import os + +import paddle +import paddle.fluid as fluid os.environ['CPU_NUM'] = '1' diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py index dca3b061cbff00..01ab760b6e2c32 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py @@ -12,16 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import multiprocessing +import os +import threading import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -from paddle.fluid import compiler -import paddle.fluid.unique_name as unique_name import paddle.fluid.core as core -import numpy as np -import threading -import multiprocessing -import os +import paddle.fluid.unique_name as unique_name +from paddle.fluid import compiler os.environ['CPU_NUM'] = str(4) diff --git a/python/paddle/fluid/tests/unittests/test_pybind_interface.py b/python/paddle/fluid/tests/unittests/test_pybind_interface.py index 854aa7f3e9e76a..1b9de400931d27 100644 --- a/python/paddle/fluid/tests/unittests/test_pybind_interface.py +++ b/python/paddle/fluid/tests/unittests/test_pybind_interface.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/test_pylayer_op.py b/python/paddle/fluid/tests/unittests/test_pylayer_op.py index f99be249cbd860..5e78a40097f962 100644 --- a/python/paddle/fluid/tests/unittests/test_pylayer_op.py +++ b/python/paddle/fluid/tests/unittests/test_pylayer_op.py @@ -13,10 +13,11 @@ # limitations under the License. import unittest + import numpy as np import paddle -from paddle.autograd.py_layer import LegacyPyLayer, EagerPyLayer +from paddle.autograd.py_layer import EagerPyLayer, LegacyPyLayer from paddle.fluid.framework import _test_eager_guard, in_dygraph_mode diff --git a/python/paddle/fluid/tests/unittests/test_pyramid_hash_op.py b/python/paddle/fluid/tests/unittests/test_pyramid_hash_op.py index d84730fa3bd12d..6af32d58705466 100644 --- a/python/paddle/fluid/tests/unittests/test_pyramid_hash_op.py +++ b/python/paddle/fluid/tests/unittests/test_pyramid_hash_op.py @@ -13,7 +13,9 @@ # limitations under the License. 
import unittest + import numpy as np + import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_python_bf16_numpy_datatype.py b/python/paddle/fluid/tests/unittests/test_python_bf16_numpy_datatype.py index 4a650dc07b973d..36c77b8449501b 100644 --- a/python/paddle/fluid/tests/unittests/test_python_bf16_numpy_datatype.py +++ b/python/paddle/fluid/tests/unittests/test_python_bf16_numpy_datatype.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np from paddle_bfloat import bfloat16 -import unittest class TestBF16DataType(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_qr_op.py b/python/paddle/fluid/tests/unittests/test_qr_op.py index 39360d2e721588..44dbeb902b9f62 100644 --- a/python/paddle/fluid/tests/unittests/test_qr_op.py +++ b/python/paddle/fluid/tests/unittests/test_qr_op.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import itertools +import unittest + import numpy as np +from op_test import OpTest + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from op_test import OpTest class TestQrOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_quantile_and_nanquantile.py b/python/paddle/fluid/tests/unittests/test_quantile_and_nanquantile.py index 3d7406c457e475..60e3ae08d36ec8 100644 --- a/python/paddle/fluid/tests/unittests/test_quantile_and_nanquantile.py +++ b/python/paddle/fluid/tests/unittests/test_quantile_and_nanquantile.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle API_list = [ diff --git a/python/paddle/fluid/tests/unittests/test_query_op.py b/python/paddle/fluid/tests/unittests/test_query_op.py index a97530febfa619..edb1848f03bafc 100644 --- a/python/paddle/fluid/tests/unittests/test_query_op.py +++ b/python/paddle/fluid/tests/unittests/test_query_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/test_queue.py b/python/paddle/fluid/tests/unittests/test_queue.py index bfc2f1e0e6f221..0b06cfb1d29d11 100644 --- a/python/paddle/fluid/tests/unittests/test_queue.py +++ b/python/paddle/fluid/tests/unittests/test_queue.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest +import numpy as np + import paddle.fluid as fluid -import paddle.fluid.layers as layers import paddle.fluid.core as core +import paddle.fluid.layers as layers class TestQueue(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_rad2deg.py b/python/paddle/fluid/tests/unittests/test_rad2deg.py index 220cf96f04f545..7332a113b87ea2 100644 --- a/python/paddle/fluid/tests/unittests/test_rad2deg.py +++ b/python/paddle/fluid/tests/unittests/test_rad2deg.py @@ -13,7 +13,9 @@ # limitations under the License. 
import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_rand_op.py b/python/paddle/fluid/tests/unittests/test_rand_op.py index 35038054144244..f4daf2b7ec4de4 100644 --- a/python/paddle/fluid/tests/unittests/test_rand_op.py +++ b/python/paddle/fluid/tests/unittests/test_rand_op.py @@ -13,13 +13,14 @@ # limitations under the License. import unittest + import numpy as np +import paddle +import paddle.fluid as fluid import paddle.fluid.core as core from paddle import rand -import paddle.fluid as fluid from paddle.fluid import Program, program_guard -import paddle class TestRandOpError(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_randint_like.py b/python/paddle/fluid/tests/unittests/test_randint_like.py index 32a717d3815bff..76e7b204be475e 100644 --- a/python/paddle/fluid/tests/unittests/test_randint_like.py +++ b/python/paddle/fluid/tests/unittests/test_randint_like.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest + import numpy as np + import paddle -from paddle.static import program_guard, Program +from paddle.static import Program, program_guard # Test python API diff --git a/python/paddle/fluid/tests/unittests/test_randint_op.py b/python/paddle/fluid/tests/unittests/test_randint_op.py index dcb59a97d0f859..6289d814367340 100644 --- a/python/paddle/fluid/tests/unittests/test_randint_op.py +++ b/python/paddle/fluid/tests/unittests/test_randint_op.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest + import numpy as np from op_test import OpTest + +import paddle +import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.framework import _test_eager_guard -from paddle.static import program_guard, Program -import paddle.fluid as fluid +from paddle.static import Program, program_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_randn_op.py b/python/paddle/fluid/tests/unittests/test_randn_op.py index d136e3fd57e14a..8c9d89bd4037a2 100644 --- a/python/paddle/fluid/tests/unittests/test_randn_op.py +++ b/python/paddle/fluid/tests/unittests/test_randn_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid.core as core -from paddle.static import program_guard, Program +from paddle.static import Program, program_guard class TestRandnOp(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_random_crop_op.py b/python/paddle/fluid/tests/unittests/test_random_crop_op.py index abad6d4cb9d2cc..08355378207c13 100644 --- a/python/paddle/fluid/tests/unittests/test_random_crop_op.py +++ b/python/paddle/fluid/tests/unittests/test_random_crop_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_random_routing_op.py b/python/paddle/fluid/tests/unittests/test_random_routing_op.py index 03cce6c9caa8c0..e9320e5fa72b84 100644 --- a/python/paddle/fluid/tests/unittests/test_random_routing_op.py +++ b/python/paddle/fluid/tests/unittests/test_random_routing_op.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np import unittest + +import numpy as np + import paddle import paddle.fluid.core as core from paddle.distributed.models.moe import utils diff --git a/python/paddle/fluid/tests/unittests/test_random_seed.py b/python/paddle/fluid/tests/unittests/test_random_seed.py index 97f3fa56dc5264..5a3e92eb9fdd4a 100644 --- a/python/paddle/fluid/tests/unittests/test_random_seed.py +++ b/python/paddle/fluid/tests/unittests/test_random_seed.py @@ -14,12 +14,13 @@ """Test cloud role maker.""" import unittest -import paddle.fluid.generator as generator -import paddle.fluid as fluid import numpy as np + import paddle +import paddle.fluid as fluid import paddle.fluid.core as core +import paddle.fluid.generator as generator class TestGeneratorSeed(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_randperm_op.py b/python/paddle/fluid/tests/unittests/test_randperm_op.py index 12543a727a2117..307541cb897a48 100644 --- a/python/paddle/fluid/tests/unittests/test_randperm_op.py +++ b/python/paddle/fluid/tests/unittests/test_randperm_op.py @@ -13,12 +13,14 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid.core as core -from paddle.static import program_guard, Program from paddle.fluid.framework import _test_eager_guard +from paddle.static import Program, program_guard def check_randperm_out(n, data_np): diff --git a/python/paddle/fluid/tests/unittests/test_range.py b/python/paddle/fluid/tests/unittests/test_range.py index cd6057e13b5c94..07a6a1b21eaa2f 100644 --- a/python/paddle/fluid/tests/unittests/test_range.py +++ b/python/paddle/fluid/tests/unittests/test_range.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest +from functools import partial + import numpy as np from op_test import OpTest -from functools import partial + +import paddle def arange_wrapper(start, end, step, dtype=None): diff --git a/python/paddle/fluid/tests/unittests/test_rank_attention_op.py b/python/paddle/fluid/tests/unittests/test_rank_attention_op.py index 679475a58843cc..74ceba3de6096f 100644 --- a/python/paddle/fluid/tests/unittests/test_rank_attention_op.py +++ b/python/paddle/fluid/tests/unittests/test_rank_attention_op.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random import unittest + import numpy as np -import random -from op_test import OpTest from op_test import OpTest + import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_rank_loss_op.py b/python/paddle/fluid/tests/unittests/test_rank_loss_op.py index 49820853aa1154..e246310ddaaca1 100644 --- a/python/paddle/fluid/tests/unittests/test_rank_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_rank_loss_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_raw_program_optimizer.py b/python/paddle/fluid/tests/unittests/test_raw_program_optimizer.py index fe86a667d1e880..085500380e1f89 100644 --- a/python/paddle/fluid/tests/unittests/test_raw_program_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_raw_program_optimizer.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os import unittest +import numpy as np + import paddle -import paddle.fluid as fluid import paddle.distributed.fleet as fleet -import numpy as np -import os +import paddle.fluid as fluid class TestRawProgramOptimizer(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_reader_reset.py b/python/paddle/fluid/tests/unittests/test_reader_reset.py index edd3c6c28b1135..bbdade712e1113 100644 --- a/python/paddle/fluid/tests/unittests/test_reader_reset.py +++ b/python/paddle/fluid/tests/unittests/test_reader_reset.py @@ -15,11 +15,13 @@ import os os.environ['CPU_NUM'] = str(1) +import unittest + +import numpy as np + +import paddle import paddle.fluid as fluid from paddle.fluid import compiler -import paddle -import numpy as np -import unittest class TestReaderReset(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_real_imag_op.py b/python/paddle/fluid/tests/unittests/test_real_imag_op.py index bcd8f3c561edcd..6f186063df3163 100644 --- a/python/paddle/fluid/tests/unittests/test_real_imag_op.py +++ b/python/paddle/fluid/tests/unittests/test_real_imag_op.py @@ -13,12 +13,13 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest import paddle import paddle.fluid as fluid import paddle.static as static -from op_test import OpTest numpy_apis = { "real": np.real, diff --git a/python/paddle/fluid/tests/unittests/test_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_recurrent_op.py index 9abfba1b70f2ea..cf7459fcadd4fa 100644 --- a/python/paddle/fluid/tests/unittests/test_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_recurrent_op.py @@ -13,16 +13,17 @@ # limitations under the License. import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers -import numpy as np import paddle.fluid.core as core - +import paddle.fluid.layers as layers from paddle.fluid import ParamAttr -from paddle.fluid.framework import Program, grad_var_name -from paddle.fluid.executor import Executor from paddle.fluid.backward import append_backward +from paddle.fluid.executor import Executor +from paddle.fluid.framework import Program, grad_var_name np.random.seed(123) diff --git a/python/paddle/fluid/tests/unittests/test_reduce_op.py b/python/paddle/fluid/tests/unittests/test_reduce_op.py index eed188379da492..f6434e662942cb 100644 --- a/python/paddle/fluid/tests/unittests/test_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_reduce_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np -from op_test import OpTest, skip_check_grad_ci, convert_float_to_uint16 +from op_test import OpTest, convert_float_to_uint16, skip_check_grad_ci + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard from paddle.fluid.framework import convert_np_dtype_to_dtype_ diff --git a/python/paddle/fluid/tests/unittests/test_reducescatter.py b/python/paddle/fluid/tests/unittests/test_reducescatter.py index ed46c272bca470..0d542507959395 100644 --- a/python/paddle/fluid/tests/unittests/test_reducescatter.py +++ b/python/paddle/fluid/tests/unittests/test_reducescatter.py @@ -13,10 +13,11 @@ # limitations under the License. 
import unittest -import paddle from test_collective_base import TestDistBase +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_reducescatter_api.py b/python/paddle/fluid/tests/unittests/test_reducescatter_api.py index e5d6b261536857..8153f2f81c0ada 100644 --- a/python/paddle/fluid/tests/unittests/test_reducescatter_api.py +++ b/python/paddle/fluid/tests/unittests/test_reducescatter_api.py @@ -13,11 +13,12 @@ # limitations under the License. import unittest -import paddle.fluid as fluid -import paddle from test_collective_base import TestDistBase +import paddle +import paddle.fluid as fluid + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_registry.py b/python/paddle/fluid/tests/unittests/test_registry.py index 3fc6370b6c754f..1427d0e07548aa 100644 --- a/python/paddle/fluid/tests/unittests/test_registry.py +++ b/python/paddle/fluid/tests/unittests/test_registry.py @@ -14,11 +14,12 @@ import unittest -import paddle -import paddle.fluid as fluid import numpy as np from decorator_helper import prog_scope +import paddle +import paddle.fluid as fluid + class TestRegistry(unittest.TestCase): @prog_scope() diff --git a/python/paddle/fluid/tests/unittests/test_regularizer.py b/python/paddle/fluid/tests/unittests/test_regularizer.py index 989ad205e466cd..ba81625a04d085 100644 --- a/python/paddle/fluid/tests/unittests/test_regularizer.py +++ b/python/paddle/fluid/tests/unittests/test_regularizer.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import contextlib +import random import unittest from functools import partial -import contextlib + import numpy as np -import random + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.fluid.framework as framework import paddle.fluid.regularizer as regularizer from paddle.fluid.backward import append_backward diff --git a/python/paddle/fluid/tests/unittests/test_regularizer_api.py b/python/paddle/fluid/tests/unittests/test_regularizer_api.py index e7b3bb38d77e88..aee1e8c25eee6a 100644 --- a/python/paddle/fluid/tests/unittests/test_regularizer_api.py +++ b/python/paddle/fluid/tests/unittests/test_regularizer_api.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import contextlib +import random import unittest from functools import partial -import contextlib + import numpy as np -import random + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core def bow_net( diff --git a/python/paddle/fluid/tests/unittests/test_renorm_op.py b/python/paddle/fluid/tests/unittests/test_renorm_op.py index 38362f039eaa28..7a4cac34ecdae5 100644 --- a/python/paddle/fluid/tests/unittests/test_renorm_op.py +++ b/python/paddle/fluid/tests/unittests/test_renorm_op.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest -import paddle + import numpy as np + +import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py index 22aa94eb86aae8..2e480f7ca15fdf 100644 --- a/python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import functools import unittest + +import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.layers.control_flow import lod_rank_table from paddle.fluid import Program, program_guard -import numpy as np -import functools +from paddle.fluid.layers.control_flow import lod_rank_table def convert_to_offset(lod): diff --git a/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py b/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py index 7332c36e1d2841..093cb17b631e12 100644 --- a/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py +++ b/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest -import paddle + import numpy as np from op_test import OpTest + +import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_require_version.py b/python/paddle/fluid/tests/unittests/test_require_version.py index 92066e95392c0e..24ffb9f289ecaf 100644 --- a/python/paddle/fluid/tests/unittests/test_require_version.py +++ b/python/paddle/fluid/tests/unittests/test_require_version.py @@ -13,10 +13,11 @@ # limitations under the License. import unittest -import paddle.fluid as fluid -import paddle.version as fluid_version import warnings + import paddle +import paddle.fluid as fluid +import paddle.version as fluid_version class VersionTest(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_reset_grad_inplace_version.py b/python/paddle/fluid/tests/unittests/test_reset_grad_inplace_version.py index a13a0d2c9e8807..24cfe0a51df204 100644 --- a/python/paddle/fluid/tests/unittests/test_reset_grad_inplace_version.py +++ b/python/paddle/fluid/tests/unittests/test_reset_grad_inplace_version.py @@ -11,11 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np + import paddle from paddle import _legacy_C_ops from paddle.fluid.framework import _test_eager_guard -import unittest paddle.set_device('cpu') diff --git a/python/paddle/fluid/tests/unittests/test_reshape_op.py b/python/paddle/fluid/tests/unittests/test_reshape_op.py index 4445cf34261461..046c576ce914b0 100755 --- a/python/paddle/fluid/tests/unittests/test_reshape_op.py +++ b/python/paddle/fluid/tests/unittests/test_reshape_op.py @@ -13,9 +13,10 @@ # limitations under the License. 
import unittest -import numpy as np +import numpy as np from op_test import OpTest, convert_float_to_uint16 + import paddle import paddle.fluid as fluid from paddle.static import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py b/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py index 754bb0dcb5918a..42f7c608a97026 100644 --- a/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py +++ b/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py @@ -13,9 +13,11 @@ # limitations under the License. import logging +import unittest + import numpy as np + import paddle -import unittest paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_retain_graph.py b/python/paddle/fluid/tests/unittests/test_retain_graph.py index ff0c6ba86e8ec0..ad9739f1986ed0 100644 --- a/python/paddle/fluid/tests/unittests/test_retain_graph.py +++ b/python/paddle/fluid/tests/unittests/test_retain_graph.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np + import paddle import paddle.fluid as fluid -import unittest paddle.disable_static() SEED = 2020 diff --git a/python/paddle/fluid/tests/unittests/test_retinanet_detection_output.py b/python/paddle/fluid/tests/unittests/test_retinanet_detection_output.py index 5d8527477af7a8..2e43d10129c8f5 100644 --- a/python/paddle/fluid/tests/unittests/test_retinanet_detection_output.py +++ b/python/paddle/fluid/tests/unittests/test_retinanet_detection_output.py @@ -12,15 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import math import unittest + import numpy as np -import math from op_test import OpTest from test_anchor_generator_op import anchor_generator_in_python from test_multiclass_nms_op import nms + +import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard -import paddle def multiclass_nms(prediction, class_num, keep_top_k, nms_threshold): diff --git a/python/paddle/fluid/tests/unittests/test_reverse_op.py b/python/paddle/fluid/tests/unittests/test_reverse_op.py index 467a385e46d261..995eca8473d157 100644 --- a/python/paddle/fluid/tests/unittests/test_reverse_op.py +++ b/python/paddle/fluid/tests/unittests/test_reverse_op.py @@ -14,17 +14,18 @@ import os import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope from op_test import OpTest +from test_attribute_var import UnittestBase + import paddle import paddle.fluid as fluid -from paddle.fluid import core -import gradient_checker -from decorator_helper import prog_scope import paddle.fluid.layers as layers - -from paddle.fluid.framework import program_guard, Program -from test_attribute_var import UnittestBase +from paddle.fluid import core +from paddle.fluid.framework import Program, program_guard class TestReverseOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py index 5845b421fb511a..5e3e899eb67bdf 100644 --- a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py +++ b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py @@ -15,10 +15,11 @@ import unittest import numpy as np + +import paddle +import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.op import Operator -import paddle.fluid as fluid -import paddle def create_selected_rows_and_tensor( diff --git 
a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py index a143076a743613..73995d0ee00db7 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py @@ -13,24 +13,21 @@ # limitations under the License. import unittest + import numpy +import numpy as np import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers import paddle.fluid.core as core -from paddle.fluid.framework import program_guard, Program - +import paddle.fluid.layers as layers +import paddle.fluid.layers.utils as utils +from paddle.fluid import contrib, framework +from paddle.fluid.contrib.layers import basic_lstm from paddle.fluid.executor import Executor -from paddle.fluid import framework - -from paddle.fluid.layers.rnn import LSTMCell, GRUCell, RNNCell +from paddle.fluid.framework import Program, program_guard from paddle.fluid.layers import rnn as dynamic_rnn -from paddle.fluid import contrib -from paddle.fluid.contrib.layers import basic_lstm -import paddle.fluid.layers.utils as utils - -import numpy as np +from paddle.fluid.layers.rnn import GRUCell, LSTMCell, RNNCell class TestLSTMCellError(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py index 2bd5d3dd57d4f7..077c8d5e68e75a 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py @@ -14,21 +14,20 @@ import random import unittest + import numpy as np import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.fluid.layers as layers import paddle.nn as nn from paddle import Model, set_device -from paddle.static import InputSpec as Input from paddle.fluid.dygraph import Layer -from paddle.nn import BeamSearchDecoder, dynamic_decode - -import paddle.fluid as fluid -import paddle.fluid.layers as layers -import paddle.fluid.core as core - from paddle.fluid.executor import Executor from paddle.fluid.framework import _test_eager_guard +from paddle.nn import BeamSearchDecoder, dynamic_decode +from paddle.static import InputSpec as Input paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_rnn_memory_helper_op.py b/python/paddle/fluid/tests/unittests/test_rnn_memory_helper_op.py index f2014c29f91c76..b94d8a1e1a6158 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_memory_helper_op.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_memory_helper_op.py @@ -14,10 +14,11 @@ import unittest -from paddle.fluid.framework import Program -from paddle.fluid.executor import Executor import numpy as np + import paddle.fluid.core as core +from paddle.fluid.executor import Executor +from paddle.fluid.framework import Program class RNNMemoryHelperOpTest(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_rnn_op.py b/python/paddle/fluid/tests/unittests/test_rnn_op.py index 5dd255562e6411..40970690e08140 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_op.py @@ -12,18 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest -import numpy as np -import paddle.fluid.core as core -import paddle import random import sys +import unittest +import numpy as np from op_test import OpTest +import paddle +import paddle.fluid.core as core + sys.path.append("./rnn") -from rnn_numpy import LSTM from convert import get_params_for_net +from rnn_numpy import LSTM random.seed(2) np.set_printoptions(threshold=np.inf) diff --git a/python/paddle/fluid/tests/unittests/test_roi_align_op.py b/python/paddle/fluid/tests/unittests/test_roi_align_op.py index c26b6691f66f9d..2f6093606fe4ff 100644 --- a/python/paddle/fluid/tests/unittests/test_roi_align_op.py +++ b/python/paddle/fluid/tests/unittests/test_roi_align_op.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle +import math import unittest + import numpy as np -import math from op_test import OpTest +import paddle + class TestROIAlignOp(OpTest): def set_data(self): diff --git a/python/paddle/fluid/tests/unittests/test_roi_perspective_transform_op.py b/python/paddle/fluid/tests/unittests/test_roi_perspective_transform_op.py index d03bdcd59202a4..6a82d953bb24c6 100644 --- a/python/paddle/fluid/tests/unittests/test_roi_perspective_transform_op.py +++ b/python/paddle/fluid/tests/unittests/test_roi_perspective_transform_op.py @@ -13,10 +13,11 @@ # limitations under the License. import unittest +from math import floor, sqrt + import numpy as np from op_test import OpTest -from math import sqrt -from math import floor + from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/test_roll_op.py b/python/paddle/fluid/tests/unittests/test_roll_op.py index bf11b0d86f6688..1bb4e3392617b0 100644 --- a/python/paddle/fluid/tests/unittests/test_roll_op.py +++ b/python/paddle/fluid/tests/unittests/test_roll_op.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest -import paddle + import numpy as np from op_test import OpTest + +import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_rot90_op.py b/python/paddle/fluid/tests/unittests/test_rot90_op.py index 0bbad1b4fb938a..a6b249ab190201 100644 --- a/python/paddle/fluid/tests/unittests/test_rot90_op.py +++ b/python/paddle/fluid/tests/unittests/test_rot90_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_row_conv_op.py b/python/paddle/fluid/tests/unittests/test_row_conv_op.py index 6f4df64ce503e9..ff50fa125d9614 100644 --- a/python/paddle/fluid/tests/unittests/test_row_conv_op.py +++ b/python/paddle/fluid/tests/unittests/test_row_conv_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + from paddle import fluid diff --git a/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py b/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py index ce37fedaf59ff5..98cad29ac2e9d6 100644 --- a/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py +++ b/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py @@ -13,13 +13,18 @@ # limitations under the License. 
import unittest + import numpy as np -import paddle.fluid as fluid -from paddle.fluid import Program, program_guard from op_test import OpTest from test_anchor_generator_op import anchor_generator_in_python -from test_generate_proposal_labels_op import _generate_groundtruth -from test_generate_proposal_labels_op import _bbox_overlaps, _box_to_delta +from test_generate_proposal_labels_op import ( + _bbox_overlaps, + _box_to_delta, + _generate_groundtruth, +) + +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard def rpn_target_assign( diff --git a/python/paddle/fluid/tests/unittests/test_rrelu_op.py b/python/paddle/fluid/tests/unittests/test_rrelu_op.py index 9da00696fa393c..847675ee6f58f8 100644 --- a/python/paddle/fluid/tests/unittests/test_rrelu_op.py +++ b/python/paddle/fluid/tests/unittests/test_rrelu_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid as fluid -import paddle.fluid.core as core from op_test import OpTest + import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.nn.functional as F from paddle.fluid import dygraph diff --git a/python/paddle/fluid/tests/unittests/test_run.py b/python/paddle/fluid/tests/unittests/test_run.py index 514384c0192f72..da95f190c5fd64 100644 --- a/python/paddle/fluid/tests/unittests/test_run.py +++ b/python/paddle/fluid/tests/unittests/test_run.py @@ -12,13 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest +import os +import random import subprocess -import sys, os +import sys import tempfile - -import random - +import unittest from os import listdir from os.path import isfile, join diff --git a/python/paddle/fluid/tests/unittests/test_run_fluid_by_module_or_command_line.py b/python/paddle/fluid/tests/unittests/test_run_fluid_by_module_or_command_line.py index df626dc6dded7e..473ff182aefc3b 100644 --- a/python/paddle/fluid/tests/unittests/test_run_fluid_by_module_or_command_line.py +++ b/python/paddle/fluid/tests/unittests/test_run_fluid_by_module_or_command_line.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import os import sys +import unittest class TestRunFluidByModule(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_run_program_op.py b/python/paddle/fluid/tests/unittests/test_run_program_op.py index 265594c26ede75..afa1fe2321944a 100644 --- a/python/paddle/fluid/tests/unittests/test_run_program_op.py +++ b/python/paddle/fluid/tests/unittests/test_run_program_op.py @@ -14,19 +14,20 @@ import contextlib import unittest + import numpy as np import paddle -from paddle import _legacy_C_ops import paddle.fluid as fluid +from paddle import _legacy_C_ops from paddle.fluid import core, framework -from paddle.fluid.layers.utils import _hash_with_id -from paddle.fluid.framework import _in_eager_mode_ +from paddle.fluid.dygraph.base import switch_to_static_graph from paddle.fluid.executor import ( - _is_enable_standalone_executor, _is_dy2st_enable_standalone_executor, + _is_enable_standalone_executor, ) -from paddle.fluid.dygraph.base import switch_to_static_graph +from paddle.fluid.framework import _in_eager_mode_ +from paddle.fluid.layers.utils import _hash_with_id paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_runtime_and_compiletime_exception.py b/python/paddle/fluid/tests/unittests/test_runtime_and_compiletime_exception.py index d498035327ba4a..c761313d688d32 100644 --- a/python/paddle/fluid/tests/unittests/test_runtime_and_compiletime_exception.py +++ b/python/paddle/fluid/tests/unittests/test_runtime_and_compiletime_exception.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_sample_logits_op.py b/python/paddle/fluid/tests/unittests/test_sample_logits_op.py index 8bc0b442d8f2c0..6ca278c050df43 100644 --- a/python/paddle/fluid/tests/unittests/test_sample_logits_op.py +++ b/python/paddle/fluid/tests/unittests/test_sample_logits_op.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import collections +import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_sampling_id_op.py b/python/paddle/fluid/tests/unittests/test_sampling_id_op.py index 2e73a90abdb5b8..ae84e98aaa746a 100644 --- a/python/paddle/fluid/tests/unittests/test_sampling_id_op.py +++ b/python/paddle/fluid/tests/unittests/test_sampling_id_op.py @@ -13,10 +13,11 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid as fluid import paddle +import paddle.fluid as fluid class TestSamplingIdShape(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_save_inference_model_conditional_op.py b/python/paddle/fluid/tests/unittests/test_save_inference_model_conditional_op.py index 79021c89d43df1..58c018f48519cf 100644 --- a/python/paddle/fluid/tests/unittests/test_save_inference_model_conditional_op.py +++ b/python/paddle/fluid/tests/unittests/test_save_inference_model_conditional_op.py @@ -13,8 +13,8 @@ # limitations under the License. 
import os -import unittest import tempfile +import unittest import paddle import paddle.nn.functional as F diff --git a/python/paddle/fluid/tests/unittests/test_save_model_without_var.py b/python/paddle/fluid/tests/unittests/test_save_model_without_var.py index ac02edc42e7f8d..13940068070296 100644 --- a/python/paddle/fluid/tests/unittests/test_save_model_without_var.py +++ b/python/paddle/fluid/tests/unittests/test_save_model_without_var.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import warnings import unittest +import warnings + import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_saved_tensors_hooks.py b/python/paddle/fluid/tests/unittests/test_saved_tensors_hooks.py index c945eee068a547..f43ec14cfc06cb 100644 --- a/python/paddle/fluid/tests/unittests/test_saved_tensors_hooks.py +++ b/python/paddle/fluid/tests/unittests/test_saved_tensors_hooks.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle from paddle.autograd import PyLayer diff --git a/python/paddle/fluid/tests/unittests/test_scale_op.py b/python/paddle/fluid/tests/unittests/test_scale_op.py index 8082128a02fce4..06e8846a4b71d8 100644 --- a/python/paddle/fluid/tests/unittests/test_scale_op.py +++ b/python/paddle/fluid/tests/unittests/test_scale_op.py @@ -13,16 +13,18 @@ # limitations under the License. import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope from op_test import OpTest, convert_float_to_uint16 + import paddle import paddle.fluid as fluid import paddle.fluid.core as core +import paddle.fluid.layers as layers from paddle.fluid.op import Operator from paddle.static import Program, program_guard -import gradient_checker -from decorator_helper import prog_scope -import paddle.fluid.layers as layers class TestScaleOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_scaled_dot_product_attention.py b/python/paddle/fluid/tests/unittests/test_scaled_dot_product_attention.py index 7f702482440e51..2b0e28adf8f2bd 100644 --- a/python/paddle/fluid/tests/unittests/test_scaled_dot_product_attention.py +++ b/python/paddle/fluid/tests/unittests/test_scaled_dot_product_attention.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/test_scatter_nd_op.py b/python/paddle/fluid/tests/unittests/test_scatter_nd_op.py index 5fef3d6d3f9f45..b0c58839b3df22 100644 --- a/python/paddle/fluid/tests/unittests/test_scatter_nd_op.py +++ b/python/paddle/fluid/tests/unittests/test_scatter_nd_op.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid from paddle.fluid.dygraph.base import switch_to_static_graph diff --git a/python/paddle/fluid/tests/unittests/test_scatter_op.py b/python/paddle/fluid/tests/unittests/test_scatter_op.py index a830ed0a9e291d..dc8f8681b96db8 100644 --- a/python/paddle/fluid/tests/unittests/test_scatter_op.py +++ b/python/paddle/fluid/tests/unittests/test_scatter_op.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os import unittest + import numpy as np -import os +from op_test import OpTest + import paddle import paddle.fluid as fluid -from op_test import OpTest import paddle.fluid.core as core from paddle.fluid.dygraph.base import switch_to_static_graph diff --git a/python/paddle/fluid/tests/unittests/test_scope.py b/python/paddle/fluid/tests/unittests/test_scope.py index 6eaab539fc3333..d08f72d7c8c8c3 100644 --- a/python/paddle/fluid/tests/unittests/test_scope.py +++ b/python/paddle/fluid/tests/unittests/test_scope.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid.core import unittest +import paddle.fluid.core + class TestScope(unittest.TestCase): def test_create_destroy(self): diff --git a/python/paddle/fluid/tests/unittests/test_searchsorted_op.py b/python/paddle/fluid/tests/unittests/test_searchsorted_op.py index d185fbaa110d0f..e5980cea170e75 100644 --- a/python/paddle/fluid/tests/unittests/test_searchsorted_op.py +++ b/python/paddle/fluid/tests/unittests/test_searchsorted_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_seed_op.py b/python/paddle/fluid/tests/unittests/test_seed_op.py index 14aa2c4f4dd75d..e7073d706afe76 100644 --- a/python/paddle/fluid/tests/unittests/test_seed_op.py +++ b/python/paddle/fluid/tests/unittests/test_seed_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.static as static diff --git a/python/paddle/fluid/tests/unittests/test_segment_ops.py b/python/paddle/fluid/tests/unittests/test_segment_ops.py index 5457e04dc7b2df..801364e71644b8 100644 --- a/python/paddle/fluid/tests/unittests/test_segment_ops.py +++ b/python/paddle/fluid/tests/unittests/test_segment_ops.py @@ -15,11 +15,11 @@ import unittest import numpy as np +from op_test import OpTest + import paddle import paddle.fluid.core as core -from op_test import OpTest - def compute_segment_sum(x, segment_ids): length = segment_ids[-1] + 1 diff --git a/python/paddle/fluid/tests/unittests/test_select_input_output_op.py b/python/paddle/fluid/tests/unittests/test_select_input_output_op.py index eeab0ca874d965..c294e5f6e50bff 100644 --- a/python/paddle/fluid/tests/unittests/test_select_input_output_op.py +++ b/python/paddle/fluid/tests/unittests/test_select_input_output_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_selected_rows.py b/python/paddle/fluid/tests/unittests/test_selected_rows.py index a66dc40dfb1f71..f6fc71ef6ac712 100644 --- a/python/paddle/fluid/tests/unittests/test_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_selected_rows.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.fluid.core as core import unittest + import numpy as np +import paddle.fluid.core as core + class TestSelectedRows(unittest.TestCase): def test_selected_rows(self): diff --git a/python/paddle/fluid/tests/unittests/test_selu_op.py b/python/paddle/fluid/tests/unittests/test_selu_op.py index 381f3aa5ef5631..1cd638b37836fd 100644 --- a/python/paddle/fluid/tests/unittests/test_selu_op.py +++ b/python/paddle/fluid/tests/unittests/test_selu_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid.core as core from op_test import OpTest + import paddle import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.nn.functional as F diff --git a/python/paddle/fluid/tests/unittests/test_set_bool_attr.py b/python/paddle/fluid/tests/unittests/test_set_bool_attr.py index 2c233141d095ea..0a43e57e903eb0 100644 --- a/python/paddle/fluid/tests/unittests/test_set_bool_attr.py +++ b/python/paddle/fluid/tests/unittests/test_set_bool_attr.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest +import paddle.fluid as fluid + class TestAttrSet(unittest.TestCase): def test_set_bool_attr(self): diff --git a/python/paddle/fluid/tests/unittests/test_set_value_op.py b/python/paddle/fluid/tests/unittests/test_set_value_op.py index 3d1fb9d526dde7..ff2df8c1a8e990 100644 --- a/python/paddle/fluid/tests/unittests/test_set_value_op.py +++ b/python/paddle/fluid/tests/unittests/test_set_value_op.py @@ -15,13 +15,14 @@ # Test set_value op in static mode import unittest +from functools import reduce + import numpy as np import paddle import paddle.fluid as fluid -from paddle.fluid.layer_helper import LayerHelper -from functools import reduce from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.layer_helper import LayerHelper class TestSetValueBase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op.py b/python/paddle/fluid/tests/unittests/test_sgd_op.py index 255a4799984968..26c4dd18c13a1d 100644 --- a/python/paddle/fluid/tests/unittests/test_sgd_op.py +++ b/python/paddle/fluid/tests/unittests/test_sgd_op.py @@ -13,13 +13,15 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid as fluid -import paddle.fluid.core as core -from paddle.fluid.op import Operator from op_test import OpTest + import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard +from paddle.fluid.op import Operator paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py b/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py index ae7e87d1b3f715..dd62aa369b5f0d 100644 --- a/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py +++ b/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py @@ -12,20 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import struct import unittest + import numpy as np + +import paddle import paddle.fluid as fluid import paddle.fluid.core as core +import paddle.static.amp as amp from paddle.fluid.op import Operator from paddle.fluid.tests.unittests.op_test import ( - convert_float_to_uint16, - convert_uint16_to_float, OpTest, OpTestTool, + convert_float_to_uint16, + convert_uint16_to_float, ) -import paddle -import paddle.static.amp as amp -import struct @unittest.skipIf( diff --git a/python/paddle/fluid/tests/unittests/test_sgn.py b/python/paddle/fluid/tests/unittests/test_sgn.py index 75d5d1b7847c4d..bf9517f42767c3 100644 --- a/python/paddle/fluid/tests/unittests/test_sgn.py +++ b/python/paddle/fluid/tests/unittests/test_sgn.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_shape_op.py b/python/paddle/fluid/tests/unittests/test_shape_op.py index f4de948d229a61..8853cb217938be 100644 --- a/python/paddle/fluid/tests/unittests/test_shape_op.py +++ b/python/paddle/fluid/tests/unittests/test_shape_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle from paddle.fluid import core from paddle.fluid.op import Operator diff --git a/python/paddle/fluid/tests/unittests/test_shard_index_op.py b/python/paddle/fluid/tests/unittests/test_shard_index_op.py index 8c13e2b44c3c54..dfbb98a7913725 100644 --- a/python/paddle/fluid/tests/unittests/test_shard_index_op.py +++ b/python/paddle/fluid/tests/unittests/test_shard_index_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_share_data_op.py b/python/paddle/fluid/tests/unittests/test_share_data_op.py index b6f2d1ecbec46c..765cbd93ab1f2c 100644 --- a/python/paddle/fluid/tests/unittests/test_share_data_op.py +++ b/python/paddle/fluid/tests/unittests/test_share_data_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + from paddle.fluid import core from paddle.fluid.op import Operator diff --git a/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py b/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py index c30c2095c9bac7..24c7e44afcb2ab 100644 --- a/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py +++ b/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py @@ -13,17 +13,16 @@ # limitations under the License. 
import unittest + +import numpy as np + import paddle import paddle.fluid.core as core -from paddle.fluid.executor import Executor import paddle.fluid.layers as layers from paddle.fluid.backward import append_backward -from paddle.fluid.framework import switch_main_program -from paddle.fluid.framework import Program, program_guard -import numpy as np - -from paddle.fluid.layers.control_flow import shrink_memory -from paddle.fluid.layers.control_flow import lod_rank_table +from paddle.fluid.executor import Executor +from paddle.fluid.framework import Program, program_guard, switch_main_program +from paddle.fluid.layers.control_flow import lod_rank_table, shrink_memory class TestShrinkRNNMemoryBase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_shuffle_batch_op.py b/python/paddle/fluid/tests/unittests/test_shuffle_batch_op.py index 1725e71bafe35e..ec830b5cf1ded7 100644 --- a/python/paddle/fluid/tests/unittests/test_shuffle_batch_op.py +++ b/python/paddle/fluid/tests/unittests/test_shuffle_batch_op.py @@ -13,11 +13,13 @@ # limitations under the License. """This is unit test of Test shuffle_batch Op.""" +import os import unittest + import numpy as np -import paddle.fluid as fluid from op_test import OpTest -import os + +import paddle.fluid as fluid class TestShuffleBatchOpBase(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_shuffle_channel_op.py b/python/paddle/fluid/tests/unittests/test_shuffle_channel_op.py index 01bdd48ff24ee3..a93d830f989066 100644 --- a/python/paddle/fluid/tests/unittests/test_shuffle_channel_op.py +++ b/python/paddle/fluid/tests/unittests/test_shuffle_channel_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss.py b/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss.py index e36ba383c07efc..8cdaaa5459b0da 100644 --- a/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss.py +++ b/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -import numpy as np -import unittest from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py b/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py index 4b9831f8e8d0a5..689fc30b5803f1 100644 --- a/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy +import math import unittest + import numpy as np -import math -import copy from op_test import OpTest + import paddle.fluid as fluid -from paddle.fluid import core -from paddle.fluid import Program, program_guard +from paddle.fluid import Program, core, program_guard def sigmoid_focal_loss_forward( diff --git a/python/paddle/fluid/tests/unittests/test_sign_op.py b/python/paddle/fluid/tests/unittests/test_sign_op.py index 2db25d0eaee505..c48de7d58663fb 100644 --- a/python/paddle/fluid/tests/unittests/test_sign_op.py +++ b/python/paddle/fluid/tests/unittests/test_sign_op.py @@ -13,15 +13,17 @@ # limitations under the License. 
import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope from op_test import OpTest + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid import Program, program_guard -import gradient_checker -from decorator_helper import prog_scope import paddle.fluid.layers as layers +from paddle.fluid import Program, program_guard class TestSignOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_signal.py b/python/paddle/fluid/tests/unittests/test_signal.py index 6b485d56d79acb..19a0dd433ce2ce 100644 --- a/python/paddle/fluid/tests/unittests/test_signal.py +++ b/python/paddle/fluid/tests/unittests/test_signal.py @@ -17,10 +17,11 @@ import unittest import numpy as np +import scipy.signal from numpy import fft from numpy.lib.stride_tricks import as_strided + import paddle -import scipy.signal paddle.set_default_dtype('float64') diff --git a/python/paddle/fluid/tests/unittests/test_simple_rnn_op.py b/python/paddle/fluid/tests/unittests/test_simple_rnn_op.py index fa129cda06627a..5563a57fe7ce8a 100644 --- a/python/paddle/fluid/tests/unittests/test_simple_rnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_simple_rnn_op.py @@ -12,18 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random +import sys import unittest -import numpy as np +import numpy as np from op_test import OpTest + import paddle import paddle.fluid.core as core -import random -import sys sys.path.append("./rnn") -from rnn_numpy import SimpleRNN from convert import get_params_for_net +from rnn_numpy import SimpleRNN random.seed(2) np.set_printoptions(threshold=np.inf) diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py index d45fb9832d7fbd..616fc33a743400 100644 --- a/python/paddle/fluid/tests/unittests/test_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_slice_op.py @@ -13,16 +13,17 @@ # limitations under the License. import unittest + +import gradient_checker import numpy as np -import paddle.fluid.core as core +from decorator_helper import prog_scope from op_test import OpTest, convert_float_to_uint16 -import paddle.fluid as fluid -import paddle.fluid.layers as layers + import paddle -from paddle.fluid.framework import _test_eager_guard, _enable_legacy_dygraph -import gradient_checker -from decorator_helper import prog_scope +import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.fluid.layers as layers +from paddle.fluid.framework import _enable_legacy_dygraph, _test_eager_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_slice_var.py b/python/paddle/fluid/tests/unittests/test_slice_var.py index b97e5243c3299b..84c04c65424baa 100644 --- a/python/paddle/fluid/tests/unittests/test_slice_var.py +++ b/python/paddle/fluid/tests/unittests/test_slice_var.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import random import unittest -from paddle.fluid.transpiler.distribute_transpiler import slice_variable + import paddle.fluid as fluid -import random +from paddle.fluid.transpiler.distribute_transpiler import slice_variable class TestSliceVar(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py index 4e078e506c7d68..6f1565c093966d 100644 --- a/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py +++ b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -import numpy as np -import unittest def smooth_l1_loss_forward(val, delta): diff --git a/python/paddle/fluid/tests/unittests/test_smooth_l1_loss_op.py b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss_op.py index 4b31e7ead6b943..ba251c05ac69e0 100644 --- a/python/paddle/fluid/tests/unittests/test_smooth_l1_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_soft_margin_loss.py b/python/paddle/fluid/tests/unittests/test_soft_margin_loss.py index 98918fb4b0babf..9396d07e8680ea 100644 --- a/python/paddle/fluid/tests/unittests/test_soft_margin_loss.py +++ b/python/paddle/fluid/tests/unittests/test_soft_margin_loss.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import numpy as np import unittest +import numpy as np + +import paddle + def test_static_layer( place, diff --git a/python/paddle/fluid/tests/unittests/test_softmax2d.py b/python/paddle/fluid/tests/unittests/test_softmax2d.py index 8297ca9c908146..61d4bb93106805 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax2d.py +++ b/python/paddle/fluid/tests/unittests/test_softmax2d.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np +from test_softmax_op import ref_softmax + import paddle import paddle.fluid.core as core -from test_softmax_op import ref_softmax class TestSoftmax2DAPI(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_op.py b/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_op.py index c36b8362c84bcf..f56e15856054e5 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid.core as core from op_test import OpTest + import paddle import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.incubate as incubate paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_upper_triangle_op.py b/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_upper_triangle_op.py index 84032336402c4d..8d6d866fe91197 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_upper_triangle_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_mask_fuse_upper_triangle_op.py @@ -13,11 +13,13 @@ # limitations under the License. 
import unittest + import numpy as np -import paddle.fluid.core as core from op_test import OpTest + import paddle import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.incubate as incubate paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_softmax_op.py b/python/paddle/fluid/tests/unittests/test_softmax_op.py index 18a5737225fa93..4f689762b83ba0 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest, convert_float_to_uint16 -import paddle.fluid.core as core + import paddle import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.nn.functional as F np.random.seed(10) diff --git a/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py b/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py index a623a311ccf1c5..d9549cb2823313 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py @@ -13,13 +13,14 @@ # limitations under the License. import unittest -import numpy as np -import paddle -import paddle.fluid.core as core +import numpy as np from op_test import OpTest from test_softmax_op import stable_softmax +import paddle +import paddle.fluid.core as core + def cross_entropy(softmax, label, soft_label, axis, ignore_index=-1): if soft_label: diff --git a/python/paddle/fluid/tests/unittests/test_solve_op.py b/python/paddle/fluid/tests/unittests/test_solve_op.py index 4fcea5e67d5905..7abd5e432c72f6 100644 --- a/python/paddle/fluid/tests/unittests/test_solve_op.py +++ b/python/paddle/fluid/tests/unittests/test_solve_op.py @@ -12,14 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License.w +import sys import unittest + import numpy as np + import paddle import paddle.fluid.core as core -import sys sys.path.append("..") from op_test import OpTest + import paddle.fluid as fluid from paddle.fluid import Program, program_guard from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_sort_op.py b/python/paddle/fluid/tests/unittests/test_sort_op.py index 387f5474dd4b91..9cf9a26eed9191 100644 --- a/python/paddle/fluid/tests/unittests/test_sort_op.py +++ b/python/paddle/fluid/tests/unittests/test_sort_op.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -import numpy as np import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_space_to_depth_op.py b/python/paddle/fluid/tests/unittests/test_space_to_depth_op.py index 9c43836e2949fc..d478067f0ac6fa 100644 --- a/python/paddle/fluid/tests/unittests/test_space_to_depth_op.py +++ b/python/paddle/fluid/tests/unittests/test_space_to_depth_op.py @@ -13,10 +13,12 @@ # limitations under the License. 
import unittest + import numpy as np -import paddle.fluid as fluid from op_test import OpTest +import paddle.fluid as fluid + class TestSpaceToDepthOp(OpTest): @staticmethod diff --git a/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py b/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py index 2917f96c442d72..43be65aba3d1a2 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import numpy as np -import unittest import os import re +import unittest + +import numpy as np + +import paddle paddle.set_default_dtype('float64') diff --git a/python/paddle/fluid/tests/unittests/test_sparse_attention_op.py b/python/paddle/fluid/tests/unittests/test_sparse_attention_op.py index e10b20a073aec0..6f343a99a18448 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_attention_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_attention_op.py @@ -12,16 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy +import os +import re import unittest + import numpy as np from op_test import OpTest -import paddle.fluid.core as core + import paddle import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.nn.functional as F -import os -import re -import copy def get_cuda_version(): diff --git a/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py b/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py index 0b9c88aad9de0a..49c538050590d5 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np + import paddle +import paddle.sparse as sparse from paddle.fluid import core from paddle.fluid.framework import _test_eager_guard -import paddle.sparse as sparse class TestSparseConv(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_sparse_copy_op.py b/python/paddle/fluid/tests/unittests/test_sparse_copy_op.py index c31dc4e159fce8..1bebe89f3ebf03 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_copy_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_copy_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_sparse_elementwise_op.py b/python/paddle/fluid/tests/unittests/test_sparse_elementwise_op.py index 1f9c46e41aa1ac..e2a98b170e91ca 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_elementwise_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_elementwise_op.py @@ -13,9 +13,10 @@ # limitations under the License. 
import unittest -from operator import __add__, __sub__, __mul__, __truediv__ +from operator import __add__, __mul__, __sub__, __truediv__ import numpy as np + import paddle import paddle.sparse as sparse diff --git a/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py b/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py index 8506ac02a6f9da..75344b3b7064d3 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os +import copy import math +import os import re -import copy import unittest + import numpy as np + import paddle import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py b/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py index bc45b5ca80a34c..eb608dd379cacf 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import numpy as np -import scipy.sparse as sp -import unittest import os import re +import unittest + +import numpy as np +import scipy.sparse as sp + +import paddle paddle.set_default_dtype('float64') diff --git a/python/paddle/fluid/tests/unittests/test_sparse_model.py b/python/paddle/fluid/tests/unittests/test_sparse_model.py index 52f37e60dfbc96..6671be6f451988 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_model.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_model.py @@ -13,10 +13,12 @@ # limitations under the License. import unittest + import numpy as np + import paddle -from paddle.sparse import nn from paddle.fluid.framework import _test_eager_guard +from paddle.sparse import nn class TestGradientAdd(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_sparse_momentum_op.py b/python/paddle/fluid/tests/unittests/test_sparse_momentum_op.py index fefb31255247a5..5eb3f8157388bf 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_momentum_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_momentum_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_sparse_mv_op.py b/python/paddle/fluid/tests/unittests/test_sparse_mv_op.py index ae04ddc7a487ae..831928867b2d1c 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_mv_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_mv_op.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -from paddle.fluid.framework import _test_eager_guard - -import numpy as np -import unittest import os import re +import unittest + +import numpy as np + +import paddle +from paddle.fluid.framework import _test_eager_guard paddle.seed(100) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py b/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py index 4f48b08e9f8cba..4620f7ad463908 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_norm_op.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import unittest + import numpy as np + import paddle -from paddle.sparse import nn -import paddle.sparse as sparse import paddle.fluid as fluid -import copy +import paddle.sparse as sparse +from paddle.sparse import nn class TestSparseBatchNorm(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_sparse_pooling_op.py b/python/paddle/fluid/tests/unittests/test_sparse_pooling_op.py index 5425ebb6ea98db..1d3aaf28b8bb85 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_pooling_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_pooling_op.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import unittest + import numpy as np + import paddle from paddle.fluid.framework import _test_eager_guard -import copy class TestMaxPool3DFunc(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_sparse_reshape_op.py b/python/paddle/fluid/tests/unittests/test_sparse_reshape_op.py index fd748c4dcb2c9a..34b7a95299f3e0 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_reshape_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_reshape_op.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import numpy as np import unittest +import numpy as np + +import paddle + class TestReshape(unittest.TestCase): """ diff --git a/python/paddle/fluid/tests/unittests/test_sparse_softmax_op.py b/python/paddle/fluid/tests/unittests/test_sparse_softmax_op.py index ef1f672047fb36..bc8e1a4020dc0e 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_softmax_op.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -from paddle.fluid.framework import _test_eager_guard +import unittest import numpy as np import scipy.sparse as sp -import unittest + +import paddle +from paddle.fluid.framework import _test_eager_guard np.random.seed(2022) diff --git a/python/paddle/fluid/tests/unittests/test_sparse_transpose_op.py b/python/paddle/fluid/tests/unittests/test_sparse_transpose_op.py index 06f221bfe6c81a..a96b79cf6ec60f 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_transpose_op.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -import numpy as np import unittest + +import numpy as np + +import paddle from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py b/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py index 84e8c0fde5da11..edb7393bf30563 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle from paddle.fluid.framework import convert_np_dtype_to_dtype_ diff --git a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py index 8659491da2e9ae..d781097e642855 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_spawn_and_init_parallel_env.py b/python/paddle/fluid/tests/unittests/test_spawn_and_init_parallel_env.py index e05b1cdaaf378f..059bcad2b49576 100644 --- a/python/paddle/fluid/tests/unittests/test_spawn_and_init_parallel_env.py +++ b/python/paddle/fluid/tests/unittests/test_spawn_and_init_parallel_env.py @@ -12,20 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import multiprocessing import os import unittest import paddle import paddle.distributed as dist from paddle.distributed.spawn import ( + _get_default_nprocs, _get_subprocess_env_list, _options_valid_check, - _get_default_nprocs, ) - from paddle.fluid import core from paddle.fluid.dygraph import parallel_helper -import multiprocessing # NOTE(chenweihang): Coverage CI is currently not able to count python3 # unittest, so the unittests here covers some cases that will only be diff --git a/python/paddle/fluid/tests/unittests/test_spectral_norm_op.py b/python/paddle/fluid/tests/unittests/test_spectral_norm_op.py index 5cff9628d8dd03..939406945d0d4c 100644 --- a/python/paddle/fluid/tests/unittests/test_spectral_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_spectral_norm_op.py @@ -13,11 +13,12 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid as fluid from op_test import OpTest, skip_check_grad_ci -from paddle.fluid.framework import program_guard, Program +import paddle.fluid as fluid +from paddle.fluid.framework import Program, program_guard def spectral_norm(weight, u, v, dim, power_iters, eps): diff --git a/python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py b/python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py index fefa11be9aa586..c5350445a9de33 100644 --- a/python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py +++ b/python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py @@ -13,17 +13,18 @@ # limitations under the License. 
import unittest -from paddle.fluid import Program, program_guard + +import numpy as np + import paddle import paddle.fluid.core as core -import numpy as np import paddle.fluid.layers as layers -from paddle.fluid.framework import Program, program_guard -from paddle.fluid.executor import Executor +from paddle.fluid import Program, program_guard from paddle.fluid.backward import append_backward -from paddle.fluid.layers.control_flow import split_lod_tensor -from paddle.fluid.layers.control_flow import merge_lod_tensor +from paddle.fluid.executor import Executor +from paddle.fluid.framework import Program, program_guard from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.layers.control_flow import merge_lod_tensor, split_lod_tensor class TestCPULoDTensorArrayOps(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_split_op.py b/python/paddle/fluid/tests/unittests/test_split_op.py index e087cc8b1bb84c..cc1b7f7307d3f2 100644 --- a/python/paddle/fluid/tests/unittests/test_split_op.py +++ b/python/paddle/fluid/tests/unittests/test_split_op.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest + import numpy as np from op_test import OpTest, convert_float_to_uint16 + +import paddle import paddle.fluid as fluid from paddle.fluid import Program, core, program_guard from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_split_program.py b/python/paddle/fluid/tests/unittests/test_split_program.py index bab36fd88ce4ab..e79c67d28c5e3e 100644 --- a/python/paddle/fluid/tests/unittests/test_split_program.py +++ b/python/paddle/fluid/tests/unittests/test_split_program.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.distributed.passes.pass_utils import split_program -from paddle.vision.models import resnet18 as resnet -import paddle -import paddle.nn as nn import unittest + import numpy as np +import paddle +import paddle.nn as nn +from paddle.distributed.passes.pass_utils import split_program +from paddle.vision.models import resnet18 as resnet + class TestSplitProgram(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_splits_api.py b/python/paddle/fluid/tests/unittests/test_splits_api.py index 491fed74f775c4..40083388d63e08 100644 --- a/python/paddle/fluid/tests/unittests/test_splits_api.py +++ b/python/paddle/fluid/tests/unittests/test_splits_api.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_spp_op.py b/python/paddle/fluid/tests/unittests/test_spp_op.py index 776cde9b553987..fbf3440352590b 100644 --- a/python/paddle/fluid/tests/unittests/test_spp_op.py +++ b/python/paddle/fluid/tests/unittests/test_spp_op.py @@ -13,10 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest -from test_pool2d_op import max_pool2D_forward_naive -from test_pool2d_op import avg_pool2D_forward_naive +from test_pool2d_op import avg_pool2D_forward_naive, max_pool2D_forward_naive class TestSppOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_square_error_cost.py b/python/paddle/fluid/tests/unittests/test_square_error_cost.py index 498eee8051e9c5..1fd516c0504ad8 100644 --- a/python/paddle/fluid/tests/unittests/test_square_error_cost.py +++ b/python/paddle/fluid/tests/unittests/test_square_error_cost.py @@ -13,9 +13,11 @@ # limitations under the License. import unittest + import numpy as np -import paddle.fluid.core as core + import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.fluid.layers as layers from paddle.fluid.executor import Executor diff --git a/python/paddle/fluid/tests/unittests/test_squared_l2_distance_op.py b/python/paddle/fluid/tests/unittests/test_squared_l2_distance_op.py index 21fdb592402f5a..579681ab0c0980 100644 --- a/python/paddle/fluid/tests/unittests/test_squared_l2_distance_op.py +++ b/python/paddle/fluid/tests/unittests/test_squared_l2_distance_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_squared_l2_norm_op.py b/python/paddle/fluid/tests/unittests/test_squared_l2_norm_op.py index 8e09d83975a60e..8124254e7b2cca 100644 --- a/python/paddle/fluid/tests/unittests/test_squared_l2_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_squared_l2_norm_op.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest + +import numpy as np from numpy import linalg as LA from op_test import OpTest + import paddle from paddle import _C_ops, _legacy_C_ops from paddle.framework import in_dygraph_mode diff --git a/python/paddle/fluid/tests/unittests/test_squeeze2_op.py b/python/paddle/fluid/tests/unittests/test_squeeze2_op.py index 4fbb08562b9c82..b8374da08727aa 100755 --- a/python/paddle/fluid/tests/unittests/test_squeeze2_op.py +++ b/python/paddle/fluid/tests/unittests/test_squeeze2_op.py @@ -12,16 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os import unittest import numpy as np -import os from op_test import OpTest -import paddle -from paddle.fluid.framework import program_guard, Program - from test_attribute_var import UnittestBase +import paddle +from paddle.fluid.framework import Program, program_guard + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_squeeze_op.py b/python/paddle/fluid/tests/unittests/test_squeeze_op.py index e4ea13844b705c..dd3fc5e3c2423e 100755 --- a/python/paddle/fluid/tests/unittests/test_squeeze_op.py +++ b/python/paddle/fluid/tests/unittests/test_squeeze_op.py @@ -14,16 +14,16 @@ import unittest +import gradient_checker import numpy as np +from decorator_helper import prog_scope +from op_test import OpTest, convert_float_to_uint16 import paddle import paddle.fluid as fluid -from paddle.fluid import Program, program_guard -from op_test import OpTest, convert_float_to_uint16 import paddle.fluid.core as core -import gradient_checker -from decorator_helper import prog_scope import paddle.fluid.layers as layers +from paddle.fluid import Program, program_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_stack_op.py b/python/paddle/fluid/tests/unittests/test_stack_op.py index 15947f40f03dec..95f589672c6e84 100644 --- a/python/paddle/fluid/tests/unittests/test_stack_op.py +++ b/python/paddle/fluid/tests/unittests/test_stack_op.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest + +import numpy as np +from op_test import OpTest, convert_float_to_uint16 + import paddle import paddle.fluid as fluid -from op_test import OpTest, convert_float_to_uint16 from paddle.fluid.framework import Program, program_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_state_dict_convert.py b/python/paddle/fluid/tests/unittests/test_state_dict_convert.py index 77a181613374f2..e12b115f333c0d 100644 --- a/python/paddle/fluid/tests/unittests/test_state_dict_convert.py +++ b/python/paddle/fluid/tests/unittests/test_state_dict_convert.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + +import numpy as np + import paddle import paddle.nn as nn -import numpy as np -import unittest class MyModel(nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_attention.py b/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_attention.py index 24ee5a36775561..3d91171d0d0872 100644 --- a/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_attention.py +++ b/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_attention.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os import unittest + from test_dist_base import TestDistBase -import os import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_feedforward.py b/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_feedforward.py index c6153f2be0b937..6e942bb48740b3 100644 --- a/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_feedforward.py +++ b/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_feedforward.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + from test_dist_base import TestDistBase -import os import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_multi_transformer.py b/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_multi_transformer.py index d34ced7b4f416c..7aea973743ee07 100644 --- a/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_multi_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_static_model_parallel_fused_multi_transformer.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest + from test_dist_base import TestDistBase -import os import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load_bf16.py b/python/paddle/fluid/tests/unittests/test_static_save_load_bf16.py index fe578907b866e7..5a13133bf79f2a 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load_bf16.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load_bf16.py @@ -12,17 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import tempfile import unittest + +import numpy as np + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.fluid.framework as framework from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.tests.unittests.test_imperative_base import new_program_scope from paddle.fluid.tests.unittests.test_static_save_load import PtbModel -import numpy as np -import tempfile -import os @unittest.skipIf( diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load_large.py b/python/paddle/fluid/tests/unittests/test_static_save_load_large.py index ddae3373b5a8ab..0231c133845bbf 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load_large.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load_large.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os +import tempfile import unittest + +import numpy as np +from test_imperative_base import new_program_scope + import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework -from test_imperative_base import new_program_scope - -import numpy as np -import os -import tempfile LARGE_PARAM = 2**26 diff --git a/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py b/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py index c6fd490cb60ddc..17e7f69a3b49e4 100644 --- a/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest +import paddle + class StaticShapeInferrenceTest(unittest.TestCase): def test_static_graph(self): diff --git a/python/paddle/fluid/tests/unittests/test_std_layer.py b/python/paddle/fluid/tests/unittests/test_std_layer.py index cc3e7740f45938..df5977eae8809a 100644 --- a/python/paddle/fluid/tests/unittests/test_std_layer.py +++ b/python/paddle/fluid/tests/unittests/test_std_layer.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_stft_op.py b/python/paddle/fluid/tests/unittests/test_stft_op.py index 91206c3da12615..4821c7270a04bf 100644 --- a/python/paddle/fluid/tests/unittests/test_stft_op.py +++ b/python/paddle/fluid/tests/unittests/test_stft_op.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -from numpy.lib.stride_tricks import as_strided -import paddle import unittest +import numpy as np +from numpy.lib.stride_tricks import as_strided from op_test import OpTest +import paddle + def frame_from_librosa(x, frame_length, hop_length, axis=-1): if axis == -1 and not x.flags["C_CONTIGUOUS"]: diff --git a/python/paddle/fluid/tests/unittests/test_strided_slice_op.py b/python/paddle/fluid/tests/unittests/test_strided_slice_op.py index 2be7bbe05189fa..996b0c4a33338f 100644 --- a/python/paddle/fluid/tests/unittests/test_strided_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_strided_slice_op.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from op_test import OpTest -import numpy as np import unittest -import paddle.fluid as fluid + +import numpy as np +from op_test import OpTest + import paddle +import paddle.fluid as fluid paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_subtract_op.py b/python/paddle/fluid/tests/unittests/test_subtract_op.py index ce58725d050172..441ae2b8141958 100644 --- a/python/paddle/fluid/tests/unittests/test_subtract_op.py +++ b/python/paddle/fluid/tests/unittests/test_subtract_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_sum_op.py b/python/paddle/fluid/tests/unittests/test_sum_op.py index 6c8cc00ea8fd94..68fdfcb9908e80 100644 --- a/python/paddle/fluid/tests/unittests/test_sum_op.py +++ b/python/paddle/fluid/tests/unittests/test_sum_op.py @@ -13,25 +13,27 @@ # limitations under the License. 
import os -import unittest import tempfile +import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope from op_test import OpTest + import paddle -from paddle import enable_static import paddle.fluid as fluid import paddle.fluid.core as core +import paddle.fluid.layers as layers +import paddle.inference as paddle_infer +from paddle import enable_static +from paddle.fluid.framework import _test_eager_guard from paddle.fluid.op import Operator from paddle.fluid.tests.unittests.op_test import ( OpTest, convert_float_to_uint16, convert_uint16_to_float, ) -from paddle.fluid.framework import _test_eager_guard -import paddle.inference as paddle_infer -import gradient_checker -from decorator_helper import prog_scope -import paddle.fluid.layers as layers class TestSumOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_svd_op.py b/python/paddle/fluid/tests/unittests/test_svd_op.py index 146b18ae20cd8d..cf91162d9d8ca4 100644 --- a/python/paddle/fluid/tests/unittests/test_svd_op.py +++ b/python/paddle/fluid/tests/unittests/test_svd_op.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest, skip_check_grad_ci + import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from op_test import OpTest, skip_check_grad_ci class TestSvdOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_switch.py b/python/paddle/fluid/tests/unittests/test_switch.py index 31fe519593fca5..250d844a3fab6a 100644 --- a/python/paddle/fluid/tests/unittests/test_switch.py +++ b/python/paddle/fluid/tests/unittests/test_switch.py @@ -15,8 +15,8 @@ import unittest import paddle.fluid.core as core -import paddle.fluid.layers as layers import paddle.fluid.framework as framework +import paddle.fluid.layers as layers from paddle.fluid.executor import Executor from paddle.fluid.framework import default_startup_program diff --git a/python/paddle/fluid/tests/unittests/test_switch_autotune.py b/python/paddle/fluid/tests/unittests/test_switch_autotune.py index 7b423af63fd14f..5f54b567ba8cd7 100644 --- a/python/paddle/fluid/tests/unittests/test_switch_autotune.py +++ b/python/paddle/fluid/tests/unittests/test_switch_autotune.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import unittest -import numpy as np -import tempfile -import warnings import json import os +import tempfile +import unittest +import warnings + +import numpy as np + +import paddle class SimpleNet(paddle.nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_switch_case.py b/python/paddle/fluid/tests/unittests/test_switch_case.py index aad70fe789bfe9..1b6b460397c91b 100644 --- a/python/paddle/fluid/tests/unittests/test_switch_case.py +++ b/python/paddle/fluid/tests/unittests/test_switch_case.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np import unittest +from functools import partial + +import numpy as np import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.layers as layers from paddle.fluid.framework import Program, program_guard -from functools import partial class TestAPISwitchCase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py index 70e8d0209a1f03..ef42ab8a522590 100644 --- a/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py @@ -16,18 +16,18 @@ for both FP64 and FP16 input. """ +import os import unittest + import numpy as np -import os +from decorator_helper import prog_scope +from op_test import OpTest, _set_use_system_allocator + import paddle -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core import paddle.nn as nn -from paddle.fluid import compiler -from paddle.fluid import Program, program_guard - -from op_test import OpTest, _set_use_system_allocator -from decorator_helper import prog_scope +from paddle.fluid import Program, compiler, program_guard _set_use_system_allocator(True) diff --git a/python/paddle/fluid/tests/unittests/test_take.py b/python/paddle/fluid/tests/unittests/test_take.py index f713d777c16832..bf16efa87ab899 100644 --- a/python/paddle/fluid/tests/unittests/test_take.py +++ b/python/paddle/fluid/tests/unittests/test_take.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_take_along_axis_op.py b/python/paddle/fluid/tests/unittests/test_take_along_axis_op.py index a7a65aec326115..da3fa64417fe60 100644 --- a/python/paddle/fluid/tests/unittests/test_take_along_axis_op.py +++ b/python/paddle/fluid/tests/unittests/test_take_along_axis_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle from paddle.framework import core diff --git a/python/paddle/fluid/tests/unittests/test_target_assign_op.py b/python/paddle/fluid/tests/unittests/test_target_assign_op.py index 103f28a2024151..c0c336a040819f 100644 --- a/python/paddle/fluid/tests/unittests/test_target_assign_op.py +++ b/python/paddle/fluid/tests/unittests/test_target_assign_op.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random import unittest + import numpy as np -import random from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_tdm_child_op.py b/python/paddle/fluid/tests/unittests/test_tdm_child_op.py index 95c49c904db5b4..5d261dd1efb0b2 100644 --- a/python/paddle/fluid/tests/unittests/test_tdm_child_op.py +++ b/python/paddle/fluid/tests/unittests/test_tdm_child_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py b/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py index 9c9ce12078008e..7a649a8c238dbe 100644 --- a/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py +++ b/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py @@ -14,10 +14,12 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest -import paddle.fluid.core as core + import paddle.fluid as fluid +import paddle.fluid.core as core def create_tdm_travel(): diff --git a/python/paddle/fluid/tests/unittests/test_teacher_student_sigmoid_loss_op.py b/python/paddle/fluid/tests/unittests/test_teacher_student_sigmoid_loss_op.py index 307a4edcf185c2..984a47831064ea 100644 --- a/python/paddle/fluid/tests/unittests/test_teacher_student_sigmoid_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_teacher_student_sigmoid_loss_op.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from math import exp, log + import numpy as np -from math import log -from math import exp from op_test import OpTest from scipy.special import logit diff --git a/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py b/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py index 13dc115f6bc4d6..265cf42934c7b7 100644 --- a/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py +++ b/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor.py index ef8e60df9d160b..f22693efad4cc4 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_tensor.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid -import paddle.fluid.core as core +import numbers import unittest + import numpy as np -import numbers + +import paddle.fluid as fluid +import paddle.fluid.core as core class TestTensorPtr(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py index 0dbfadd25bb35f..67899f150cc26d 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_tensor_copy_from.py b/python/paddle/fluid/tests/unittests/test_tensor_copy_from.py index 3f2b00e8e73371..087da70861d426 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_copy_from.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_copy_from.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle import unittest + import numpy as np + +import paddle from paddle.fluid.core import LoDTensor as Tensor diff --git a/python/paddle/fluid/tests/unittests/test_tensor_data_ptr.py b/python/paddle/fluid/tests/unittests/test_tensor_data_ptr.py index 10e816370ecac2..1f0462ff41bfca 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_data_ptr.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_data_ptr.py @@ -13,6 +13,7 @@ # limitations under the License. 
import unittest + import numpy as np import paddle diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_.py index 7998cbcc19e482..effe4461c76eb6 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_fill_.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest + import numpy as np + import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py index b3c68f93204399..931c85a7644e28 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest + import numpy as np + import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py index 8dd9b327f784d4..f5902fadf40529 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid -import paddle.nn.functional as F import unittest + import numpy as np + import paddle +import paddle.fluid as fluid +import paddle.nn.functional as F from paddle.fluid.framework import _enable_legacy_dygraph diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py index 17be54720c97ad..6c4531beee7ab6 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor_.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid -import paddle.nn.functional as F import unittest + import numpy as np + import paddle +import paddle.fluid as fluid +import paddle.nn.functional as F from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py index c7683d4b99a28e..c557d4bc378a0d 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py @@ -13,13 +13,14 @@ # limitations under the License. 
import unittest + import numpy as np import paddle -import paddle.nn as nn -from paddle.fluid.framework import _test_eager_guard import paddle.fluid as fluid import paddle.fluid.core as core +import paddle.nn as nn +from paddle.fluid.framework import _test_eager_guard class SimpleNet(nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_dynamic.py b/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_dynamic.py index 432a00fa429963..3a7aca28ac8534 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_dynamic.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np import paddle diff --git a/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_static.py b/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_static.py index 7949d2457e9286..8ec524e579e7df 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_static.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_scalar_type_promotion_static.py @@ -13,11 +13,11 @@ # limitations under the License. import unittest + import numpy as np import paddle -from paddle.static import program_guard -from paddle.static import Program +from paddle.static import Program, program_guard # Support types are ref from `paddle.tensor.math` # - Related paddle dtypes: diff --git a/python/paddle/fluid/tests/unittests/test_tensor_to_list.py b/python/paddle/fluid/tests/unittests/test_tensor_to_list.py index 709ff84179f24c..c8f438325ae30b 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_to_list.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_to_list.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest + import numpy as np + import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_tensor_to_numpy.py b/python/paddle/fluid/tests/unittests/test_tensor_to_numpy.py index 30843455ca5e25..9504c86f7be74d 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_to_numpy.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_to_numpy.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest + import numpy as np +import paddle.fluid as fluid + class TensorToNumpyTest(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_tensor_type_promotion.py b/python/paddle/fluid/tests/unittests/test_tensor_type_promotion.py index f6a1f7e5def413..811c2e583d701b 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_type_promotion.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_type_promotion.py @@ -14,6 +14,7 @@ import unittest import warnings + import paddle from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_tensor_uva.py b/python/paddle/fluid/tests/unittests/test_tensor_uva.py index 8fe03148f49211..ab4fd6ed432ec8 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_uva.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_uva.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle import unittest + import numpy as np + +import paddle from paddle.fluid import core -from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph +from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard class TestTensorCopyFrom(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_tensor_zero_.py b/python/paddle/fluid/tests/unittests/test_tensor_zero_.py index 2870ad8e75a192..e1337c8de60119 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_zero_.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_zero_.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid import unittest + import numpy as np + import paddle +import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_tensordot.py b/python/paddle/fluid/tests/unittests/test_tensordot.py index 8709772c18e14a..dd8529e50eef8d 100644 --- a/python/paddle/fluid/tests/unittests/test_tensordot.py +++ b/python/paddle/fluid/tests/unittests/test_tensordot.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest +import numpy as np + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_tf32_cublas.py b/python/paddle/fluid/tests/unittests/test_tf32_cublas.py index ce97a7ad173dbd..f18e1aa72e76a5 100644 --- a/python/paddle/fluid/tests/unittests/test_tf32_cublas.py +++ b/python/paddle/fluid/tests/unittests/test_tf32_cublas.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_tf32_cudnn.py b/python/paddle/fluid/tests/unittests/test_tf32_cudnn.py index cb57c93b71cb52..455de28d9faf7d 100644 --- a/python/paddle/fluid/tests/unittests/test_tf32_cudnn.py +++ b/python/paddle/fluid/tests/unittests/test_tf32_cudnn.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_tile_op.py b/python/paddle/fluid/tests/unittests/test_tile_op.py index b6ca4b5711ac50..5e2756a8d24248 100644 --- a/python/paddle/fluid/tests/unittests/test_tile_op.py +++ b/python/paddle/fluid/tests/unittests/test_tile_op.py @@ -13,14 +13,16 @@ # limitations under the License. import unittest + +import gradient_checker import numpy as np +from decorator_helper import prog_scope from op_test import OpTest + import paddle import paddle.fluid as fluid -from paddle.fluid import Program, core, program_guard -import gradient_checker -from decorator_helper import prog_scope import paddle.fluid.layers as layers +from paddle.fluid import Program, core, program_guard # Situation 1: repeat_times is a list (without tensor) diff --git a/python/paddle/fluid/tests/unittests/test_top_k_op.py b/python/paddle/fluid/tests/unittests/test_top_k_op.py index d722d3f622fe5e..d03c88a0d779e5 100644 --- a/python/paddle/fluid/tests/unittests/test_top_k_op.py +++ b/python/paddle/fluid/tests/unittests/test_top_k_op.py @@ -13,8 +13,10 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py b/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py index 60d2502a56b2aa..2a8af4d4ad9a67 100644 --- a/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_trace_op.py b/python/paddle/fluid/tests/unittests/test_trace_op.py index 8bceee2fdfdf66..b86422de074cf0 100644 --- a/python/paddle/fluid/tests/unittests/test_trace_op.py +++ b/python/paddle/fluid/tests/unittests/test_trace_op.py @@ -13,12 +13,14 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + +import paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.tensor as tensor -import paddle class TestTraceOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_traced_layer_err_msg.py b/python/paddle/fluid/tests/unittests/test_traced_layer_err_msg.py index c1642e9c1c9237..8beda249844425 100644 --- a/python/paddle/fluid/tests/unittests/test_traced_layer_err_msg.py +++ b/python/paddle/fluid/tests/unittests/test_traced_layer_err_msg.py @@ -14,10 +14,11 @@ # limitations under the License. import os -import numpy as np import tempfile import unittest +import numpy as np + import paddle import paddle.fluid as fluid import paddle.nn as nn diff --git a/python/paddle/fluid/tests/unittests/test_trainable.py b/python/paddle/fluid/tests/unittests/test_trainable.py index bbd7ae55d30df0..28b78b6b043867 100644 --- a/python/paddle/fluid/tests/unittests/test_trainable.py +++ b/python/paddle/fluid/tests/unittests/test_trainable.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from collections import Counter import unittest +from collections import Counter + +from simple_nets import init_data + import paddle import paddle.fluid as fluid -from simple_nets import init_data def test_trainable(): diff --git a/python/paddle/fluid/tests/unittests/test_trainer_desc.py b/python/paddle/fluid/tests/unittests/test_trainer_desc.py index bc584439fdfd88..0614f402db48a8 100644 --- a/python/paddle/fluid/tests/unittests/test_trainer_desc.py +++ b/python/paddle/fluid/tests/unittests/test_trainer_desc.py @@ -16,9 +16,10 @@ including config, etc. """ -import paddle.fluid as fluid import unittest +import paddle.fluid as fluid + class TestTrainerDesc(unittest.TestCase): """TestCases for TrainerDesc.""" diff --git a/python/paddle/fluid/tests/unittests/test_transfer_dtype_op.py b/python/paddle/fluid/tests/unittests/test_transfer_dtype_op.py index 364e6ff167ca12..c8a391720e314e 100644 --- a/python/paddle/fluid/tests/unittests/test_transfer_dtype_op.py +++ b/python/paddle/fluid/tests/unittests/test_transfer_dtype_op.py @@ -13,11 +13,12 @@ # limitations under the License. 
import unittest + import numpy as np +from op_test import OpTest, convert_float_to_uint16, convert_uint16_to_float import paddle import paddle.fluid.core as core -from op_test import OpTest, convert_uint16_to_float, convert_float_to_uint16 class TestTransferDtypeOpFp32ToFp64(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py b/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py index 48c2897b005490..6c26a70694ac23 100644 --- a/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py +++ b/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py @@ -13,14 +13,15 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest import paddle -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid.framework import Program, program_guard from paddle.fluid.layer_helper import LayerHelper -from op_test import OpTest # default kNCHW diff --git a/python/paddle/fluid/tests/unittests/test_transformer_api.py b/python/paddle/fluid/tests/unittests/test_transformer_api.py index 3f2e2185f9ee26..0bdd25f4486083 100644 --- a/python/paddle/fluid/tests/unittests/test_transformer_api.py +++ b/python/paddle/fluid/tests/unittests/test_transformer_api.py @@ -12,20 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np + import paddle import paddle.fluid as fluid from paddle.nn.layer.transformer import ( MultiHeadAttention, - TransformerEncoderLayer, + Transformer, + TransformerDecoder, TransformerDecoderLayer, TransformerEncoder, - TransformerDecoder, - Transformer, + TransformerEncoderLayer, ) -import unittest - def generate_basic_params(mode="attn", self_attention=True): batch_size, query_length = [np.random.randint(2, 10) for _ in range(2)] diff --git a/python/paddle/fluid/tests/unittests/test_translated_layer.py b/python/paddle/fluid/tests/unittests/test_translated_layer.py index 6df20a551fd752..0ca87394b6f3dd 100644 --- a/python/paddle/fluid/tests/unittests/test_translated_layer.py +++ b/python/paddle/fluid/tests/unittests/test_translated_layer.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import tempfile import unittest + import numpy as np -import tempfile -import os + import paddle import paddle.nn as nn import paddle.optimizer as opt diff --git a/python/paddle/fluid/tests/unittests/test_transpose_op.py b/python/paddle/fluid/tests/unittests/test_transpose_op.py index 5a310f6bf89437..ee56a2aad44e9a 100644 --- a/python/paddle/fluid/tests/unittests/test_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_transpose_op.py @@ -13,15 +13,17 @@ # limitations under the License. 
import unittest + +import gradient_checker import numpy as np -from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16 +from decorator_helper import prog_scope + import paddle import paddle.fluid as fluid -from paddle.fluid import Program, program_guard import paddle.fluid.core as core -import gradient_checker -from decorator_helper import prog_scope import paddle.fluid.layers as layers +from paddle.fluid import Program, program_guard +from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16 paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_tree_conv_op.py b/python/paddle/fluid/tests/unittests/test_tree_conv_op.py index d4582c3ad2e504..1c85dbf5e6cc35 100644 --- a/python/paddle/fluid/tests/unittests/test_tree_conv_op.py +++ b/python/paddle/fluid/tests/unittests/test_tree_conv_op.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import unittest + import numpy as np -from paddle.fluid.framework import program_guard, Program from op_test import OpTest -import unittest + import paddle.fluid as fluid +from paddle.fluid.framework import Program, program_guard def collect_node_patch(og, max_depth): diff --git a/python/paddle/fluid/tests/unittests/test_triangular_solve_op.py b/python/paddle/fluid/tests/unittests/test_triangular_solve_op.py index c71619e3137420..802cf4f9a62942 100644 --- a/python/paddle/fluid/tests/unittests/test_triangular_solve_op.py +++ b/python/paddle/fluid/tests/unittests/test_triangular_solve_op.py @@ -12,16 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License.w +import sys import unittest -import numpy as np -import sys +import numpy as np sys.path.append("..") -import paddle from op_test import OpTest + +import paddle import paddle.fluid as fluid -from paddle.fluid import Program, program_guard, core +from paddle.fluid import Program, core, program_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_tril_indices_op.py b/python/paddle/fluid/tests/unittests/test_tril_indices_op.py index 0d18b32304ead7..98236ee235ddf4 100644 --- a/python/paddle/fluid/tests/unittests/test_tril_indices_op.py +++ b/python/paddle/fluid/tests/unittests/test_tril_indices_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_tril_triu_op.py b/python/paddle/fluid/tests/unittests/test_tril_triu_op.py index b0b754dc9630d1..93740ab6b9bace 100644 --- a/python/paddle/fluid/tests/unittests/test_tril_triu_op.py +++ b/python/paddle/fluid/tests/unittests/test_tril_triu_op.py @@ -12,8 +12,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid import paddle.tensor as tensor diff --git a/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py b/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py index 901d25846613fc..66d55884224ad7 100755 --- a/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py @@ -13,10 +13,12 @@ # limitations under the License. 
import unittest + import numpy as np from op_test import OpTest -import paddle.fluid.core as core + import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.nn.functional import interpolate diff --git a/python/paddle/fluid/tests/unittests/test_trilinear_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_trilinear_interp_v2_op.py index 67ab8805dc1902..88b4b607261bf9 100755 --- a/python/paddle/fluid/tests/unittests/test_trilinear_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_trilinear_interp_v2_op.py @@ -13,13 +13,15 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -import paddle.fluid.core as core + +import paddle import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard from paddle.nn.functional import interpolate -import paddle np.random.seed(123) diff --git a/python/paddle/fluid/tests/unittests/test_triplet_margin_loss.py b/python/paddle/fluid/tests/unittests/test_triplet_margin_loss.py index 59f63a699d5696..66b150df214df3 100644 --- a/python/paddle/fluid/tests/unittests/test_triplet_margin_loss.py +++ b/python/paddle/fluid/tests/unittests/test_triplet_margin_loss.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import numpy as np import unittest +import numpy as np + +import paddle + def call_TripletMarginLoss_layer( input, diff --git a/python/paddle/fluid/tests/unittests/test_triplet_margin_with_distance_loss.py b/python/paddle/fluid/tests/unittests/test_triplet_margin_with_distance_loss.py index c8d4da36a1cb5f..c1a1a55c49e4c1 100644 --- a/python/paddle/fluid/tests/unittests/test_triplet_margin_with_distance_loss.py +++ b/python/paddle/fluid/tests/unittests/test_triplet_margin_with_distance_loss.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import numpy as np import unittest +import numpy as np + +import paddle + def call_TripletMarginDistanceLoss_layer( input, diff --git a/python/paddle/fluid/tests/unittests/test_triu_indices_op.py b/python/paddle/fluid/tests/unittests/test_triu_indices_op.py index 1ad6b7bbd458f8..a666d63b7d0067 100644 --- a/python/paddle/fluid/tests/unittests/test_triu_indices_op.py +++ b/python/paddle/fluid/tests/unittests/test_triu_indices_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_trunc_op.py b/python/paddle/fluid/tests/unittests/test_trunc_op.py index f8e23bd8074885..664cf00a21b841 100644 --- a/python/paddle/fluid/tests/unittests/test_trunc_op.py +++ b/python/paddle/fluid/tests/unittests/test_trunc_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_truncated_gaussian_random_op.py b/python/paddle/fluid/tests/unittests/test_truncated_gaussian_random_op.py index 70e79b4898af3f..941e56b808efd5 100644 --- a/python/paddle/fluid/tests/unittests/test_truncated_gaussian_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_truncated_gaussian_random_op.py @@ -13,6 +13,7 @@ # limitations under the License. 
import unittest + import numpy import paddle diff --git a/python/paddle/fluid/tests/unittests/test_unbind_op.py b/python/paddle/fluid/tests/unittests/test_unbind_op.py index 6f719ae7d4d5cc..82cec33d59e685 100644 --- a/python/paddle/fluid/tests/unittests/test_unbind_op.py +++ b/python/paddle/fluid/tests/unittests/test_unbind_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest, convert_float_to_uint16 + import paddle import paddle.fluid as fluid import paddle.tensor as tensor diff --git a/python/paddle/fluid/tests/unittests/test_unfold_op.py b/python/paddle/fluid/tests/unittests/test_unfold_op.py index 4396e8215f8654..3deb20ed91f5f3 100644 --- a/python/paddle/fluid/tests/unittests/test_unfold_op.py +++ b/python/paddle/fluid/tests/unittests/test_unfold_op.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest + +import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py index 6ebaf0d64e6735..81529828020ef0 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_bf16_op.py @@ -13,12 +13,14 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest, convert_uint16_to_float + import paddle +import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.op import Operator -import paddle.fluid as fluid from paddle.fluid.tests.unittests.test_uniform_random_op import ( output_hist, output_hist_diag, diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py index ea30b0da0ab46f..446df7cd9f54e0 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py @@ -13,12 +13,14 @@ # limitations under the License. 
import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -import numpy as np from paddle.fluid.framework import ( - _enable_legacy_dygraph, _disable_legacy_dygraph, + _enable_legacy_dygraph, ) diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py index ddad1f60c5128f..58078cbd71b8ad 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py @@ -14,17 +14,17 @@ import os import unittest + import numpy as np from op_test import OpTest +from test_attribute_var import UnittestBase + import paddle -import paddle.fluid.core as core -import paddle -from paddle.fluid.op import Operator import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard from paddle.fluid.framework import _test_eager_guard - -from test_attribute_var import UnittestBase +from paddle.fluid.op import Operator from paddle.tensor import random diff --git a/python/paddle/fluid/tests/unittests/test_unique.py b/python/paddle/fluid/tests/unittests/test_unique.py index 6e739bb385950c..c56ec313a395c4 100644 --- a/python/paddle/fluid/tests/unittests/test_unique.py +++ b/python/paddle/fluid/tests/unittests/test_unique.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_unique_consecutive_op.py b/python/paddle/fluid/tests/unittests/test_unique_consecutive_op.py index b8a3096dbd62f8..86872aff9c7da3 100644 --- a/python/paddle/fluid/tests/unittests/test_unique_consecutive_op.py +++ b/python/paddle/fluid/tests/unittests/test_unique_consecutive_op.py @@ -13,12 +13,13 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -import paddle.fluid.core as core import paddle import paddle.fluid as fluid +import paddle.fluid.core as core def reference_unique_consecutive(X, return_inverse=False, return_counts=False): diff --git a/python/paddle/fluid/tests/unittests/test_unique_name.py b/python/paddle/fluid/tests/unittests/test_unique_name.py index a423247370d144..0b7ff761b769fb 100644 --- a/python/paddle/fluid/tests/unittests/test_unique_name.py +++ b/python/paddle/fluid/tests/unittests/test_unique_name.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_unique_with_counts.py b/python/paddle/fluid/tests/unittests/test_unique_with_counts.py index 6a5b58454b56c2..cc72b9e11d4d9c 100644 --- a/python/paddle/fluid/tests/unittests/test_unique_with_counts.py +++ b/python/paddle/fluid/tests/unittests/test_unique_with_counts.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_unpool1d_op.py b/python/paddle/fluid/tests/unittests/test_unpool1d_op.py index 787d9367f5890c..72a314618597ec 100644 --- a/python/paddle/fluid/tests/unittests/test_unpool1d_op.py +++ b/python/paddle/fluid/tests/unittests/test_unpool1d_op.py @@ -13,7 +13,9 @@ # limitations under the License. 
import unittest + import numpy as np + import paddle import paddle.nn.functional as F diff --git a/python/paddle/fluid/tests/unittests/test_unpool3d_op.py b/python/paddle/fluid/tests/unittests/test_unpool3d_op.py index af17073758fdfd..abbabb43a51850 100644 --- a/python/paddle/fluid/tests/unittests/test_unpool3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_unpool3d_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle import paddle.nn.functional as F from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/test_unpool_op.py b/python/paddle/fluid/tests/unittests/test_unpool_op.py index 4ae6919552cadc..80445d32a59001 100644 --- a/python/paddle/fluid/tests/unittests/test_unpool_op.py +++ b/python/paddle/fluid/tests/unittests/test_unpool_op.py @@ -14,13 +14,14 @@ import os import unittest + import numpy as np from op_test import OpTest +from test_attribute_var import UnittestBase + import paddle import paddle.nn.functional as F -from paddle.fluid import Program, program_guard, core - -from test_attribute_var import UnittestBase +from paddle.fluid import Program, core, program_guard def _unpool_output_size(x, kernel_size, stride, padding, output_size): @@ -252,11 +253,12 @@ def data_outputsize_error2(): class TestUnpoolOpAPI_dy(unittest.TestCase): def test_case(self): + import numpy as np + import paddle - import paddle.nn.functional as F - import paddle.fluid.core as core import paddle.fluid as fluid - import numpy as np + import paddle.fluid.core as core + import paddle.nn.functional as F if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) @@ -292,11 +294,12 @@ def test_case(self): class TestUnpoolOpAPI_dy2(unittest.TestCase): def test_case(self): + import numpy as np + import paddle - import paddle.nn.functional as F - import paddle.fluid.core as core import paddle.fluid as fluid - import numpy as np + import paddle.fluid.core as core + import paddle.nn.functional as F if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) @@ -332,10 +335,11 @@ def test_case(self): class TestUnpoolOpAPI_dy3(unittest.TestCase): def test_case(self): + import numpy as np + import paddle - import paddle.fluid.core as core import paddle.fluid as fluid - import numpy as np + import paddle.fluid.core as core if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) @@ -373,9 +377,9 @@ def test_case(self): class TestUnpoolOpAPI_st(unittest.TestCase): def test_case(self): import paddle - import paddle.nn.functional as F - import paddle.fluid.core as core import paddle.fluid as fluid + import paddle.fluid.core as core + import paddle.nn.functional as F paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_unsqueeze2_op.py b/python/paddle/fluid/tests/unittests/test_unsqueeze2_op.py index 8de9185162a2bb..a4854477ffc31e 100755 --- a/python/paddle/fluid/tests/unittests/test_unsqueeze2_op.py +++ b/python/paddle/fluid/tests/unittests/test_unsqueeze2_op.py @@ -15,10 +15,10 @@ import unittest import numpy as np +from op_test import OpTest import paddle import paddle.fluid as fluid -from op_test import OpTest paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py b/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py index b73c2a3906ff48..8bfac13d9ab5b4 100755 --- a/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py +++ b/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py @@ -14,14 +14,14 @@ import unittest +import 
gradient_checker import numpy as np +from decorator_helper import prog_scope +from op_test import OpTest, convert_float_to_uint16 import paddle import paddle.fluid as fluid -from op_test import OpTest, convert_float_to_uint16 import paddle.fluid.core as core -import gradient_checker -from decorator_helper import prog_scope import paddle.fluid.layers as layers paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_unstack_op.py b/python/paddle/fluid/tests/unittests/test_unstack_op.py index 87095d8eeac24e..1dda05fb0a6b86 100755 --- a/python/paddle/fluid/tests/unittests/test_unstack_op.py +++ b/python/paddle/fluid/tests/unittests/test_unstack_op.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from op_test import OpTest -import numpy as np import unittest + +import numpy as np +from op_test import OpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_update_loss_scaling_op.py b/python/paddle/fluid/tests/unittests/test_update_loss_scaling_op.py index 7f1e6c8614f88c..2d945496294e40 100644 --- a/python/paddle/fluid/tests/unittests/test_update_loss_scaling_op.py +++ b/python/paddle/fluid/tests/unittests/test_update_loss_scaling_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle.fluid as fluid import paddle.fluid.contrib.mixed_precision.amp_nn as amp_nn diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index c832093ed59095..e4db05ecc34231 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import unittest + import numpy as np -import copy import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph +from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard class TestVarBase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_var_conv_2d.py b/python/paddle/fluid/tests/unittests/test_var_conv_2d.py index ef313a13944afa..278a4bc5aef92c 100644 --- a/python/paddle/fluid/tests/unittests/test_var_conv_2d.py +++ b/python/paddle/fluid/tests/unittests/test_var_conv_2d.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest, skip_check_grad_ci diff --git a/python/paddle/fluid/tests/unittests/test_var_info.py b/python/paddle/fluid/tests/unittests/test_var_info.py index d03fb2387bd0da..2626c8f9fca681 100644 --- a/python/paddle/fluid/tests/unittests/test_var_info.py +++ b/python/paddle/fluid/tests/unittests/test_var_info.py @@ -16,10 +16,12 @@ including create, config, run, etc. 
""" -import paddle.fluid as fluid -import numpy as np import unittest +import numpy as np + +import paddle.fluid as fluid + class TestVarInfo(unittest.TestCase): """TestCases for Dataset.""" diff --git a/python/paddle/fluid/tests/unittests/test_variable.py b/python/paddle/fluid/tests/unittests/test_variable.py index b8e020eca945ee..68887bb200023d 100644 --- a/python/paddle/fluid/tests/unittests/test_variable.py +++ b/python/paddle/fluid/tests/unittests/test_variable.py @@ -15,16 +15,16 @@ import unittest from functools import reduce +import numpy as np + import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid.framework import ( Program, convert_np_dtype_to_dtype_, default_main_program, ) -import paddle -import paddle.fluid as fluid -import paddle.fluid.core as core -import numpy as np paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_variance_layer.py b/python/paddle/fluid/tests/unittests/test_variance_layer.py index fc97a20533bcb0..6d9338542ddc17 100644 --- a/python/paddle/fluid/tests/unittests/test_variance_layer.py +++ b/python/paddle/fluid/tests/unittests/test_variance_layer.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle diff --git a/python/paddle/fluid/tests/unittests/test_version.py b/python/paddle/fluid/tests/unittests/test_version.py index 507ec953764dff..f19136554aacf5 100644 --- a/python/paddle/fluid/tests/unittests/test_version.py +++ b/python/paddle/fluid/tests/unittests/test_version.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import re +import unittest import paddle.version as fluid_version diff --git a/python/paddle/fluid/tests/unittests/test_viterbi_decode_op.py b/python/paddle/fluid/tests/unittests/test_viterbi_decode_op.py index b86be49eaed9dc..f6801d4f089d7a 100644 --- a/python/paddle/fluid/tests/unittests/test_viterbi_decode_op.py +++ b/python/paddle/fluid/tests/unittests/test_viterbi_decode_op.py @@ -8,12 +8,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + import numpy as np from op_test import OpTest + +import paddle import paddle.fluid as fluid from paddle.fluid import core -import unittest -import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_warpctc_op.py b/python/paddle/fluid/tests/unittests/test_warpctc_op.py index d9c129505e6e95..c2a272117c8c44 100644 --- a/python/paddle/fluid/tests/unittests/test_warpctc_op.py +++ b/python/paddle/fluid/tests/unittests/test_warpctc_op.py @@ -14,14 +14,16 @@ import sys import unittest + import numpy as np from op_test import OpTest from test_softmax_op import stable_softmax -from paddle.fluid.framework import _test_eager_guard -import paddle.fluid.core as core -from paddle.fluid import Program, program_guard + import paddle +import paddle.fluid.core as core import paddle.nn.functional as F +from paddle.fluid import Program, program_guard +from paddle.fluid.framework import _test_eager_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_weight_decay.py b/python/paddle/fluid/tests/unittests/test_weight_decay.py index f4a1d0b965c091..b6bf5c920dfac6 100644 --- a/python/paddle/fluid/tests/unittests/test_weight_decay.py +++ b/python/paddle/fluid/tests/unittests/test_weight_decay.py @@ -13,14 +13,14 @@ # limitations under the License. import contextlib - import unittest from functools import partial + import numpy as np -import paddle -import paddle.fluid.core as core +import paddle import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import compiler diff --git a/python/paddle/fluid/tests/unittests/test_weight_normalization.py b/python/paddle/fluid/tests/unittests/test_weight_normalization.py index ef77e6b5abb822..3acbf965df0af5 100644 --- a/python/paddle/fluid/tests/unittests/test_weight_normalization.py +++ b/python/paddle/fluid/tests/unittests/test_weight_normalization.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections import unittest + import numpy as np -import collections + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_where_index.py b/python/paddle/fluid/tests/unittests/test_where_index.py index 96b26dd6cae881..d6960621d763ce 100644 --- a/python/paddle/fluid/tests/unittests/test_where_index.py +++ b/python/paddle/fluid/tests/unittests/test_where_index.py @@ -13,12 +13,14 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest -import paddle.fluid.core as core -from paddle.fluid.op import Operator + import paddle.fluid as fluid +import paddle.fluid.core as core from paddle.fluid import Program, program_guard +from paddle.fluid.op import Operator class TestWhereIndexOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_where_op.py b/python/paddle/fluid/tests/unittests/test_where_op.py index 7420753d2d359c..657d5ad53c9597 100644 --- a/python/paddle/fluid/tests/unittests/test_where_op.py +++ b/python/paddle/fluid/tests/unittests/test_where_op.py @@ -13,10 +13,12 @@ # limitations under the License. 
import unittest + import numpy as np +from op_test import OpTest + import paddle import paddle.fluid as fluid -from op_test import OpTest from paddle.fluid import Program, program_guard from paddle.fluid.backward import append_backward from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_while_loop_op.py b/python/paddle/fluid/tests/unittests/test_while_loop_op.py index deaebf4a45d7f6..0b4cc57c77f496 100644 --- a/python/paddle/fluid/tests/unittests/test_while_loop_op.py +++ b/python/paddle/fluid/tests/unittests/test_while_loop_op.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import unittest +import numpy as np + import paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.layers as layers -from paddle.fluid.framework import Program, program_guard from paddle.fluid.backward import append_backward +from paddle.fluid.framework import Program, program_guard paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_while_op.py b/python/paddle/fluid/tests/unittests/test_while_op.py index 341e6a8ef5644c..cb5f1e3664f7df 100644 --- a/python/paddle/fluid/tests/unittests/test_while_op.py +++ b/python/paddle/fluid/tests/unittests/test_while_op.py @@ -13,13 +13,15 @@ # limitations under the License. import unittest + +import numpy + import paddle -import paddle.fluid.layers as layers -from paddle.fluid.executor import Executor -import paddle.fluid.core as core import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.fluid.layers as layers from paddle.fluid.backward import append_backward -import numpy +from paddle.fluid.executor import Executor paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_yolo_box_op.py b/python/paddle/fluid/tests/unittests/test_yolo_box_op.py index 8584332f02c07f..dd9edbd7422c43 100644 --- a/python/paddle/fluid/tests/unittests/test_yolo_box_op.py +++ b/python/paddle/fluid/tests/unittests/test_yolo_box_op.py @@ -13,8 +13,10 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest + import paddle from paddle.fluid.framework import _test_eager_guard diff --git a/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py b/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py index 9105f27e96d730..fa68189b1f415f 100644 --- a/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py @@ -13,10 +13,10 @@ # limitations under the License. import unittest + import numpy as np -from scipy.special import logit -from scipy.special import expit from op_test import OpTest +from scipy.special import expit, logit import paddle from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py index 5911b496a32d28..e854b8489af140 100644 --- a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + +import numpy as np +from decorator_helper import prog_scope + import paddle import paddle.fluid as fluid -from decorator_helper import prog_scope import paddle.nn.functional as F -import numpy as np -import unittest fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) diff --git a/python/paddle/fluid/tests/unittests/test_zeropad2d.py b/python/paddle/fluid/tests/unittests/test_zeropad2d.py index 99a6b9143a911b..09b452fab473c2 100644 --- a/python/paddle/fluid/tests/unittests/test_zeropad2d.py +++ b/python/paddle/fluid/tests/unittests/test_zeropad2d.py @@ -13,11 +13,13 @@ # limitations under the License. import unittest + import numpy as np + import paddle from paddle import to_tensor -from paddle.nn.functional import zeropad2d from paddle.nn import ZeroPad2D +from paddle.nn.functional import zeropad2d class TestZeroPad2dAPIError(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_zeros_like_op.py b/python/paddle/fluid/tests/unittests/test_zeros_like_op.py index 5712fc4df6e5f8..204eea2a48a6a9 100644 --- a/python/paddle/fluid/tests/unittests/test_zeros_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_zeros_like_op.py @@ -13,14 +13,14 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid -from paddle import zeros_like -from paddle import _C_ops -from paddle.fluid import core, Program, program_guard -from paddle.fluid.framework import _test_eager_guard -from paddle.fluid.framework import convert_np_dtype_to_dtype_ +from paddle import _C_ops, zeros_like +from paddle.fluid import Program, core, program_guard +from paddle.fluid.framework import _test_eager_guard, convert_np_dtype_to_dtype_ class TestZerosLikeAPIError(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_zeros_op.py b/python/paddle/fluid/tests/unittests/test_zeros_op.py index 59a87dba56d861..64c37831a61e12 100644 --- a/python/paddle/fluid/tests/unittests/test_zeros_op.py +++ b/python/paddle/fluid/tests/unittests/test_zeros_op.py @@ -13,7 +13,9 @@ # limitations under the License. import unittest + import numpy as np + import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard diff --git a/python/paddle/fluid/tests/unittests/tokenizer/bert_tokenizer.py b/python/paddle/fluid/tests/unittests/tokenizer/bert_tokenizer.py index 59c530b7d8aa93..979d48928bdc26 100755 --- a/python/paddle/fluid/tests/unittests/tokenizer/bert_tokenizer.py +++ b/python/paddle/fluid/tests/unittests/tokenizer/bert_tokenizer.py @@ -16,13 +16,13 @@ import os import unicodedata -from tokenizer_utils import PretrainedTokenizer from tokenizer_utils import ( - convert_to_unicode, - whitespace_tokenize, - _is_whitespace, + PretrainedTokenizer, _is_control, _is_punctuation, + _is_whitespace, + convert_to_unicode, + whitespace_tokenize, ) diff --git a/python/paddle/fluid/tests/unittests/transformer_model.py b/python/paddle/fluid/tests/unittests/transformer_model.py index a513ada96940f0..c956b0eabe016d 100644 --- a/python/paddle/fluid/tests/unittests/transformer_model.py +++ b/python/paddle/fluid/tests/unittests/transformer_model.py @@ -13,6 +13,7 @@ # limitations under the License. 
from functools import partial + import numpy as np import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/collective_allgather_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/collective_allgather_op_xpu.py index e42c5144a7b7ca..55304b8b40b438 100644 --- a/python/paddle/fluid/tests/unittests/xpu/collective_allgather_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/collective_allgather_op_xpu.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from test_collective_base_xpu import TestCollectiveRunnerBase, runtime_main + import paddle import paddle.fluid as fluid -from paddle.fluid import core import paddle.fluid.layers as layers -from test_collective_base_xpu import TestCollectiveRunnerBase, runtime_main +from paddle.fluid import core paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/collective_allreduce_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/collective_allreduce_op_xpu.py index 889eecf6327de7..32196b77d258f0 100644 --- a/python/paddle/fluid/tests/unittests/xpu/collective_allreduce_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/collective_allreduce_op_xpu.py @@ -13,11 +13,13 @@ # limitations under the License. import os + +from test_collective_base_xpu import TestCollectiveRunnerBase, runtime_main + import paddle import paddle.fluid as fluid -from paddle.fluid import core import paddle.fluid.layers as layers -from test_collective_base_xpu import TestCollectiveRunnerBase, runtime_main +from paddle.fluid import core paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/collective_identity_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/collective_identity_op_xpu.py index 967d4fcc70a8f9..5a17539cbfb885 100644 --- a/python/paddle/fluid/tests/unittests/xpu/collective_identity_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/collective_identity_op_xpu.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from test_collective_base_xpu import TestCollectiveRunnerBase, runtime_main + import paddle import paddle.fluid as fluid -from paddle.fluid import core import paddle.fluid.layers as layers -from test_collective_base_xpu import TestCollectiveRunnerBase, runtime_main +from paddle.fluid import core paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py b/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py index d645462c7dc98b..22131010d91c31 100644 --- a/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py +++ b/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import fcntl import inspect import os -import fcntl + import numpy as np import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_dataparallel_with_pylayer.py b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_dataparallel_with_pylayer.py index 18ac7c88e19479..070770fbcafd4b 100644 --- a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_dataparallel_with_pylayer.py +++ b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_dataparallel_with_pylayer.py @@ -14,8 +14,9 @@ import unittest -import paddle import numpy as np + +import paddle import paddle.distributed as dist from paddle.autograd import PyLayer from paddle.distributed.fleet.utils.hybrid_parallel_util import ( diff --git a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check.py b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check.py index b132e0e7e718f4..d68568dda47422 100644 --- a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check.py +++ b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check.py @@ -14,8 +14,9 @@ import unittest -import paddle import numpy as np + +import paddle import paddle.distributed as dist import paddle.fluid as fluid from paddle.nn import Linear diff --git a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check_in_eager_mode.py b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check_in_eager_mode.py index 1f4edaf3f3b6d9..8ae00ca4af41bc 100644 --- a/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check_in_eager_mode.py +++ b/python/paddle/fluid/tests/unittests/xpu/parallel_dygraph_gradient_check_in_eager_mode.py @@ -14,12 +14,13 @@ import unittest -import paddle import numpy as np + +import paddle import paddle.distributed as dist import paddle.fluid as fluid -from paddle.nn import Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear paddle.seed(1024) np.random.seed(2021) diff --git a/python/paddle/fluid/tests/unittests/xpu/process_group_bkcl.py b/python/paddle/fluid/tests/unittests/xpu/process_group_bkcl.py index a106c630f3634c..0578df80107199 100644 --- a/python/paddle/fluid/tests/unittests/xpu/process_group_bkcl.py +++ b/python/paddle/fluid/tests/unittests/xpu/process_group_bkcl.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import random -import numpy as np import sys +import unittest + +import numpy as np import paddle import paddle.distributed as dist -from paddle.fluid.framework import _test_eager_guard from paddle.fluid.dygraph.parallel import ParallelEnv +from paddle.fluid.framework import _test_eager_guard def init_process_group(strategy=None): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py index 760a45055ad388..082e883ded741d 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_accuracy_op_xpu.py @@ -12,20 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle - from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py index c30a472618c1c9..d5afa4a1b130db 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py @@ -12,22 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle - from op_test import OpTest from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py index 4ded307cba90ec..afd7a57c367006 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py @@ -12,21 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle -import paddle.fluid as fluid from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_adam_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_adam_op_xpu.py index fb42d564577467..154b2f54c14ea9 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_adam_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_adam_op_xpu.py @@ -16,18 +16,19 @@ sys.path.append("..") import unittest -import numpy as np -from paddle.fluid import core -from paddle.fluid.op import Operator -import paddle +import numpy as np from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +from paddle.fluid import core +from paddle.fluid.op import Operator + class XPUTestAdamOp(XPUOpTestWrapper): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_adamw_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_adamw_op_xpu.py index d9e0fb973c4d31..bbf5565e2cba83 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_adamw_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_adamw_op_xpu.py @@ -17,18 +17,18 @@ sys.path.append("..") import unittest -import paddle -import numpy as np -import paddle.fluid as fluid +import numpy as np from op_test_xpu import XPUOpTest - from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid + def adamw_step(inputs, attributes): param = inputs['Param'] diff --git a/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py 
b/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py index b6e95e256901a9..388ace1ed33071 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py @@ -15,16 +15,18 @@ import sys sys.path.append("..") -import paddle import unittest + import numpy as np from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_arg_max_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_arg_max_op_xpu.py index d041e859d9862b..d9a69216351a4a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_arg_max_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_arg_max_op_xpu.py @@ -12,20 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py index c16d0fdb5e693d..12227622e65973 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py @@ -12,20 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import unittest import sys +import unittest + +import numpy as np sys.path.append("..") -import paddle from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_assign_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_assign_op_xpu.py index 74f2a41a9eee29..9b20821c1384b0 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_assign_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_assign_op_xpu.py @@ -16,6 +16,7 @@ sys.path.append("..") import unittest + import paddle ''' diff --git a/python/paddle/fluid/tests/unittests/xpu/test_assign_value_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_assign_value_op_xpu.py index abb8dff9ecb760..7de6af1b45c3c0 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_assign_value_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_assign_value_op_xpu.py @@ -12,21 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle.fluid as fluid -import paddle.fluid.framework as framework -import paddle.fluid.layers as layers from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) + import paddle +import paddle.fluid as fluid +import paddle.fluid.framework as framework +import paddle.fluid.layers as layers paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py index bf90dfd8705842..7c8f6d103467b3 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py @@ -16,17 +16,19 @@ sys.path.append("..") import unittest + import numpy as np -import paddle.fluid.core as core -import paddle -import paddle.fluid as fluid -import paddle.nn.functional as F from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.nn.functional as F + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_bce_loss_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_bce_loss_op_xpu.py index 01256f1d9cc556..883063969ff6ae 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_bce_loss_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_bce_loss_op_xpu.py @@ -15,16 +15,18 @@ import sys sys.path.append("..") -import paddle -import numpy as np import unittest + +import numpy as np from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_op_xpu.py index ab565059013473..d4166b9e0da4d4 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_op_xpu.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest -import unittest + import paddle -import sys sys.path.append("..") diff --git a/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_v2_op_xpu.py index 842018ba8f4e98..974f5f67ba5d3a 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_bilinear_interp_v2_op_xpu.py @@ -12,20 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle -from op_test_xpu import XPUOpTest import unittest + +from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_bitwise_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_bitwise_op_xpu.py index a3b2ee2bd02f6f..71aa969afc1474 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_bitwise_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_bitwise_op_xpu.py @@ -12,21 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle from op_test import OpTest from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_bmm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_bmm_op_xpu.py index fbc0ca66921403..d0d43dd94b0aa8 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_bmm_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_bmm_op_xpu.py @@ -14,16 +14,18 @@ sys.path.append("..") -import paddle import unittest + import numpy as np from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_c_embedding_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_c_embedding_op_xpu.py index 854ec71cf7374d..b685458a3eed6f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_c_embedding_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_c_embedding_op_xpu.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import sys +import unittest sys.path.append("..") import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py index a69c439c8cb6e2..cbff489638ddba 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py @@ -16,19 +16,20 @@ sys.path.append("..") import unittest + import numpy as np -import paddle -import paddle.fluid.core as core -import paddle.fluid as fluid -from paddle.fluid import Program, program_guard from op_test_xpu import XPUOpTest - from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid import Program, program_guard + typeid_dict = { 'int32': int(core.VarDesc.VarType.INT32), 'int64': int(core.VarDesc.VarType.INT64), diff --git a/python/paddle/fluid/tests/unittests/xpu/test_clip_by_norm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_clip_by_norm_op_xpu.py index e439b9fc29dc93..206f65c10afcd4 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_clip_by_norm_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_clip_by_norm_op_xpu.py @@ -16,15 +16,17 @@ sys.path.append("..") import unittest + import numpy as np from op_test_xpu import XPUOpTest -import paddle from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + class XPUTestClipByNormOp(XPUOpTestWrapper): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py index cd6dd33b6facf6..9efb334ac7dd5e 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py @@ -16,19 +16,19 @@ sys.path.append("..") import unittest -import numpy as np -import paddle.fluid as fluid -from op_test_xpu import XPUOpTest -import paddle -from paddle.fluid import Program, program_guard +import numpy as np from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard + class XPUTestClipOp(XPUOpTestWrapper): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_coalesce_tensor_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_coalesce_tensor_op_xpu.py index 5370c1947a8dee..7b11ec3667527d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_coalesce_tensor_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_coalesce_tensor_op_xpu.py @@ -12,22 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import unittest + import numpy as np + from paddle.fluid import core -import sys sys.path.append("..") alignment = 256 -import paddle from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_collective_allgather_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_collective_allgather_xpu.py index 568b3039a955e7..be1326d176456e 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_collective_allgather_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_collective_allgather_xpu.py @@ -12,20 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest -import paddle -from paddle.fluid import core from test_collective_base_xpu import TestDistBase -import sys +import paddle +from paddle.fluid import core sys.path.append("..") from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_collective_allreduce_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_collective_allreduce_xpu.py index 3fa7405d548907..187494f50154e7 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_collective_allreduce_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_collective_allreduce_xpu.py @@ -12,20 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest -import paddle -from paddle.fluid import core from test_collective_base_xpu import TestDistBase -import sys +import paddle +from paddle.fluid import core sys.path.append("..") from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_collective_base_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_collective_base_xpu.py index b9be6077d4b4ff..31e061ed2a6318 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_collective_base_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_collective_base_xpu.py @@ -12,15 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import unittest -import time import os -import sys -import subprocess import pickle +import subprocess +import sys import tempfile +import time +import unittest from contextlib import closing + +import numpy as np + import paddle.fluid as fluid import paddle.fluid.unique_name as nameGen from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/xpu/test_collective_identity_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_collective_identity_xpu.py index 4d84efebbe2f29..3b5a2fa767a973 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_collective_identity_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_collective_identity_xpu.py @@ -12,20 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import unittest -import paddle -from paddle.fluid import core from test_collective_base_xpu import TestDistBase -import sys +import paddle +from paddle.fluid import core sys.path.append("..") from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_collective_process_group.py b/python/paddle/fluid/tests/unittests/xpu/test_collective_process_group.py index 561522c9cae6ad..e33395c8105bcd 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_collective_process_group.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_collective_process_group.py @@ -14,6 +14,7 @@ import os import unittest + from test_parallel_dygraph_dataparallel import TestMultipleXpus diff --git a/python/paddle/fluid/tests/unittests/xpu/test_compare_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_compare_op_xpu.py index 7fbe1f6ccf7375..e16b9032f2ea48 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_compare_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_compare_op_xpu.py @@ -16,11 +16,16 @@ sys.path.append("..") import unittest + import numpy as np from op_test_xpu import XPUOpTest +from xpu.get_test_cover_info import ( + XPUOpTestWrapper, + create_test_class, + get_xpu_op_support_types, +) + import paddle -from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types -from xpu.get_test_cover_info import XPUOpTestWrapper class TestCompareOpBase(XPUOpTest): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_concat_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_concat_op_xpu.py index e7e4ca073c05eb..c3f3e644481f3a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_concat_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_concat_op_xpu.py @@ -16,17 +16,18 @@ sys.path.append("..") import unittest -import numpy as np -import paddle +import numpy as np from op_test import skip_check_grad_ci from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py index a7036f521817a6..5268872c3ab5c5 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py @@ -16,17 +16,18 @@ sys.path.append("..") import unittest -import numpy as np -import paddle.fluid.core as core +import numpy as np from op_test_xpu import XPUOpTest -import paddle from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid.core as core + def conv2d_forward_naive( input, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_transpose_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_conv2d_transpose_op_xpu.py index ed996133930bcb..132f245e7fa5ad 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_transpose_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_conv2d_transpose_op_xpu.py @@ -16,14 +16,15 @@ sys.path.append("..") import unittest -import numpy as np +import numpy as np from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) 
+ import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py index 3b6b1a4363f16d..46dcd0c1302220 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py @@ -16,15 +16,13 @@ sys.path.append("..") import unittest -import numpy as np +import numpy as np from op_test_xpu import XPUOpTest -import paddle.fluid as fluid +from xpu.get_test_cover_info import XPUOpTestWrapper, create_test_class + import paddle -from xpu.get_test_cover_info import ( - create_test_class, - XPUOpTestWrapper, -) +import paddle.fluid as fluid def conv3d_forward_naive( diff --git a/python/paddle/fluid/tests/unittests/xpu/test_cumsum_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_cumsum_op_xpu.py index 5d15ddcff6e31e..90582ddfcc7806 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_cumsum_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_cumsum_op_xpu.py @@ -12,22 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle -import paddle.fluid.core as core - from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid.core as core + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_deformable_conv_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_deformable_conv_op_xpu.py index 0c11942804ce39..3636b1858bc1bf 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_deformable_conv_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_deformable_conv_op_xpu.py @@ -16,17 +16,18 @@ sys.path.append("..") import unittest -import numpy as np -import paddle.fluid.core as core +import numpy as np from op_test_xpu import OpTest, XPUOpTest -import paddle from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid.core as core + def dmc_bilinear(data_im, height, width, h, w): h_low = int(np.floor(h)) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_depthwise_conv2d_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_depthwise_conv2d_op_xpu.py index b8cd6a5fd3d0dd..7ccf79170ddf2e 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_depthwise_conv2d_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_depthwise_conv2d_op_xpu.py @@ -16,6 +16,7 @@ sys.path.append("..") import unittest + import numpy as np import paddle @@ -23,9 +24,9 @@ paddle.enable_static() from test_conv2d_op_xpu import XPUTestConv2DOp, XPUTestConv2DOp_v2 from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_device_guard_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_device_guard_xpu.py index 0989fb347c9ee7..ca4824f554bcc0 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_device_guard_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_device_guard_xpu.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest
 import sys
+import unittest
 sys.path.append("..")
+import warnings
+
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
-import warnings
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py
index 36434ce2020258..5eb90f42498788 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py
@@ -16,19 +16,21 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
-from paddle import _legacy_C_ops
+from op_test_xpu import XPUOpTest
+
 import paddle
 import paddle.fluid as fluid
+from paddle import _legacy_C_ops
 from paddle.fluid import Program, program_guard
-from op_test_xpu import XPUOpTest
 paddle.enable_static()
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu.py
index ac9a371325aee1..5e3bcbaae32f59 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu.py
@@ -12,22 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
 import sys
+import numpy as np
+
 sys.path.append("..")
-import paddle
+import unittest
+
 from op_test import OpTest, skip_check_grad_ci
 from op_test_xpu import XPUOpTest
-import unittest
-import paddle.fluid as fluid
-from paddle.fluid import Program, program_guard
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import Program, program_guard
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu_kp.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu_kp.py
index 7f201d3487ed22..ea71e4d609aa7a 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu_kp.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu_kp.py
@@ -12,14 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
 import sys
+import numpy as np
+
 sys.path.append("..")
-import paddle
+import unittest
+
 from op_test import OpTest, skip_check_grad_ci
 from op_test_xpu import XPUOpTest
-import unittest
+
+import paddle
 import paddle.fluid as fluid
 from paddle.fluid import Program, program_guard
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_div_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_div_op_xpu.py
index 99c1820d894d83..10b8314b85a0b2 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_div_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_div_op_xpu.py
@@ -15,17 +15,19 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
-import paddle
-import paddle.fluid as fluid
 from op_test import skip_check_grad_ci
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+import paddle.fluid as fluid
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_floordiv_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_floordiv_op_xpu.py
index f557221b488df7..9b01c005bd06a7 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_floordiv_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_floordiv_op_xpu.py
@@ -15,16 +15,18 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
-import paddle
 from op_test import OpTest
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
 import random
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_max_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_max_op_xpu.py
index f2a05670f41b97..1d3e996bfd9e18 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_max_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_max_op_xpu.py
@@ -15,16 +15,18 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
 from op_test import skip_check_grad_ci
 from op_test_xpu import XPUOpTest
-import paddle
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_min_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_min_op_xpu.py
index 498410d6dbb68a..2764157ead4756 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_min_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_min_op_xpu.py
@@ -15,16 +15,18 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
 from op_test import skip_check_grad_ci
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mod_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mod_op_xpu.py
index 334cd0794b48ff..9b7c59737fc228 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mod_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mod_op_xpu.py
@@ -15,18 +15,19 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
 from op_test import OpTest
-import paddle.fluid as fluid
-
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+import paddle.fluid as fluid
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mul_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mul_op_xpu.py
index 42ab74b1382f2d..cf74e4dd208c8d 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mul_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mul_op_xpu.py
@@ -15,18 +15,20 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
 from op_test import OpTest, skip_check_grad_ci
-import paddle.fluid as fluid
-from paddle.fluid import Program, program_guard
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import Program, program_guard
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_pow_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_pow_op_xpu.py
index 95315d55878c7f..4e1b9a64121884 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_pow_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_pow_op_xpu.py
@@ -15,16 +15,18 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
-import paddle
 from op_test import OpTest, skip_check_grad_ci
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_sub_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_sub_op_xpu.py
index b5a3d2f853ea6a..7c2676f23af8f1 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_sub_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_sub_op_xpu.py
@@ -12,20 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
 import sys
+import numpy as np
+
 sys.path.append("..")
-import paddle
+import unittest
+
 from op_test import skip_check_grad_ci
 from op_test_xpu import XPUOpTest
-import unittest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_empty_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_empty_op_xpu.py
index f11740d74d482d..0bc1094888b0ae 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_empty_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_empty_op_xpu.py
@@ -17,16 +17,18 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
-import paddle
 from op_test_xpu import XPUOpTest
-from paddle.fluid.framework import convert_np_dtype_to_dtype_
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+from paddle.fluid.framework import convert_np_dtype_to_dtype_
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py
index 246696be64315b..9f7d39ca346aad 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py
@@ -12,20 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
-import unittest
 import sys
+import unittest
+
+import numpy as np
 sys.path.append("..")
 from op_test_xpu import XPUOpTest
-import paddle
-import paddle.fluid as fluid
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+import paddle.fluid as fluid
+
 paddle.enable_static()
 np.random.seed(10)
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py
index e5a30ad65bc652..07039053dc0c6a 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py
@@ -12,20 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import unittest
 import sys
+import unittest
+
 import numpy as np
 sys.path.append("..")
 from op_test_xpu import XPUOpTest
-import paddle.fluid as fluid
-import paddle
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+import paddle.fluid as fluid
+
 paddle.enable_static()
 np.random.seed(10)
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fill_any_like_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fill_any_like_op_xpu.py
index 56b099b1e86bc0..af8f9518b5483d 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_fill_any_like_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_fill_any_like_op_xpu.py
@@ -16,16 +16,18 @@
 sys.path.append("..")
-import paddle
 import unittest
+
 import numpy as np
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py
index e09834075a7b95..3f7c1cf68b740b 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_fill_constant_op_xpu.py
@@ -16,16 +16,18 @@
 sys.path.append("..")
 import unittest
-import paddle
+
 import numpy as np
 from op_test import convert_float_to_uint16
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 class XPUTestFillConstantOp(XPUOpTestWrapper):
     def __init__(self):
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_flatten2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_flatten2_op_xpu.py
index 307889b4f26b0c..380da7b62d0b66 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_flatten2_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_flatten2_op_xpu.py
@@ -12,19 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import unittest
 import sys
+import unittest
 sys.path.append("..")
 import numpy as np
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_flatten_contiguous_range_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_flatten_contiguous_range_op_xpu.py
index aa029301003be8..7a177651f1e934 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_flatten_contiguous_range_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_flatten_contiguous_range_op_xpu.py
@@ -15,19 +15,21 @@
 import sys
 sys.path.append("..")
-import numpy as np
-import unittest
 import sys
+import unittest
+
+import numpy as np
 sys.path.append("..")
 from op_test_xpu import XPUOpTest
-import paddle
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_flatten_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_flatten_op_xpu.py
index 13efc51fa26102..9876b6c3815402 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_flatten_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_flatten_op_xpu.py
@@ -12,19 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import unittest
 import sys
+import unittest
 sys.path.append("..")
 import numpy as np
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fleet_exe_dist_model_run_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fleet_exe_dist_model_run_xpu.py
index 8d5091d32354ad..a55f82426bfa8a 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_fleet_exe_dist_model_run_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_fleet_exe_dist_model_run_xpu.py
@@ -12,11 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import unittest
-import paddle
-import numpy as np
 import os
 import tempfile
+import unittest
+
+import numpy as np
+
+import paddle
 from paddle.fluid import core
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_attention_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fused_attention_op_xpu.py
index 6462bec102ee50..64aa6570095fca 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_fused_attention_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_fused_attention_op_xpu.py
@@ -12,29 +12,31 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
 import sys
+import numpy as np
+
 sys.path.append("..")
-import paddle
-import paddle.nn.functional as F
-import paddle.incubate.nn.functional as incubate_f
-from paddle.nn.layer.norm import LayerNorm
-from paddle.nn.layer.common import Linear, Dropout
-from paddle.nn.layer.transformer import _convert_attention_mask
-from paddle import tensor
-from paddle.fluid import layers
 import unittest
-from op_test_xpu import XPUOpTest
-from paddle.fluid.framework import default_main_program
+from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+import paddle.incubate.nn.functional as incubate_f
+import paddle.nn.functional as F
+from paddle import tensor
+from paddle.fluid import layers
+from paddle.fluid.framework import default_main_program
+from paddle.nn.layer.common import Dropout, Linear
+from paddle.nn.layer.norm import LayerNorm
+from paddle.nn.layer.transformer import _convert_attention_mask
+
 default_main_program().random_seed = 42
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_feedforward_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fused_feedforward_op_xpu.py
index f8a6fb75eba0e3..86b910dfad6ecb 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_fused_feedforward_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_fused_feedforward_op_xpu.py
@@ -11,25 +11,24 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
 import sys
+import numpy as np
+
 sys.path.append("..")
-import paddle
-from paddle.nn.layer import transformer
-import paddle.nn.functional as F
-import paddle.incubate.nn.functional as incubate_f
-from paddle.nn.layer.norm import LayerNorm
-from paddle.nn.layer.common import Linear, Dropout
 import unittest
+
 from op_test_xpu import XPUOpTest
-from paddle.fluid.framework import default_main_program
+from xpu.get_test_cover_info import XPUOpTestWrapper, create_test_class
-from xpu.get_test_cover_info import (
-    create_test_class,
-    XPUOpTestWrapper,
-)
+import paddle
+import paddle.incubate.nn.functional as incubate_f
+import paddle.nn.functional as F
+from paddle.fluid.framework import default_main_program
+from paddle.nn.layer import transformer
+from paddle.nn.layer.common import Dropout, Linear
+from paddle.nn.layer.norm import LayerNorm
 class XPUTestFusedFFNOp(XPUOpTestWrapper):
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_grad_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_grad_op_xpu.py
index 435f44e47361d7..01b265237c5081 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_grad_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_grad_op_xpu.py
@@ -18,16 +18,18 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
-import paddle
-import paddle.fluid.core as core
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+import paddle.fluid.core as core
+
 def get_outputs(DOut, X, Y):
     DX = np.dot(DOut, Y.T)
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_op_xpu.py
index 7c2a5ed2f09232..f6084df324ec59 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_op_xpu.py
@@ -17,17 +17,19 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
-import paddle
-import paddle.fluid.core as core
-from paddle import _legacy_C_ops
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+import paddle.fluid.core as core
+from paddle import _legacy_C_ops
+
 def gelu(x):
     y_ref = (
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py
index f45a4a135e3e96..68bf21abd1a06b 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py
@@ -16,19 +16,21 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
 from op_test import OpTest
+from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
+    create_test_class,
+    get_xpu_op_support_types,
+)
+
 import paddle
 import paddle.fluid as fluid
 import paddle.nn as nn
 from paddle.fluid import core
-from paddle.incubate.xpu.resnet_block import ResNetBasicBlock
 from paddle.fluid.framework import default_main_program
-from xpu.get_test_cover_info import (
-    create_test_class,
-    get_xpu_op_support_types,
-    XPUOpTestWrapper,
-)
+from paddle.incubate.xpu.resnet_block import ResNetBasicBlock
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_gather_nd_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_gather_nd_op_xpu.py
index ec4db1fd741bdd..97a62423a569cc 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_gather_nd_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_gather_nd_op_xpu.py
@@ -12,20 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_gather_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_gather_op_xpu.py
index b1a776bd49bf64..a57af602f97125 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_gather_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_gather_op_xpu.py
@@ -12,21 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import unittest
 import sys
+import unittest
 sys.path.append("..")
 import numpy as np
-
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py
index 89725bb8b6d4a0..62a0180dc876d0 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py
@@ -16,16 +16,17 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
-import paddle
-import paddle.fluid as fluid
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+
 import paddle
+import paddle.fluid as fluid
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_gen_bkcl_id_op.py b/python/paddle/fluid/tests/unittests/xpu/test_gen_bkcl_id_op.py
index ebecc1b8b71f55..883476300a2b80 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_gen_bkcl_id_op.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_gen_bkcl_id_op.py
@@ -12,14 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import unittest
 import os
 import sys
+import unittest
 sys.path.append("..")
-from launch_function_helper import wait, _find_free_port
 from multiprocessing import Process
+from launch_function_helper import _find_free_port, wait
+
 os.environ['GLOG_vmodule'] = str("gen_bkcl_id_op*=10,gen_comm_id*=10")
 import paddle
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_generate_proposals_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_generate_proposals_v2_op_xpu.py
index d087e8bf7968b3..087d688f07e1b5 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_generate_proposals_v2_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_generate_proposals_v2_op_xpu.py
@@ -12,24 +12,26 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
+import copy
 import math
-import paddle
-import paddle.fluid.core as core
-import copy
+
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+import paddle.fluid.core as core
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_grid_sampler_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_grid_sampler_op_xpu.py
index eb992cd42132c2..3b2deaf4396bbc 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_grid_sampler_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_grid_sampler_op_xpu.py
@@ -12,21 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
-
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_huber_loss_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_huber_loss_op_xpu.py
index 50d77fc1a3d5f0..09d9dbd5071fb3 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_huber_loss_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_huber_loss_op_xpu.py
@@ -12,22 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
-
 from op_test import OpTest
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_index_select_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_index_select_op_xpu.py
index 816e7ac7967de7..85818e5a6a4ccc 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_index_select_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_index_select_op_xpu.py
@@ -12,23 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import unittest
 import sys
+import unittest
+
 import paddle.fluid as fluid
 from paddle.fluid import Program, program_guard
 sys.path.append("..")
 import numpy as np
-
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_instance_norm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_instance_norm_op_xpu.py
index 8fc1ca75240dbc..52f4d2b3992c2a 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_instance_norm_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_instance_norm_op_xpu.py
@@ -12,17 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import paddle
-import numpy as np
 import sys
 import unittest
+import numpy as np
+
+import paddle
+
 sys.path.append("..")
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_iou_similarity_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_iou_similarity_op_xpu.py
index 548976ae62e91f..6701ec77de58f4 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_iou_similarity_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_iou_similarity_op_xpu.py
@@ -12,17 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy.random as random
-import sys
 sys.path.append("..")
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+
 import paddle
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_kldiv_loss_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_kldiv_loss_op_xpu.py
index 86a4327d6ce595..861103061b1608 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_kldiv_loss_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_kldiv_loss_op_xpu.py
@@ -14,17 +14,19 @@
 import sys
 sys.path.append("..")
-import paddle
 import unittest
+
 import numpy as np
-from paddle.nn.functional import kl_div
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+from paddle.nn.functional import kl_div
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_label_smooth_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_label_smooth_op_xpu.py
index 0a2c7b6b77331a..b83a32a313ad4d 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_label_smooth_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_label_smooth_op_xpu.py
@@ -12,17 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
-import paddle
+
 import numpy as np
-import sys
+
+import paddle
 sys.path.append("..")
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_lamb_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_lamb_op_xpu.py
index 0c5354c957597f..70794de507f5e5 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_lamb_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_lamb_op_xpu.py
@@ -16,16 +16,17 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
 from op_test_xpu import XPUOpTest
-import paddle
-
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 def lamb_step(inputs, attributes):
     '''
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_layer_norm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_layer_norm_op_xpu.py
index 3c377db4c0b558..45dd221edfbd2c 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_layer_norm_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_layer_norm_op_xpu.py
@@ -12,19 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import paddle
-import numpy as np
 import sys
 import unittest
 from functools import reduce
+import numpy as np
+
+import paddle
+
 sys.path.append("..")
-from op_test_xpu import XPUOpTest
 from operator import mul
+
+from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_log_loss_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_log_loss_op_xpu.py
index 4d50fc2da7d521..00461e6f103d73 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_log_loss_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_log_loss_op_xpu.py
@@ -16,8 +16,10 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
 from op_test import OpTest
+
 import paddle
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_log_softmax_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_log_softmax_op_xpu.py
index d961e46bd62bfd..36b4809b52ce49 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_log_softmax_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_log_softmax_op_xpu.py
@@ -12,22 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
-import paddle.nn.functional as F
-
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+import paddle.nn.functional as F
+
 paddle.enable_static()
 np.random.seed(10)
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_logical_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_logical_op_xpu.py
index be1592494cafd5..0491e7ef5f1d5b 100755
--- a/python/paddle/fluid/tests/unittests/xpu/test_logical_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_logical_op_xpu.py
@@ -12,21 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
 from op_test import OpTest
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_logsumexp_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_logsumexp_op_xpu.py
index 932507066b9a85..46515eb6b1cfda 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_logsumexp_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_logsumexp_op_xpu.py
@@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import paddle
-import unittest
 import sys
+import unittest
+
+import paddle
 sys.path.append("..")
 import numpy as np
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py
index 039effe0275a71..f0db43acb685b5 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py
@@ -12,18 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
 from op_test import OpTest, skip_check_grad_ci
+
 import paddle
-import paddle.fluid.core as core
-import paddle.fluid as fluid
-from paddle.fluid.op import Operator
 import paddle.fluid as fluid
+import paddle.fluid.core as core
 from paddle.fluid import Program, program_guard
+from paddle.fluid.op import Operator
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_masked_select_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_masked_select_op_xpu.py
index f596f22dd49938..64d52d077fe83e 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_masked_select_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_masked_select_op_xpu.py
@@ -12,20 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
-import unittest
 import sys
+import unittest
+
+import numpy as np
 sys.path.append("..")
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_matmul_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_matmul_op_xpu.py
index b439ffb5d20cbb..d5ad5cb6f76b3c 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_matmul_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_matmul_op_xpu.py
@@ -16,18 +16,19 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
 from op_test_xpu import XPUOpTest
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid import Program, program_guard
-
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import Program, program_guard
+
 def reference_matmul(X, Y, transpose_X=False, transpose_Y=False):
     """Reference forward implementation using np.matmul."""
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py
index c2a1ab4ee0b621..c5d782400b3bbb 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py
@@ -16,17 +16,17 @@
 sys.path.append("..")
 import unittest
+
 import numpy as np
 from op_test_xpu import XPUOpTest
-
-import paddle
-
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 def reference_matmul(X, Y, transpose_X=False, transpose_Y=False):
     """Reference forward implementation using np.matmul."""
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py
index c86aaeea6cfb32..6021256f6962cf 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py
@@ -12,12 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
 from op_test_xpu import XPUOpTest
+
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid import Program, program_guard
@@ -26,9 +28,9 @@
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_merged_momentum_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_merged_momentum_op_xpu.py
index 591cb32541723d..1a6455a2a712e4 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_merged_momentum_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_merged_momentum_op_xpu.py
@@ -12,20 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import unittest
 import sys
+import unittest
 sys.path.append("..")
-import paddle
-
 from test_merged_momentum_op_xpu_base import TestMergedMomentumBase
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_merged_momentum_op_xpu_base.py b/python/paddle/fluid/tests/unittests/xpu/test_merged_momentum_op_xpu_base.py
index 05044f11ee41b1..10868585a903f0 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_merged_momentum_op_xpu_base.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_merged_momentum_op_xpu_base.py
@@ -16,10 +16,12 @@
 sys.path.append('..')
 import unittest
-import paddle
+from collections import OrderedDict
+
 import numpy as np
+
+import paddle
 from paddle.fluid.layer_helper import LayerHelper
-from collections import OrderedDict
 def run_momentum_op(
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_momentum_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_momentum_op_xpu.py
index 0ee2af0e2023c3..69c89a1a8408fe 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_momentum_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_momentum_op_xpu.py
@@ -12,22 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
-import paddle.fluid.core as core
-
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+import paddle.fluid.core as core
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_mul_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_mul_op_xpu.py
index 08b3b3e89a3c69..3654013398c10a 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_mul_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_mul_op_xpu.py
@@ -12,22 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
+
 import paddle
-import sys
 sys.path.append("..")
 from op_test_xpu import XPUOpTest
+
 import paddle.fluid as fluid
 from paddle.fluid import Program, program_guard
 paddle.enable_static()
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_op_xpu.py
index 926196f2e694b4..42ea4032e1f4bf 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_op_xpu.py
@@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import paddle
-import sys
 sys.path.append("..")
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_v2_op_xpu.py
index 0f2d4fd27ecc91..69bf56a2c44109 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_v2_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_v2_op_xpu.py
@@ -12,21 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
-
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py
index d3f1a30066411e..253bd48045d2ed 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_op_xpu.py
@@ -12,20 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
-import paddle.fluid.core as core
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+import paddle.fluid.core as core
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py
index 19228435ea4abe..dce5e263b9545f 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py
@@ -12,19 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
+
 import paddle
-import paddle.fluid.core as core
 import paddle.fluid as fluid
-import sys
+import paddle.fluid.core as core
 sys.path.append("..")
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_p_norm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_p_norm_op_xpu.py
index a03a6e9939ab4c..959ae77ca0117d 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_p_norm_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_p_norm_op_xpu.py
@@ -12,17 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import paddle
-import numpy as np
 import sys
 import unittest
+import numpy as np
+
+import paddle
+
 sys.path.append("..")
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_pad3d_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_pad3d_op_xpu.py
index df4fd640571f33..2522fa9f6cecd9 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_pad3d_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_pad3d_op_xpu.py
@@ -12,23 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
-import paddle.nn as nn
-import paddle.nn.functional as F
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
-from paddle.fluid import Program, program_guard, Executor, default_main_program
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.fluid import Executor, Program, default_main_program, program_guard
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_parallel_dygraph_dataparallel.py b/python/paddle/fluid/tests/unittests/xpu/test_parallel_dygraph_dataparallel.py
index 3c994ba72bc1d7..8bca3300c8a749 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_parallel_dygraph_dataparallel.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_parallel_dygraph_dataparallel.py
@@ -12,19 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import unittest
-import time
-import paddle
-import paddle.fluid as fluid
 import copy
 import os
 import subprocess
+import time
+import unittest
+import paddle
+import paddle.fluid as fluid
 from paddle.distributed.utils.launch_utils import (
+    TrainerProc,
     find_free_ports,
-    watch_local_trainers,
     get_cluster,
-    TrainerProc,
+    watch_local_trainers,
 )
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_pool2d_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_pool2d_op_xpu.py
index 36cb5dfaefd8bd..ed3ef31e8db73e 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_pool2d_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_pool2d_op_xpu.py
@@ -16,15 +16,16 @@
 sys.path.append("..")
 import unittest
-import numpy as np
+import numpy as np
 from op_test_xpu import XPUOpTest
-from test_pool2d_op import adaptive_start_index, adaptive_end_index
+from test_pool2d_op import adaptive_end_index, adaptive_start_index
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+
 import paddle
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_pow2_decay_with_linear_warmup_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_pow2_decay_with_linear_warmup_op_xpu.py
index c7fa339f19b970..78ca6933181aae 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_pow2_decay_with_linear_warmup_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_pow2_decay_with_linear_warmup_op_xpu.py
@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
+import unittest
+
 import paddle
 from paddle.fluid.contrib.layers.nn import pow2_decay_with_linear_warmup
-from paddle.optimizer.lr import LinearWarmup
-from paddle.optimizer.lr import PolynomialDecay
-import unittest
-import sys
+from paddle.optimizer.lr import LinearWarmup, PolynomialDecay
 sys.path.append("..")
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_prior_box_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_prior_box_op_xpu.py
index acc05963ee4355..52d3ca875efdff 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_prior_box_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_prior_box_op_xpu.py
@@ -13,21 +13,22 @@
 # limitations under the License.
 import math
-import numpy as np
 import sys
 import unittest
-sys.path.append("..")
+import numpy as np
-import paddle
+sys.path.append("..")
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_range_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_range_xpu.py
index 02893acc0e2d1f..2870cbb7a7cc6e 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_range_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_range_xpu.py
@@ -12,17 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
-import paddle
+
 import numpy as np
-import sys
+
+import paddle
 sys.path.append("..")
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_all_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_all_op_xpu.py
index df485b49ac1ea1..987b968b0a691d 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_all_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_all_op_xpu.py
@@ -12,20 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_amax_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_amax_op_xpu.py
index 4394340aa1c803..49ffef884d3db5 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_amax_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_amax_op_xpu.py
@@ -12,20 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_amin_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_amin_op_xpu.py
index 77c45ce06424f5..4f2ca6fea3ff87 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_amin_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_amin_op_xpu.py
@@ -12,20 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_any_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_any_op_xpu.py
index 032d138558d91b..a255dc390bcc00 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_any_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_any_op_xpu.py
@@ -12,20 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_max_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_max_op_xpu.py
index ee0922110e5bc4..462e7457afae11 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_max_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_max_op_xpu.py
@@ -12,20 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_mean_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_mean_op_xpu.py
index 41efec0c29b0e7..ed3d51ff4fd625 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_mean_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_mean_op_xpu.py
@@ -12,17 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+
 import paddle
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_min_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_min_op_xpu.py
index 6639071b3b0d19..0227ffb77d1994 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_min_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_min_op_xpu.py
@@ -12,20 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_prod_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_prod_op_xpu.py
index ad2cb5143b9494..1e9c259f0e580c 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_prod_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_prod_op_xpu.py
@@ -12,21 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
-
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py
index dc377df7ab1414..d8a1e9efcbcddd 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py
@@ -12,20 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_refactor_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_refactor_op_xpu.py
index aa87579755064f..1b424546d6432b 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_refactor_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_refactor_op_xpu.py
@@ -12,23 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import unittest
+
 import numpy as np
-import sys
 sys.path.append("..")
-import paddle
-from paddle.fluid import core
-
 from op_test import OpTest
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+from paddle.fluid import core
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py
index 987aaf54cfa869..01773e8a28c5b1 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py
@@ -12,21 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
 import sys
 import unittest
-sys.path.append("..")
+import numpy as np
-import paddle
+sys.path.append("..")
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
     create_test_class,
     get_xpu_op_support_types,
-    XPUOpTestWrapper,
 )
+import paddle
+
 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_rmsprop_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_rmsprop_op_xpu.py
index 29b980fa12804b..c2d6ff05870eb0 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_rmsprop_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_rmsprop_op_xpu.py
@@ -12,22 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle -import paddle.fluid.core as core - from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid.core as core + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py index 342123d0c84dbd..91e4c4469f0c89 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py @@ -13,21 +13,22 @@ import sys sys.path.append("..") -import unittest -import numpy as np -import paddle.fluid.core as core -import paddle import random +import unittest +import numpy as np from op_test_xpu import XPUOpTest +import paddle +import paddle.fluid.core as core + sys.path.append("../rnn") -from rnn_numpy import LSTM from convert import get_params_for_net +from rnn_numpy import LSTM from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) random.seed(2) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_roi_align_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_roi_align_op_xpu.py index f44596324327f2..87d52d18bfb43e 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_roi_align_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_roi_align_op_xpu.py @@ -15,18 +15,20 @@ import sys sys.path.append("..") -import unittest import math +import unittest + import numpy as np -import paddle.fluid.core as core from op_test_xpu import XPUOpTest -import paddle from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid.core as core + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_roll_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_roll_op_xpu.py index 4c64c6e2a3e62a..d747ade7b65164 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_roll_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_roll_op_xpu.py @@ -12,17 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import numpy as np import sys import unittest +import numpy as np + +import paddle + sys.path.append("..") from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sampling_id_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sampling_id_op_xpu.py index d2e9aa481749fe..f2425c8f860880 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sampling_id_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sampling_id_op_xpu.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle.fluid as fluid import paddle +import paddle.fluid as fluid class TestSamplingIdShape(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_scale_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_scale_op_xpu.py index 47c95c2158b339..1dd41e90a17000 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_scale_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_scale_op_xpu.py @@ -12,22 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle -from paddle.fluid import Program, program_guard - from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +from paddle.fluid import Program, program_guard + class XPUTestScaleOp(XPUOpTestWrapper): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_scatter_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_scatter_op_xpu.py index 4cd2de7e6a08ae..565549f0f16bbb 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_scatter_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_scatter_op_xpu.py @@ -12,21 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import unittest import sys +import unittest + +import numpy as np sys.path.append("..") -import paddle from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, type_dict_str_to_numpy, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sequence_conv_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sequence_conv_op_xpu.py index e15bfdbbe5849a..c3e49d123241d7 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_sequence_conv_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sequence_conv_op_xpu.py @@ -12,16 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random +import sys import unittest + import numpy as np + import paddle -import random -import sys sys.path.append("../") from op_test_xpu import XPUOpTest -from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types -from xpu.get_test_cover_info import XPUOpTestWrapper +from xpu.get_test_cover_info import ( + XPUOpTestWrapper, + create_test_class, + get_xpu_op_support_types, +) paddle.enable_static() np.set_printoptions(threshold=np.inf) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sequence_unpad_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sequence_unpad_op_xpu.py index d24c98ad6c956f..17edbf907b2e61 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sequence_unpad_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sequence_unpad_op_xpu.py @@ -12,23 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle -import paddle.fluid as fluid import unittest + import numpy as np from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py index 7929b0f3fc3151..e9cc501a87770a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py @@ -12,23 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle -import paddle.fluid as fluid -import paddle.fluid.core as core -from paddle.fluid.op import Operator - from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.op import Operator + class XPUTestSgdOp(XPUOpTestWrapper): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_shape_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_shape_op_xpu.py index 1a7a51b83149a4..8da888e1a41273 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_shape_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_shape_op_xpu.py @@ -12,17 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np -import sys sys.path.append("..") from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) + import paddle from paddle.fluid import core from paddle.fluid.op import Operator diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sigmoid_cross_entropy_with_logits_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sigmoid_cross_entropy_with_logits_op_xpu.py index 6221d4f608fb77..8c0b3e4c733847 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sigmoid_cross_entropy_with_logits_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sigmoid_cross_entropy_with_logits_op_xpu.py @@ -12,22 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np -import sys sys.path.append("..") from op_test_xpu import XPUOpTest -import paddle - +from scipy.special import expit, logit from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) -from scipy.special import logit -from scipy.special import expit +import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sign_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sign_op_xpu.py index b498c5fc3a17e5..8743310a9c6974 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sign_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sign_op_xpu.py @@ -12,21 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle - from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_slice_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_slice_op_xpu.py index 5a77d9cb51ab8c..09368723a1f487 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_slice_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_slice_op_xpu.py @@ -12,17 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import numpy as np import sys import unittest +import numpy as np + +import paddle + sys.path.append("..") from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_softmax_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_softmax_op_xpu.py index 736b3b7fbe68af..24c25bbe1a88ea 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_softmax_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_softmax_op_xpu.py @@ -12,17 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import numpy as np import sys import unittest +import numpy as np + +import paddle + sys.path.append("..") from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_softmax_with_cross_entropy_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_softmax_with_cross_entropy_op_xpu.py index f4482e5edd716f..1ecc1eb4934ca2 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_softmax_with_cross_entropy_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_softmax_with_cross_entropy_op_xpu.py @@ -16,18 +16,19 @@ sys.path.append("..") -from test_softmax_op import stable_softmax -from op_test_xpu import XPUOpTest -import paddle - import unittest + import numpy as np +from op_test_xpu import XPUOpTest +from test_softmax_op import stable_softmax from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + def cross_entropy(softmax, label, soft_label, axis, ignore_index=-1): if soft_label: diff --git a/python/paddle/fluid/tests/unittests/xpu/test_split_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_split_op_xpu.py index c7dff56cb621a0..dca61b4b129a1d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_split_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_split_op_xpu.py @@ -16,16 +16,17 @@ sys.path.append("..") import unittest + import numpy as np from op_test_xpu import XPUOpTest -import paddle -from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_squeeze2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_squeeze2_op_xpu.py index 1b2a52a6962f19..b9598bc3ca08ab 100644 --- 
a/python/paddle/fluid/tests/unittests/xpu/test_squeeze2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_squeeze2_op_xpu.py @@ -12,19 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import sys +import unittest sys.path.append("..") import numpy as np - from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) + import paddle paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_squeeze_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_squeeze_op_xpu.py index a87fe32d62e0de..1a3fb3bc2daa43 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_squeeze_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_squeeze_op_xpu.py @@ -12,23 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import sys +import unittest sys.path.append("..") import numpy as np - -import paddle -import paddle.fluid as fluid -from paddle.fluid import Program, program_guard from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py index 1f215acdb7437a..86bbfb6a8ad20b 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py @@ -16,17 +16,18 @@ sys.path.append("..") import unittest + import numpy as np from op_test import skip_check_grad_ci from op_test_xpu import XPUOpTest -import paddle -from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_strided_slice_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_strided_slice_op_xpu.py index c73ead8d0f7077..7659ffd4ae0c6d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_strided_slice_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_strided_slice_op_xpu.py @@ -12,17 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle -import numpy as np import sys import unittest +import numpy as np + +import paddle + sys.path.append("..") from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py index 84be81718fbb0d..ec615324bcc635 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py @@ -16,18 +16,19 @@ sys.path.append("..") import unittest + import numpy as np from op_test_xpu import XPUOpTest -import paddle -import paddle.fluid as fluid -import paddle.fluid.core as core -from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_temporal_shift_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_temporal_shift_op_xpu.py index 5fce6052502b1b..44b8e0dcb5e1de 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_temporal_shift_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_temporal_shift_op_xpu.py @@ -12,22 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle -import paddle.nn.functional as F - from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.nn.functional as F + paddle.enable_static() np.random.seed(10) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py index ef6c65f71dd841..41fc20daffcdfe 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_tile_op_xpu.py @@ -12,20 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np -import sys sys.path.append("..") from op_test_xpu import XPUOpTest -import paddle -import paddle.fluid as fluid from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid + paddle.enable_static() np.random.seed(10) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_top_k_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_top_k_op_xpu.py index fe251b320a652c..8dfbddbb1cf59e 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_top_k_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_top_k_op_xpu.py @@ -12,19 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
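Every file in this batch imports the same three helpers from xpu.get_test_cover_info; they stamp out one concrete test class per data type the XPU kernel supports. Roughly, the driver at the bottom of each test looks like the sketch below — the operator name and wrapper class are illustrative placeholders, and real wrappers also define the per-op XPUOpTest cases that this sketch omits:

    import unittest

    from xpu.get_test_cover_info import (
        XPUOpTestWrapper,
        create_test_class,
        get_xpu_op_support_types,
    )


    class XPUTestSignOp(XPUOpTestWrapper):
        # illustrative wrapper; real ones also attach inner XPUOpTest subclasses
        def __init__(self):
            self.op_name = 'sign'


    support_types = get_xpu_op_support_types('sign')
    for stype in support_types:
        # registers a dtype-specialized test class into this module's namespace
        create_test_class(globals(), XPUTestSignOp, stype)

    if __name__ == '__main__':
        unittest.main()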
+import sys import unittest + import numpy as np -import sys sys.path.append("..") -import paddle from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_top_k_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_top_k_v2_op_xpu.py index 5393834c370f16..ede2b76896b222 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_top_k_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_top_k_v2_op_xpu.py @@ -12,19 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np -import sys sys.path.append("..") from op_test_xpu import XPUOpTest -import paddle from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_transpose_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_transpose_op_xpu.py index 1261487d393735..12507d206a43da 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_transpose_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_transpose_op_xpu.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import unittest + import numpy as np -import sys sys.path.append("..") from op_test_xpu import XPUOpTest + import paddle diff --git a/python/paddle/fluid/tests/unittests/xpu/test_tril_triu_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_tril_triu_op_xpu.py index a80d6adfb94e2b..85dbdeeadea991 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_tril_triu_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_tril_triu_op_xpu.py @@ -14,18 +14,20 @@ sys.path.append("..") -import paddle -import paddle.fluid as fluid -import paddle.tensor as tensor import unittest + import numpy as np from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid +import paddle.tensor as tensor + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_truncated_gaussian_random_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_truncated_gaussian_random_op_xpu.py index ea79e9078dbaeb..130422613a1c59 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_truncated_gaussian_random_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_truncated_gaussian_random_op_xpu.py @@ -16,17 +16,19 @@ sys.path.append("..") import unittest + import numpy as np -import paddle -import paddle.fluid as fluid -from paddle.fluid.executor import Executor from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid +from paddle.fluid.executor import Executor + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_uniform_random_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_uniform_random_op_xpu.py index 4847e9db7810a0..3dff72b5d680ca 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_uniform_random_op_xpu.py +++ 
b/python/paddle/fluid/tests/unittests/xpu/test_uniform_random_op_xpu.py @@ -16,13 +16,15 @@ sys.path.append("..") import unittest + import numpy as np -import paddle from test_uniform_random_op import ( TestUniformRandomOp, TestUniformRandomOpSelectedRows, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze2_op_xpu.py index c745466fb24bfb..53ab29a322a113 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze2_op_xpu.py @@ -12,21 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import sys +import unittest sys.path.append("..") import numpy as np - -import paddle from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze_op_xpu.py index 1f0cb53500f4a0..4f2b1d2b5a8adc 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_unsqueeze_op_xpu.py @@ -12,21 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import sys +import unittest sys.path.append("..") import numpy as np - -import paddle from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_update_loss_scaling_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_update_loss_scaling_op_xpu.py index 1c8715b93cddb3..ad1cf1a5192b22 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_update_loss_scaling_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_update_loss_scaling_op_xpu.py @@ -12,23 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import sys +import unittest sys.path.append("..") import numpy as np -from op_test_xpu import XPUOpTest -import paddle -import paddle.fluid as fluid -import paddle.fluid.contrib.mixed_precision.amp_nn as amp_nn - from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid +import paddle.fluid.contrib.mixed_precision.amp_nn as amp_nn + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py index 8807b7be35b70b..a6c8ae8656ca82 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py @@ -16,19 +16,21 @@ sys.path.append("..") import unittest + import numpy as np -from test_softmax_op import stable_softmax -import paddle.fluid as fluid -from paddle.fluid import Program, program_guard -import paddle -import paddle.nn.functional as F from op_test_xpu import XPUOpTest +from test_softmax_op import stable_softmax from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid +import paddle.nn.functional as F +from paddle.fluid import Program, program_guard + paddle.enable_static() CUDA_BLOCK_SIZE = 32 diff --git a/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py index 8123bcd73f96c8..939a5868643382 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py @@ -12,23 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import unittest import sys +import unittest -sys.path.append("..") +import numpy as np -import paddle -import paddle.fluid as fluid -from paddle.fluid import Program, program_guard +sys.path.append("..") from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_where_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_where_op_xpu.py index bd6accf59d1c02..de13c31c282a4c 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_where_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_where_op_xpu.py @@ -12,23 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np -import unittest import sys +import unittest -sys.path.append("..") +import numpy as np -import paddle -import paddle.fluid as fluid -from paddle.fluid.backward import append_backward +sys.path.append("..") from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( + XPUOpTestWrapper, create_test_class, get_xpu_op_support_types, - XPUOpTestWrapper, ) +import paddle +import paddle.fluid as fluid +from paddle.fluid.backward import append_backward + paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_while_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_while_op_xpu.py index c1fa366cc88b2f..e0ee57d2bf9404 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_while_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_while_op_xpu.py @@ -13,12 +13,14 @@ # limitations under the License. import unittest + +import numpy + import paddle -import paddle.fluid.layers as layers -from paddle.fluid.executor import Executor import paddle.fluid as fluid +import paddle.fluid.layers as layers from paddle.fluid.backward import append_backward -import numpy +from paddle.fluid.executor import Executor paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_xpu_place.py b/python/paddle/fluid/tests/unittests/xpu/test_xpu_place.py index 75679047301df4..2b25011d324796 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_xpu_place.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_xpu_place.py @@ -12,12 +12,13 @@ # see the license for the specific language governing permissions and # limitations under the license. -import unittest import os +import unittest + import paddle import paddle.fluid as fluid -from paddle.fluid import core import paddle.static as static +from paddle.fluid import core class Test_XPU_Places(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_xpu_stream_event.py b/python/paddle/fluid/tests/unittests/xpu/test_xpu_stream_event.py index 00808c3c289f33..9bf1d21c5ee57e 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_xpu_stream_event.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_xpu_stream_event.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.device import xpu -import paddle - import unittest +import paddle +from paddle.device import xpu + class TestSynchronize(unittest.TestCase): def test_synchronize(self): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py index 3be5c315f3bcc0..6bde8ef947d7c8 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + +import numpy as np + import paddle import paddle.fluid as fluid import paddle.nn.functional as F -import numpy as np -import unittest paddle.set_device('xpu') From 41f15537d6569b2ef6f0c67d8ce0c8a0d82bd45b Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Tue, 29 Nov 2022 20:38:36 +0800 Subject: [PATCH 042/154] rename use_cudnn to use_gpudnn in phi (#48443) --- paddle/fluid/pybind/eager_method.cc | 24 +++++++++---------- paddle/phi/api/lib/kernel_dispatch.cc | 4 ++-- paddle/phi/api/lib/kernel_dispatch.h | 8 +++---- paddle/phi/core/dense_tensor.cc | 2 +- paddle/phi/core/dense_tensor_impl.cc | 2 +- paddle/phi/core/tensor_meta.cc | 8 +++---- paddle/phi/core/tensor_meta.h | 6 ++--- python/paddle/fluid/dygraph/nn.py | 2 +- .../fluid/dygraph/varbase_patch_methods.py | 6 ++--- python/paddle/fluid/layers/nn.py | 2 +- .../tests/unittests/test_egr_python_api.py | 8 +++---- python/paddle/nn/functional/pooling.py | 6 ++--- python/paddle/nn/functional/vision.py | 2 +- 13 files changed, 40 insertions(+), 40 deletions(-) diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 17d210cc2f176c..6f0bd5fb16d14e 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -1450,28 +1450,28 @@ static PyObject* tensor__copy_gradient_from(TensorObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } -static PyObject* tensor__use_cudnn(TensorObject* self, - PyObject* args, - PyObject* kwargs) { +static PyObject* tensor__use_gpudnn(TensorObject* self, + PyObject* args, + PyObject* kwargs) { EAGER_TRY PADDLE_ENFORCE(self->tensor.defined() && self->tensor.is_dense_tensor(), paddle::platform::errors::Fatal( - "function _use_cudnn is only effective for DenseTensor")); + "function _use_gpudnn is only effective for DenseTensor")); - bool use_cudnn = pybind::CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 0), 0); + bool use_gpudnn = pybind::CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 0), 0); - // Set the same use_cudnn attribute, return directly + // Set the same use_gpudnn attribute, return directly phi::DenseTensor* dense_tensor = static_cast(self->tensor.impl().get()); phi::DenseTensorMeta* dense_tensor_meta = phi::DenseTensorUtils::GetMutableMeta(dense_tensor); - if (use_cudnn == dense_tensor_meta->use_cudnn) { + if (use_gpudnn == dense_tensor_meta->use_gpudnn) { return ToPyObject(self->tensor); } - // Share all other members of Tensor except use_cudnn + // Share all other members of Tensor except use_gpudnn phi::DenseTensorMeta target_dense_meta = *dense_tensor_meta; - target_dense_meta.use_cudnn = use_cudnn; + target_dense_meta.use_gpudnn = use_gpudnn; phi::DenseTensor target_dense_tensor; target_dense_tensor.ShareDataWith(*dense_tensor); target_dense_tensor.set_meta(target_dense_meta); @@ -1481,7 +1481,7 @@ static PyObject* tensor__use_cudnn(TensorObject* self, self->tensor.name()); target_tensor.set_autograd_meta(self->tensor.mutable_autograd_meta()); VLOG(4) << "Tensor: " << target_tensor.name() - << " set use_cudnn = " << use_cudnn; + << " set use_gpudnn = " << use_gpudnn; return ToPyObject(target_tensor); EAGER_CATCH_AND_THROW_RETURN_NULL @@ -2053,8 +2053,8 @@ PyMethodDef variable_methods[] = { (PyCFunction)(void (*)(void))tensor__copy_gradient_from, METH_VARARGS | METH_KEYWORDS, NULL}, - {"_tensor_use_cudnn", - (PyCFunction)(void (*)(void))tensor__use_cudnn, + {"_tensor_use_gpudnn", + (PyCFunction)(void (*)(void))tensor__use_gpudnn, METH_VARARGS | METH_KEYWORDS, NULL}, /** the methods to adapt old dygraph, will be removed in the 
future **/ diff --git a/paddle/phi/api/lib/kernel_dispatch.cc b/paddle/phi/api/lib/kernel_dispatch.cc index 941bc880b99f0b..074da80bbfb6e9 100644 --- a/paddle/phi/api/lib/kernel_dispatch.cc +++ b/paddle/phi/api/lib/kernel_dispatch.cc @@ -57,7 +57,7 @@ BackendSet GetTensorBackendSet(const phi::TensorBase& t) { phi::Backend backend_key = phi::TransToPhiBackend(t.place()); BackendSet backend_set(backend_key); if (backend_key == Backend::GPU && phi::DenseTensor::classof(&t) && - static_cast(t).meta().use_cudnn) { + static_cast(t).meta().use_gpudnn) { backend_set = backend_set | BackendSet(Backend::GPUDNN); } return backend_set; @@ -126,7 +126,7 @@ Backend ParseBackend(const Tensor& tensor) { Backend backend_key = phi::TransToPhiBackend(tensor.place()); if (backend_key == Backend::GPU && phi::DenseTensor::classof(tensor.impl().get()) && - static_cast(tensor.impl().get())->meta().use_cudnn) { + static_cast(tensor.impl().get())->meta().use_gpudnn) { return Backend::GPUDNN; } return backend_key; diff --git a/paddle/phi/api/lib/kernel_dispatch.h b/paddle/phi/api/lib/kernel_dispatch.h index bfe8eba2444b6c..23b375eaf6ebee 100644 --- a/paddle/phi/api/lib/kernel_dispatch.h +++ b/paddle/phi/api/lib/kernel_dispatch.h @@ -90,7 +90,7 @@ struct ArgsIterator { struct KernelKeyParser : ArgsIterator { KernelKeySet key_set; - bool disable_cudnn = false; + bool disable_gpudnn = false; // this dtype_set is used for cache multi-inputs dtype and used for // data_promote DataTypeSet dtype_set{DataType::UNDEFINED}; @@ -101,9 +101,9 @@ struct KernelKeyParser : ArgsIterator { // assign Backend BackendSet tensor_backend_set = detail::GetTensorBackendSet(tensor); key_set.backend_set = key_set.backend_set | tensor_backend_set; - // tensor's attribute use_cudnn=False, explicitly disable cudnn kernel - if (tensor_backend_set == BackendSet(Backend::GPU) || disable_cudnn) { - disable_cudnn = true; + // tensor's attribute use_gpudnn=False, explicitly disable gpudnn kernel + if (tensor_backend_set == BackendSet(Backend::GPU) || disable_gpudnn) { + disable_gpudnn = true; key_set.backend_set = key_set.backend_set - BackendSet(Backend::GPUDNN); } // assign DataLayout diff --git a/paddle/phi/core/dense_tensor.cc b/paddle/phi/core/dense_tensor.cc index 09ce2414150e1c..3c6f306e8c8f0d 100644 --- a/paddle/phi/core/dense_tensor.cc +++ b/paddle/phi/core/dense_tensor.cc @@ -200,7 +200,7 @@ void DenseTensor::set_meta(const DenseTensorMeta& meta) { meta_.layout = meta.layout; meta_.lod = meta.lod; meta_.offset = meta.offset; - meta_.use_cudnn = meta.use_cudnn; + meta_.use_gpudnn = meta.use_gpudnn; } /* @jim19930609: This interface will be further modified until we finalized the diff --git a/paddle/phi/core/dense_tensor_impl.cc b/paddle/phi/core/dense_tensor_impl.cc index 3906282187d4c6..2ddbaa589e6f62 100644 --- a/paddle/phi/core/dense_tensor_impl.cc +++ b/paddle/phi/core/dense_tensor_impl.cc @@ -357,7 +357,7 @@ DenseTensor& DenseTensor::ShareDataWith(const DenseTensor& src) { meta_.dtype = src.meta_.dtype; meta_.layout = src.meta_.layout; meta_.offset = src.meta_.offset; - meta_.use_cudnn = src.meta_.use_cudnn; + meta_.use_gpudnn = src.meta_.use_gpudnn; storage_properties_ = std::move(CopyStorageProperties(src.storage_properties_)); #ifdef PADDLE_WITH_MKLDNN diff --git a/paddle/phi/core/tensor_meta.cc b/paddle/phi/core/tensor_meta.cc index 44b2dee358ad5f..cb2867c1dbee10 100644 --- a/paddle/phi/core/tensor_meta.cc +++ b/paddle/phi/core/tensor_meta.cc @@ -16,11 +16,11 @@ limitations under the License. 
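The dispatch changes above mean a dense tensor placed on the GPU is routed to the GPUDNN kernel set only while its meta's use_gpudnn flag is true; clearing the flag on an input drops GPUDNN from the candidate backends for the whole call. A small Python model of that selection logic — hypothetical helper and string names, not the real C++ API:

    def parse_backend(place, is_dense_tensor, use_gpudnn):
        # mirrors ParseBackend: GPU + DenseTensor + use_gpudnn -> GPUDNN,
        # everything else falls back to the plain backend for the place
        if place == "GPU" and is_dense_tensor and use_gpudnn:
            return "GPUDNN"
        return place

    assert parse_backend("GPU", True, True) == "GPUDNN"
    assert parse_backend("GPU", True, False) == "GPU"   # after tensor._use_gpudnn(False)
    assert parse_backend("CPU", True, True) == "CPU"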
*/ namespace phi { -DenseTensorMeta::DenseTensorMeta() { use_cudnn = true; } +DenseTensorMeta::DenseTensorMeta() { use_gpudnn = true; } DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims) : dims(dims), dtype(dtype) { - use_cudnn = true; + use_gpudnn = true; } DenseTensorMeta::DenseTensorMeta(DataType dtype, @@ -28,7 +28,7 @@ DenseTensorMeta::DenseTensorMeta(DataType dtype, DataLayout layout, size_t offset) : dims(dims), dtype(dtype), layout(layout), offset(offset) { - use_cudnn = true; + use_gpudnn = true; } DenseTensorMeta::DenseTensorMeta(DataType dtype, @@ -37,7 +37,7 @@ DenseTensorMeta::DenseTensorMeta(DataType dtype, const LoD& lod, size_t offset) : dims(dims), dtype(dtype), layout(layout), lod(lod), offset(offset) { - use_cudnn = true; + use_gpudnn = true; } bool DenseTensorMeta::valid() const noexcept { diff --git a/paddle/phi/core/tensor_meta.h b/paddle/phi/core/tensor_meta.h index 789a4422e25d1f..1d969ef7b3e132 100644 --- a/paddle/phi/core/tensor_meta.h +++ b/paddle/phi/core/tensor_meta.h @@ -65,9 +65,9 @@ struct DenseTensorMeta { bool valid() const noexcept; bool is_scalar{false}; - /// \brief Determine whether using CuDNN speed-up library in the new dygraph. + /// \brief Determine whether using gpudnn speed-up library in the new dygraph. /// It maybe also support MKLDNN library in the near future. - bool use_cudnn{true}; + bool use_gpudnn{true}; DDim dims; DataType dtype{DataType::UNDEFINED}; DataLayout layout{DataLayout::NCHW}; @@ -76,7 +76,7 @@ struct DenseTensorMeta { }; inline bool operator==(const DenseTensorMeta& lhs, const DenseTensorMeta& rhs) { - return (lhs.is_scalar == rhs.is_scalar) && lhs.use_cudnn == rhs.use_cudnn && + return (lhs.is_scalar == rhs.is_scalar) && lhs.use_gpudnn == rhs.use_gpudnn && (lhs.dims == rhs.dims) && (lhs.dtype == rhs.dtype) && (lhs.layout == rhs.layout) && (lhs.lod == rhs.lod) && (lhs.offset == rhs.offset); diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 8ad463f2d3ad3d..bff3d5aacb935b 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -671,7 +671,7 @@ def __init__( def forward(self, input): if _non_static_mode(): if not self._use_mkldnn and in_dygraph_mode(): - input = input._use_cudnn(self._use_cudnn) + input = input._use_gpudnn(self._use_cudnn) return _C_ops.pool2d( input, self._pool_size, diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index ee57dc8cc2c4cb..e9b963a781db9f 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -886,8 +886,8 @@ def _clear_data(self): self.get_tensor()._clear() @framework.dygraph_only - def _use_cudnn(self, use_cudnn=True): - return self._tensor_use_cudnn(use_cudnn) + def _use_gpudnn(self, use_gpudnn=True): + return self._tensor_use_gpudnn(use_gpudnn) @framework.dygraph_only def _uva(self, device_id=0): @@ -1073,7 +1073,7 @@ def __hash__(self): setattr(core.eager.Tensor, "_uva", _uva) setattr(core.eager.Tensor, "_clear_data", _clear_data) setattr(core.eager.Tensor, "__hash__", __hash__) - setattr(core.eager.Tensor, "_use_cudnn", _use_cudnn) + setattr(core.eager.Tensor, "_use_gpudnn", _use_gpudnn) else: setattr(core.VarBase, "__name__", "Tensor") setattr(core.VarBase, "grad", grad) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index e066be12eb7841..b59e0c4c800e8d 100644 --- a/python/paddle/fluid/layers/nn.py +++ 
b/python/paddle/fluid/layers/nn.py @@ -1852,7 +1852,7 @@ def is_list_or_tuple(ele): pool_padding = update_padding(pool_padding, data_format) if in_dygraph_mode(): - input = input._use_cudnn(use_cudnn) + input = input._use_gpudnn(use_cudnn) return _C_ops.pool2d( input, pool_size, diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py index 25cdfe82af39b6..c5ecca10b2e550 100644 --- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py @@ -899,20 +899,20 @@ def test_clear(self): x._clear() self.assertFalse(x._is_initialized()) - def test_use_cudnn(self): + def test_use_gpudnn(self): np_x = np.random.random((3, 8, 8)) with _test_eager_guard(): self.assertTrue(in_dygraph_mode()) x = paddle.to_tensor(np_x, dtype="float64") - y = x._use_cudnn(False) + y = x._use_gpudnn(False) np.testing.assert_array_equal(x.numpy(), y.numpy()) - y = x._use_cudnn(True) + y = x._use_gpudnn(True) np.testing.assert_array_equal(x.numpy(), y.numpy()) self.assertFalse(in_dygraph_mode()) x = paddle.to_tensor(np_x, dtype="float64") with self.assertRaises(AttributeError): - x = x._use_cudnn(False) + x = x._use_gpudnn(False) class EagerParamBaseUsageTestCase(unittest.TestCase): diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index 3b8660a677cba9..9f92a6057b592b 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -1690,7 +1690,7 @@ def adaptive_avg_pool1d(x, output_size, name=None): x = unsqueeze(x, [2]) if in_dygraph_mode(): - x = x._use_cudnn(False) + x = x._use_gpudnn(False) pool_out = _C_ops.pool2d( x, pool_size, @@ -1827,7 +1827,7 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): output_size = utils._convert_to_tensor_list(output_size) if in_dygraph_mode(): - x = x._use_cudnn(False) + x = x._use_gpudnn(False) return _C_ops.pool2d( x, output_size, @@ -1972,7 +1972,7 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): output_size[2] = in_w if in_dygraph_mode(): - x = x._use_cudnn(False) + x = x._use_gpudnn(False) return _C_ops.pool3d( x, output_size, diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py index 54ed9903744e1b..c01f962d79dbc2 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -91,7 +91,7 @@ def affine_grid(theta, out_shape, align_corners=True, name=None): if isinstance(out_shape, Variable) else out_shape ) - theta = theta._use_cudnn(use_cudnn) + theta = theta._use_gpudnn(use_cudnn) return _C_ops.affine_grid(theta, _out_shape, align_corners) elif in_dynamic_mode(): _out_shape = ( From 048e0c558a9bc116b6cad98f1645fef3515419e8 Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Tue, 29 Nov 2022 20:38:45 +0800 Subject: [PATCH 043/154] clean elem_arithmetic not test.py (#48460) --- .../sharding/group_sharded_utils.py | 2 +- .../meta_parallel/sharding/sharding_utils.py | 2 +- python/paddle/distribution/normal.py | 15 +++---- python/paddle/distribution/uniform.py | 11 ++---- python/paddle/fluid/clip.py | 12 +++--- .../extend_optimizer_with_weight_decay.py | 4 +- .../paddle/fluid/contrib/layers/rnn_impl.py | 18 ++++----- python/paddle/fluid/dygraph/rnn.py | 39 +++++++++---------- python/paddle/fluid/layer_helper_base.py | 4 +- python/paddle/fluid/layers/rnn.py | 7 ++-- python/paddle/fluid/nets.py | 2 +- python/paddle/fluid/optimizer.py | 8 ++-- 
.../distributed/models/moe/grad_clip.py | 4 +- 13 files changed, 55 insertions(+), 73 deletions(-) diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py index 39d88fef67d922..c12381c894e794 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py @@ -138,7 +138,7 @@ def _dygraph_clip(self, params_grads): shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm ) - clip_var = layers.elementwise_div( + clip_var = paddle.divide( x=max_global_norm, y=paddle.maximum(x=global_norm_var, y=max_global_norm), ) diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py index 89978cceff7c05..22f2eb8f1b8eab 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py @@ -135,7 +135,7 @@ def _dygraph_clip(self, params_grads): shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm ) - clip_var = layers.elementwise_div( + clip_var = paddle.divide( x=max_global_norm, y=paddle.maximum(x=global_norm_var, y=max_global_norm), ) diff --git a/python/paddle/distribution/normal.py b/python/paddle/distribution/normal.py index 2d4b0bed98090c..f28b92ec86baea 100644 --- a/python/paddle/distribution/normal.py +++ b/python/paddle/distribution/normal.py @@ -22,9 +22,6 @@ from paddle.fluid.data_feeder import check_type, convert_dtype from paddle.fluid.framework import _non_static_mode from paddle.fluid.layers import ( - elementwise_add, - elementwise_div, - elementwise_sub, nn, tensor, ) @@ -191,14 +188,14 @@ def sample(self, shape=(), seed=0): zero_tmp_shape, mean=0.0, std=1.0, seed=seed, dtype=self.dtype ) output = normal_random_tmp * (zero_tmp_reshape + self.scale) - output = elementwise_add(output, self.loc, name=name) + output = paddle.add(output, self.loc, name=name) return output else: output_shape = shape + batch_shape output = nn.gaussian_random( output_shape, mean=0.0, std=1.0, seed=seed, dtype=self.dtype ) * (tensor.zeros(output_shape, dtype=self.dtype) + self.scale) - output = elementwise_add(output, self.loc, name=name) + output = paddle.add(output, self.loc, name=name) if self.all_arg_is_float: return paddle.reshape(output, shape, name=name) else: @@ -243,7 +240,7 @@ def entropy(self): zero_tmp = tensor.fill_constant_batch_size_like( self.loc + self.scale, batch_shape, self.dtype, 0.0 ) - return elementwise_add( + return paddle.add( 0.5 + zero_tmp, 0.5 * math.log(2 * math.pi) + nn.log((self.scale + zero_tmp)), name=name, @@ -264,7 +261,7 @@ def log_prob(self, value): var = self.scale * self.scale log_scale = nn.log(self.scale) - return elementwise_sub( + return paddle.subtract( -1.0 * ((value - self.loc) * (value - self.loc)) / (2.0 * var), log_scale + math.log(math.sqrt(2.0 * math.pi)), name=name, @@ -284,7 +281,7 @@ def probs(self, value): value = self._check_values_dtype_in_probs(self.loc, value) var = self.scale * self.scale - return elementwise_div( + return paddle.divide( paddle.exp( -1.0 * ((value - self.loc) * (value - self.loc)) / (2.0 * var) ), @@ -333,6 +330,6 @@ def kl_divergence(self, other): var_ratio = var_ratio * var_ratio t1 = (self.loc - other.loc) / other.scale t1 = t1 * t1 - return elementwise_add( + return paddle.add( 0.5 * var_ratio, 0.5 * (t1 
- 1.0 - nn.log(var_ratio)), name=name ) diff --git a/python/paddle/distribution/uniform.py b/python/paddle/distribution/uniform.py index 9571cdb08c2591..f242dc3db0da93 100644 --- a/python/paddle/distribution/uniform.py +++ b/python/paddle/distribution/uniform.py @@ -24,9 +24,6 @@ in_dygraph_mode, ) from paddle.fluid.layers import ( - elementwise_add, - elementwise_div, - elementwise_sub, nn, tensor, ) @@ -184,7 +181,7 @@ def sample(self, shape, seed=0): output = uniform_random_tmp_reshape * ( zero_tmp_reshape + self.high - self.low ) - output = elementwise_add(output, self.low, name=name) + output = paddle.add(output, self.low, name=name) return output else: output_shape = shape + batch_shape @@ -194,7 +191,7 @@ def sample(self, shape, seed=0): tensor.zeros(output_shape, dtype=self.dtype) + (self.high - self.low) ) - output = elementwise_add(output, self.low, name=name) + output = paddle.add(output, self.low, name=name) if self.all_arg_is_float: return paddle.reshape(output, shape, name=name) else: @@ -235,7 +232,7 @@ def log_prob(self, value): ub_bool = value < self.high lb = tensor.cast(lb_bool, dtype=value.dtype) ub = tensor.cast(ub_bool, dtype=value.dtype) - return elementwise_sub( + return paddle.subtract( nn.log(lb * ub), nn.log(self.high - self.low), name=name ) @@ -273,7 +270,7 @@ def probs(self, value): ub_bool = value < self.high lb = tensor.cast(lb_bool, dtype=value.dtype) ub = tensor.cast(ub_bool, dtype=value.dtype) - return elementwise_div((lb * ub), (self.high - self.low), name=name) + return paddle.divide((lb * ub), (self.high - self.low), name=name) def entropy(self): r"""Shannon entropy in nats. diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 7162313ddaec95..525c3360f5effa 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -548,16 +548,14 @@ def _dygraph_clip(self, params_grads): need_clip = False if not self.auto_skip_clip: # always apply clip need_clip = True - clip_var = layers.elementwise_div( + clip_var = paddle.divide( x=max_global_norm, y=paddle.maximum(x=global_norm_var, y=max_global_norm), ) elif global_norm_var > max_global_norm: # only when global_norm_var > max_global_norm, grad need clip need_clip = True - clip_var = layers.elementwise_div( - x=max_global_norm, y=global_norm_var - ) + clip_var = paddle.divide(x=max_global_norm, y=global_norm_var) for p, g in params_grads: if g is None: @@ -572,7 +570,7 @@ def _dygraph_clip(self, params_grads): if clip_var.dtype != g.dtype else clip_var ) - new_grad = layers.elementwise_mul(g, clip_input) + new_grad = paddle.multiply(g, clip_input) params_and_grads.append((p, new_grad)) else: params_and_grads.append((p, g)) @@ -652,7 +650,7 @@ def _static_clip(self, params_grads): max_global_norm = layers.fill_constant( shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm ) - scale_var = layers.elementwise_div( + scale_var = paddle.divide( x=max_global_norm, y=paddle.maximum(x=max_global_norm, y=global_norm_var), ) @@ -729,7 +727,7 @@ def _create_operators(self, param, grad): group_norm_var = layers.sums(input=self.context[self.group_name]) group_norm_var = paddle.sqrt(x=group_norm_var) clip_var = self.context[self.group_name + "_clip"] - group_scale_var = layers.elementwise_div( + group_scale_var = paddle.divide( x=clip_var, y=paddle.maximum(x=clip_var, y=group_norm_var), ) diff --git a/python/paddle/fluid/contrib/extend_optimizer/extend_optimizer_with_weight_decay.py b/python/paddle/fluid/contrib/extend_optimizer/extend_optimizer_with_weight_decay.py index 
53a010c23ce9dd..3a40c5ac80ac1c 100644 --- a/python/paddle/fluid/contrib/extend_optimizer/extend_optimizer_with_weight_decay.py +++ b/python/paddle/fluid/contrib/extend_optimizer/extend_optimizer_with_weight_decay.py @@ -95,9 +95,7 @@ def minimize( with param.block.program._optimized_guard( [param, grad] ), framework.name_scope('weight decay'): - updated_param = paddle.fluid.layers.elementwise_sub( - x=param, y=scaled_param - ) + updated_param = paddle.subtract(x=param, y=scaled_param) paddle.fluid.layers.assign(input=updated_param, output=param) optimize_ops = self.apply_optimize( diff --git a/python/paddle/fluid/contrib/layers/rnn_impl.py b/python/paddle/fluid/contrib/layers/rnn_impl.py index df6a38852ff8cb..5f9a4d2827fb22 100644 --- a/python/paddle/fluid/contrib/layers/rnn_impl.py +++ b/python/paddle/fluid/contrib/layers/rnn_impl.py @@ -153,7 +153,7 @@ def forward(self, input, pre_hidden): gate_input = layers.matmul(x=concat_input_hidden, y=self._gate_weight) - gate_input = layers.elementwise_add(gate_input, self._gate_bias) + gate_input = paddle.add(gate_input, self._gate_bias) gate_input = self._gate_activation(gate_input) r, u = layers.split(gate_input, num_or_sections=2, dim=1) @@ -163,7 +163,7 @@ def forward(self, input, pre_hidden): candidate = layers.matmul( layers.concat([input, r_hidden], 1), self._candidate_weight ) - candidate = layers.elementwise_add(candidate, self._candidate_bias) + candidate = paddle.add(candidate, self._candidate_bias) c = self._activation(candidate) new_hidden = u * pre_hidden + (1 - u) * c @@ -876,18 +876,14 @@ def forward(self, input, pre_hidden, pre_cell): concat_input_hidden = layers.concat([input, pre_hidden], 1) gate_input = layers.matmul(x=concat_input_hidden, y=self._weight) - gate_input = layers.elementwise_add(gate_input, self._bias) + gate_input = paddle.add(gate_input, self._bias) i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1) - new_cell = layers.elementwise_add( - layers.elementwise_mul( + new_cell = paddle.add( + paddle.multiply( pre_cell, - paddle.nn.functional.sigmoid( - layers.elementwise_add(f, self._forget_bias) - ), - ), - layers.elementwise_mul( - paddle.nn.functional.sigmoid(i), paddle.tanh(j) + paddle.nn.functional.sigmoid(paddle.add(f, self._forget_bias)), ), + paddle.multiply(paddle.nn.functional.sigmoid(i), paddle.tanh(j)), ) new_hidden = paddle.tanh(new_cell) * paddle.nn.functional.sigmoid(o) diff --git a/python/paddle/fluid/dygraph/rnn.py b/python/paddle/fluid/dygraph/rnn.py index fa88dc44bbd212..986d1c562b4056 100644 --- a/python/paddle/fluid/dygraph/rnn.py +++ b/python/paddle/fluid/dygraph/rnn.py @@ -18,7 +18,6 @@ concat, fill_constant, matmul, - elementwise_add, elementwise_mul, split, ) @@ -217,23 +216,23 @@ def forward(self, input, pre_hidden, pre_cell): if self._use_cudnn_impl: igates = matmul(input, y=self._weight_ih, transpose_y=True) - igates = elementwise_add(igates, self._bias_ih) + igates = paddle.add(igates, self._bias_ih) hgates = matmul(pre_hidden, self._weight_hh, transpose_y=True) - hgates = elementwise_add(hgates, self._bias_hh) + hgates = paddle.add(hgates, self._bias_hh) chunked_igates = split(igates, num_or_sections=4, dim=1) chunked_hgates = split(hgates, num_or_sections=4, dim=1) - ingate = elementwise_add(chunked_igates[0], chunked_hgates[0]) + ingate = paddle.add(chunked_igates[0], chunked_hgates[0]) ingate = self._gate_activation(ingate) - forgetgate = elementwise_add(chunked_igates[1], chunked_hgates[1]) + forgetgate = paddle.add(chunked_igates[1], chunked_hgates[1]) forgetgate = 
self._gate_activation(forgetgate) - cellgate = elementwise_add(chunked_igates[2], chunked_hgates[2]) + cellgate = paddle.add(chunked_igates[2], chunked_hgates[2]) cellgate = self._activation(cellgate) - outgate = elementwise_add(chunked_igates[3], chunked_hgates[3]) + outgate = paddle.add(chunked_igates[3], chunked_hgates[3]) outgate = self._gate_activation(outgate) new_cell = (forgetgate * pre_cell) + (ingate * cellgate) @@ -244,16 +243,14 @@ def forward(self, input, pre_hidden, pre_cell): concat_input_hidden = concat([input, pre_hidden], 1) gate_input = matmul(x=concat_input_hidden, y=self._weight) - gate_input = elementwise_add(gate_input, self._bias) + gate_input = paddle.add(gate_input, self._bias) i, j, f, o = split(gate_input, num_or_sections=4, dim=-1) - new_cell = elementwise_add( - elementwise_mul( + new_cell = paddle.add( + paddle.multiply( pre_cell, - self._gate_activation( - elementwise_add(f, self._forget_bias) - ), + self._gate_activation(paddle.add(f, self._forget_bias)), ), - elementwise_mul( + paddle.multiply( paddle.nn.functional.sigmoid(i), paddle.tanh(j) ), ) @@ -466,21 +463,21 @@ def forward(self, input, pre_hidden): if self._use_cudnn_impl: igates = matmul(input, y=self._weight_ih, transpose_y=True) - igates = elementwise_add(igates, self._bias_ih) + igates = paddle.add(igates, self._bias_ih) hgates = matmul(pre_hidden, self._weight_hh, transpose_y=True) - hgates = elementwise_add(hgates, self._bias_hh) + hgates = paddle.add(hgates, self._bias_hh) chunked_igates = split(igates, num_or_sections=3, dim=1) chunked_hgates = split(hgates, num_or_sections=3, dim=1) - reset_gate = elementwise_add(chunked_igates[0], chunked_hgates[0]) + reset_gate = paddle.add(chunked_igates[0], chunked_hgates[0]) reset_gate = self._gate_activation(reset_gate) - input_gate = elementwise_add(chunked_igates[1], chunked_hgates[1]) + input_gate = paddle.add(chunked_igates[1], chunked_hgates[1]) input_gate = self._gate_activation(input_gate) _temp = reset_gate * chunked_hgates[2] - new_gate = elementwise_add(chunked_igates[2], _temp) + new_gate = paddle.add(chunked_igates[2], _temp) new_gate = self._activation(new_gate) new_hidden = (pre_hidden - new_gate) * input_gate + new_gate @@ -491,7 +488,7 @@ def forward(self, input, pre_hidden): gate_input = matmul(x=concat_input_hidden, y=self._gate_weight) - gate_input = elementwise_add(gate_input, self._gate_bias) + gate_input = paddle.add(gate_input, self._gate_bias) gate_input = self._gate_activation(gate_input) r, u = split(gate_input, num_or_sections=2, dim=1) @@ -500,7 +497,7 @@ def forward(self, input, pre_hidden): candidate = matmul( concat([input, r_hidden], 1), self._candidate_weight ) - candidate = elementwise_add(candidate, self._candidate_bias) + candidate = paddle.add(candidate, self._candidate_bias) c = self._activation(candidate) new_hidden = u * pre_hidden + (1 - u) * c diff --git a/python/paddle/fluid/layer_helper_base.py b/python/paddle/fluid/layer_helper_base.py index 39eb4a09474002..ebdc1e60ab05a8 100644 --- a/python/paddle/fluid/layer_helper_base.py +++ b/python/paddle/fluid/layer_helper_base.py @@ -115,7 +115,7 @@ def to_variable(self, value, name=None): ) def _create_weight_normalize(self, attr, shape, dtype): - from .layers import elementwise_mul, elementwise_div + from .layers import elementwise_mul # Remove these ops when LayerHelper and layers support indicating # program and block. 
@@ -266,7 +266,7 @@ def __weight_normalize(g, v, dim): norm = __norm_except_dim( v, dim=dim, block=self.main_program.current_block() ) - scale = elementwise_div( + scale = paddle.divide( x=g, y=norm ) # The shapes of g and norm are the same. # Currently, elementwise_mul only support broadcast when the shape diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index a3bfd80de6d88e..c49d6d4d5283ee 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -1125,10 +1125,9 @@ def _mask_probs(self, probs, finished): ) # TODO: use where_op finished = tensor.cast(finished, dtype=probs.dtype) - probs = nn.elementwise_mul( + probs = paddle.multiply( paddle.tile(nn.unsqueeze(finished, [2]), [1, 1, self.vocab_size]), self.noend_mask_tensor, - axis=-1, ) - nn.elementwise_mul(probs, (finished - 1), axis=0) return probs @@ -1503,7 +1502,7 @@ def _maybe_copy(state, new_state, step_mask): # To confirm states.finished/finished be consistent with # next_finished. tensor.assign(next_finished, finished) - next_sequence_lengths = nn.elementwise_add( + next_sequence_lengths = paddle.add( sequence_lengths, tensor.cast( paddle.logical_not(finished), sequence_lengths.dtype @@ -1663,7 +1662,7 @@ def _create_array_out_of_while(dtype): # Otherwise, perform logical OR which would not change the already # finished. next_finished = paddle.logical_or(next_finished, global_finished) - next_sequence_lengths = nn.elementwise_add( + next_sequence_lengths = paddle.add( sequence_lengths, tensor.cast( paddle.logical_not(global_finished), diff --git a/python/paddle/fluid/nets.py b/python/paddle/fluid/nets.py index 5cd8380eba5865..3d4f187e18f329 100644 --- a/python/paddle/fluid/nets.py +++ b/python/paddle/fluid/nets.py @@ -390,7 +390,7 @@ def glu(input, dim=-1): ) a, b = layers.split(input, num_or_sections=2, dim=dim) act_b = paddle.nn.functional.sigmoid(x=b) - out = layers.elementwise_mul(x=a, y=act_b) + out = paddle.multiply(x=a, y=act_b) return out diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 84c3f226ca9a60..c7a817e1d7594e 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -7298,10 +7298,10 @@ def minimize(self, loss, startup_program=None): for param_name in params: fast_var = main_block.var(param_name) slow_var = param_to_slow[param_name] - tmp_var = layers.elementwise_add( - layers.elementwise_mul(fast_var, alpha), - layers.elementwise_mul( - slow_var, layers.elementwise_sub(one_var, alpha) + tmp_var = paddle.add( + paddle.multiply(fast_var, alpha), + paddle.multiply( + slow_var, paddle.subtract(one_var, alpha) ), ) layers.assign(input=tmp_var, output=slow_var) diff --git a/python/paddle/incubate/distributed/models/moe/grad_clip.py b/python/paddle/incubate/distributed/models/moe/grad_clip.py index c3faa7bd202cc8..ca4922700b8f49 100644 --- a/python/paddle/incubate/distributed/models/moe/grad_clip.py +++ b/python/paddle/incubate/distributed/models/moe/grad_clip.py @@ -212,7 +212,7 @@ def _dygraph_clip(self, params_grads): max_global_norm = layers.fill_constant( shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm ) - clip_var = layers.elementwise_div( + clip_var = paddle.divide( x=max_global_norm, y=paddle.maximum(x=global_norm_var, y=max_global_norm), ) @@ -228,7 +228,7 @@ def _dygraph_clip(self, params_grads): if g.dtype == core.VarDesc.VarType.FP16 else clip_var ) - new_grad = layers.elementwise_mul(x=g, y=clip_input) + new_grad = paddle.multiply(x=g, y=clip_input) 
params_and_grads.append((p, new_grad)) return params_and_grads From 2005d45a95a752ab458152d939d686f615546713 Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Tue, 29 Nov 2022 20:38:51 +0800 Subject: [PATCH 044/154] clean elem_arithmetic part3 unittest (#48462) --- .../tests/unittests/dist_fleet_simnet_bow.py | 6 ++--- .../fluid/tests/unittests/dist_se_resnext.py | 2 +- .../unittests/dygraph_to_static/darknet.py | 2 +- .../seq2seq_dygraph_model.py | 15 +++++-------- .../dygraph_to_static/simnet_dygraph_model.py | 4 ++-- .../simnet_dygraph_model_v2.py | 2 +- .../unittests/dygraph_to_static/test_bmn.py | 22 +++++++++---------- .../dygraph_to_static/test_cycle_gan.py | 10 ++++----- .../dygraph_to_static/test_mobile_net.py | 2 +- .../dygraph_to_static/test_ptb_lm.py | 4 ++-- .../test_reinforcement_learning.py | 4 ++-- .../dygraph_to_static/test_resnet.py | 2 +- .../dygraph_to_static/test_se_resnet.py | 2 +- .../unittests/dygraph_to_static/test_tsm.py | 2 +- .../dygraph_to_static/test_word2vec.py | 4 +--- .../transformer_dygraph_model.py | 3 +-- .../test_amp_check_finite_and_scale_op_npu.py | 6 ++--- .../npu/test_elementwise_div_op_npu.py | 2 +- .../parallel_dygraph_sparse_embedding.py | 2 +- 19 files changed, 45 insertions(+), 51 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py index 306bcb5a8e920b..eb128f9be75fa5 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py @@ -57,7 +57,7 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) - acc = fluid.layers.elementwise_div( + acc = paddle.divide( cond_3, fluid.layers.fill_constant( shape=[1], value=batch_size * 1.0, dtype='float64' @@ -68,13 +68,13 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): def get_loss(cos_q_pt, cos_q_nt): - loss_op1 = fluid.layers.elementwise_sub( + loss_op1 = paddle.subtract( fluid.layers.fill_constant_batch_size_like( input=cos_q_pt, shape=[-1, 1], value=margin, dtype='float32' ), cos_q_pt, ) - loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) + loss_op2 = paddle.add(loss_op1, cos_q_nt) loss_op3 = paddle.maximum( fluid.layers.fill_constant_batch_size_like( input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32' diff --git a/python/paddle/fluid/tests/unittests/dist_se_resnext.py b/python/paddle/fluid/tests/unittests/dist_se_resnext.py index 83befa76062d1a..3461be8228fc7c 100644 --- a/python/paddle/fluid/tests/unittests/dist_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/dist_se_resnext.py @@ -163,7 +163,7 @@ def bottleneck_block( short = self.shortcut(input, num_filters * 2, stride) - return fluid.layers.elementwise_add(x=short, y=scale, act='relu') + return paddle.nn.functional.relu(paddle.add(x=short, y=scale)) def conv_bn_layer( self, input, num_filters, filter_size, stride=1, groups=1, act=None diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py index db05875d2314f6..9199d0c2d96b26 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py @@ -114,7 +114,7 @@ def __init__(self, ch_in, ch_out, is_test=True): def forward(self, inputs): conv1 = self.conv1(inputs) conv2 = self.conv2(conv1) - out = 
fluid.layers.elementwise_add(x=inputs, y=conv2, act=None) + out = paddle.add(x=inputs, y=conv2) return out diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index bfc23a71fe571f..e7af14446410f6 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -72,15 +72,13 @@ def forward(self, input, pre_hidden, pre_cell): concat_input_hidden = layers.concat([input, pre_hidden], 1) gate_input = layers.matmul(x=concat_input_hidden, y=self._weight) - gate_input = layers.elementwise_add(gate_input, self._bias) + gate_input = paddle.add(gate_input, self._bias) i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1) - new_cell = layers.elementwise_add( - layers.elementwise_mul( + new_cell = paddle.add( + paddle.multiply( pre_cell, paddle.nn.functional.sigmoid(f + self._forget_bias) ), - layers.elementwise_mul( - paddle.nn.functional.sigmoid(i), paddle.tanh(j) - ), + paddle.multiply(paddle.nn.functional.sigmoid(i), paddle.tanh(j)), ) new_hidden = paddle.tanh(new_cell) * paddle.nn.functional.sigmoid(o) @@ -442,13 +440,12 @@ def beam_search(self, inputs): np.array(noend_array, dtype='float32') ) - step_log_probs = fluid.layers.elementwise_mul( + step_log_probs = paddle.multiply( paddle.expand( fluid.layers.unsqueeze(beam_finished, [2]), [-1, -1, self.tar_vocab_size], ), noend_mask_tensor, - axis=-1, ) - fluid.layers.elementwise_mul( step_log_probs, (beam_finished - 1), axis=0 ) @@ -693,7 +690,7 @@ def attention(self, query, enc_output, mask=None): if mask is not None: attn = paddle.transpose(attn, [1, 0, 2]) - attn = fluid.layers.elementwise_add(attn, mask * 1000000000, -1) + attn = paddle.add(attn, mask * 1000000000) attn = paddle.transpose(attn, [1, 0, 2]) weight = fluid.layers.softmax(attn) weight_memory = fluid.layers.matmul(weight, memory) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py index d6589a53a0bdd3..b10a5dc55806c8 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py @@ -171,7 +171,7 @@ def ops(self, x, y): """ operation """ - add = fluid.layers.elementwise_add(x, y) + add = paddle.add(x, y) return add @@ -190,7 +190,7 @@ def ops(self, x, y): """ operation """ - sub = fluid.layers.elementwise_sub(x, y) + sub = paddle.subtract(x, y) return sub diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py index 03cd5e699e3369..06f460912b45be 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py @@ -187,7 +187,7 @@ def ops(self, x, y): """ operation """ - sub = paddle.fluid.layers.elementwise_sub(x, y) + sub = paddle.subtract(x, y) return sub diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py index eaf37e7ea7a753..c0f5e8c0c3ce10 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py +++ 
b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py @@ -330,11 +330,11 @@ def bi_loss(pred_score, gt_label): coef_1 = 0.5 * ratio epsilon = 0.000001 # temp = fluid.layers.log(pred_score + epsilon) - loss_pos = fluid.layers.elementwise_mul( + loss_pos = paddle.multiply( fluid.layers.log(pred_score + epsilon), pmask ) loss_pos = coef_1 * fluid.layers.reduce_mean(loss_pos) - loss_neg = fluid.layers.elementwise_mul( + loss_neg = paddle.multiply( fluid.layers.log(1.0 - pred_score + epsilon), (1.0 - pmask) ) loss_neg = coef_0 * fluid.layers.reduce_mean(loss_neg) @@ -348,14 +348,14 @@ def bi_loss(pred_score, gt_label): def pem_reg_loss_func(pred_score, gt_iou_map, mask): - gt_iou_map = fluid.layers.elementwise_mul(gt_iou_map, mask) + gt_iou_map = paddle.multiply(gt_iou_map, mask) u_hmask = fluid.layers.cast(x=gt_iou_map > 0.7, dtype=DATATYPE) u_mmask = paddle.logical_and(gt_iou_map <= 0.7, gt_iou_map > 0.3) u_mmask = fluid.layers.cast(x=u_mmask, dtype=DATATYPE) u_lmask = paddle.logical_and(gt_iou_map <= 0.3, gt_iou_map >= 0.0) u_lmask = fluid.layers.cast(x=u_lmask, dtype=DATATYPE) - u_lmask = fluid.layers.elementwise_mul(u_lmask, mask) + u_lmask = paddle.multiply(u_lmask, mask) num_h = fluid.layers.cast(paddle.sum(u_hmask), dtype=DATATYPE) num_m = fluid.layers.cast(paddle.sum(u_mmask), dtype=DATATYPE) @@ -367,7 +367,7 @@ def pem_reg_loss_func(pred_score, gt_iou_map, mask): 0.0, 1.0, [gt_iou_map.shape[1], gt_iou_map.shape[2]] ).astype(DATATYPE) ) - u_smmask = fluid.layers.elementwise_mul(u_mmask, u_smmask) + u_smmask = paddle.multiply(u_mmask, u_smmask) u_smmask = fluid.layers.cast(x=(u_smmask > (1.0 - r_m)), dtype=DATATYPE) r_l = num_h / num_l @@ -376,23 +376,23 @@ def pem_reg_loss_func(pred_score, gt_iou_map, mask): 0.0, 1.0, [gt_iou_map.shape[1], gt_iou_map.shape[2]] ).astype(DATATYPE) ) - u_slmask = fluid.layers.elementwise_mul(u_lmask, u_slmask) + u_slmask = paddle.multiply(u_lmask, u_slmask) u_slmask = fluid.layers.cast(x=(u_slmask > (1.0 - r_l)), dtype=DATATYPE) weights = u_hmask + u_smmask + u_slmask weights.stop_gradient = True loss = fluid.layers.square_error_cost(pred_score, gt_iou_map) - loss = fluid.layers.elementwise_mul(loss, weights) + loss = paddle.multiply(loss, weights) loss = 0.5 * paddle.sum(loss) / paddle.sum(weights) return loss def pem_cls_loss_func(pred_score, gt_iou_map, mask): - gt_iou_map = fluid.layers.elementwise_mul(gt_iou_map, mask) + gt_iou_map = paddle.multiply(gt_iou_map, mask) gt_iou_map.stop_gradient = True pmask = fluid.layers.cast(x=(gt_iou_map > 0.9), dtype=DATATYPE) nmask = fluid.layers.cast(x=(gt_iou_map <= 0.9), dtype=DATATYPE) - nmask = fluid.layers.elementwise_mul(nmask, mask) + nmask = paddle.multiply(nmask, mask) num_positive = paddle.sum(pmask) num_entries = num_positive + paddle.sum(nmask) @@ -400,11 +400,11 @@ def pem_cls_loss_func(pred_score, gt_iou_map, mask): coef_0 = 0.5 * ratio / (ratio - 1) coef_1 = 0.5 * ratio epsilon = 0.000001 - loss_pos = fluid.layers.elementwise_mul( + loss_pos = paddle.multiply( fluid.layers.log(pred_score + epsilon), pmask ) loss_pos = coef_1 * paddle.sum(loss_pos) - loss_neg = fluid.layers.elementwise_mul( + loss_neg = paddle.multiply( fluid.layers.log(1.0 - pred_score + epsilon), nmask ) loss_neg = coef_0 * paddle.sum(loss_neg) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py index 17972d7798c534..312d716af70624 100644 --- 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py @@ -91,8 +91,8 @@ def forward(self, input_A, input_B): cyc_A = self.build_generator_resnet_9blocks_b(fake_B) cyc_B = self.build_generator_resnet_9blocks_a(fake_A) - diff_A = paddle.abs(fluid.layers.elementwise_sub(x=input_A, y=cyc_A)) - diff_B = paddle.abs(fluid.layers.elementwise_sub(x=input_B, y=cyc_B)) + diff_A = paddle.abs(paddle.subtract(x=input_A, y=cyc_A)) + diff_B = paddle.abs(paddle.subtract(x=input_B, y=cyc_B)) cyc_A_loss = fluid.layers.reduce_mean(diff_A) * lambda_A cyc_B_loss = fluid.layers.reduce_mean(diff_B) * lambda_B cyc_loss = cyc_A_loss + cyc_B_loss @@ -106,7 +106,7 @@ def forward(self, input_A, input_B): idt_A = self.build_generator_resnet_9blocks_a(input_B) idt_loss_A = ( fluid.layers.reduce_mean( - paddle.abs(fluid.layers.elementwise_sub(x=input_B, y=idt_A)) + paddle.abs(paddle.subtract(x=input_B, y=idt_A)) ) * lambda_B * lambda_identity @@ -115,12 +115,12 @@ def forward(self, input_A, input_B): idt_B = self.build_generator_resnet_9blocks_b(input_A) idt_loss_B = ( fluid.layers.reduce_mean( - paddle.abs(fluid.layers.elementwise_sub(x=input_A, y=idt_B)) + paddle.abs(paddle.subtract(x=input_A, y=idt_B)) ) * lambda_A * lambda_identity ) - idt_loss = fluid.layers.elementwise_add(idt_loss_A, idt_loss_B) + idt_loss = paddle.add(idt_loss_A, idt_loss_B) g_loss = cyc_loss + G + idt_loss return ( fake_A, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index 8b74bd7e9848be..068046e00bde08 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -326,7 +326,7 @@ def forward(self, inputs, ifshortcut): y = self._bottleneck_conv(y, if_act=True) y = self._linear_conv(y, if_act=False) if ifshortcut: - y = fluid.layers.elementwise_add(inputs, y) + y = paddle.add(inputs, y) return y diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py index 1dc77a658bbd00..fa062464d5aa96 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py @@ -96,7 +96,7 @@ def forward(self, input_embedding, init_hidden=None, init_cell=None): nn = fluid.layers.concat([step_input, pre_hidden], 1) gate_input = fluid.layers.matmul(x=nn, y=weight_1) - gate_input = fluid.layers.elementwise_add(gate_input, bias) + gate_input = paddle.add(gate_input, bias) i, j, f, o = fluid.layers.split( gate_input, num_or_sections=4, dim=-1 ) @@ -214,7 +214,7 @@ def forward(self, input, label, init_hidden, init_cell): ) projection = fluid.layers.matmul(rnn_out, self.softmax_weight) - projection = fluid.layers.elementwise_add(projection, self.softmax_bias) + projection = paddle.add(projection, self.softmax_bias) loss = fluid.layers.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py index f812cfef165f13..3b6da7e23c1c5c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py +++ 
b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py @@ -124,7 +124,7 @@ def select_action(state): mask.stop_gradient = True loss_probs = fluid.layers.log(loss_probs) - loss_probs = fluid.layers.elementwise_mul(loss_probs, mask) + loss_probs = paddle.multiply(loss_probs, mask) loss_probs = paddle.sum(loss_probs, axis=-1) policy.saved_log_probs.append(loss_probs) @@ -151,7 +151,7 @@ def finish_episode(): _R = -1 * R * R_numpy _R = to_variable(_R) _R.stop_gradient = True - cur_loss = fluid.layers.elementwise_mul(_R, log_prob) + cur_loss = paddle.multiply(_R, log_prob) policy_loss.append(cur_loss) policy_loss = fluid.layers.concat(policy_loss) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py index 0dca14c462044f..5851f82630569d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py @@ -133,7 +133,7 @@ def forward(self, inputs): else: short = self.short(inputs) - y = fluid.layers.elementwise_add(x=short, y=conv2) + y = paddle.add(x=short, y=conv2) layer_helper = fluid.layer_helper.LayerHelper( self.full_name(), act='relu' diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py index e58555003e9e04..7d3b07a395c907 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py @@ -215,7 +215,7 @@ def forward(self, inputs): else: short = self.short(inputs) - y = fluid.layers.elementwise_add(x=short, y=scale, act='relu') + y = paddle.nn.functional.relu(paddle.add(x=short, y=scale)) return y diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py index e8d4bcd9fd27bd..d5bd239afd4ad7 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py @@ -133,7 +133,7 @@ def forward(self, inputs): short = inputs else: short = self.short(inputs) - y = fluid.layers.elementwise_add(x=short, y=conv2, act="relu") + y = paddle.nn.functional.relu(paddle.add(x=short, y=conv2)) return y diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py index fc1341350c1659..9b444aecae50c7 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py @@ -257,9 +257,7 @@ def forward(self, center_words, target_words, label): # center_words_emb = [batch_size, embedding_size] # target_words_emb = [batch_size, embedding_size] - word_sim = fluid.layers.elementwise_mul( - center_words_emb, target_words_emb - ) + word_sim = paddle.multiply(center_words_emb, target_words_emb) word_sim = paddle.sum(word_sim, axis=-1) pred = paddle.nn.functional.sigmoid(word_sim) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index c996c54d053a69..d0f329b96cfb81 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ 
b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -756,13 +756,12 @@ def split_batch_beams(tensor): def mask_probs(probs, finished, noend_mask_tensor): finished = layers.cast(finished, dtype=probs.dtype) - probs = layers.elementwise_mul( + probs = paddle.multiply( paddle.expand( layers.unsqueeze(finished, [2]), [-1, -1, self.trg_vocab_size], ), noend_mask_tensor, - axis=-1, ) - layers.elementwise_mul(probs, (finished - 1), axis=0) return probs diff --git a/python/paddle/fluid/tests/unittests/npu/test_amp_check_finite_and_scale_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_amp_check_finite_and_scale_op_npu.py index ff11f1e68f0ca0..43e3c44182de08 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_amp_check_finite_and_scale_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_amp_check_finite_and_scale_op_npu.py @@ -45,7 +45,7 @@ def get_prog(self): inputs={"FloatStatus": float_status}, outputs={"FloatStatusOut": float_status}, ) - c = paddle.fluid.layers.elementwise_div(a, b) + c = paddle.divide(a, b) out, found_inf = check_finite_and_unscale( [c], scale, float_status=float_status ) @@ -115,7 +115,7 @@ def get_prog(self): inputs={"FloatStatus": float_status}, outputs={"FloatStatusOut": float_status}, ) - c = paddle.fluid.layers.elementwise_div(a, b) + c = paddle.divide(a, b) out, found_inf = check_finite_and_unscale( [c], scale, float_status=float_status ) @@ -127,7 +127,7 @@ def get_prog(self): inputs={"FloatStatus": float_status}, outputs={"FloatStatusOut": float_status}, ) - d = paddle.fluid.layers.elementwise_add(a, b) + d = paddle.add(a, b) out, found_inf = check_finite_and_unscale( [d], scale, float_status=float_status ) diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py index acdb8c75db5617..42460f46a1ec77 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py @@ -133,7 +133,7 @@ def _test(self, run_npu=True): e = paddle.multiply(a, b) f = paddle.multiply(c, d) f.stop_gradient = True - g = fluid.layers.elementwise_div(e, f) + g = paddle.divide(e, f) fc_1 = fluid.layers.fc(input=g, size=128) prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py index f3f33d44d96465..824815d48aa2cc 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py @@ -66,7 +66,7 @@ def __init__( def forward(self, input, label): x_emb = self.embedding(input) fc = fluid.layers.matmul(x_emb, self.softmax_weight) - fc = fluid.layers.elementwise_add(fc, self.softmax_bias) + fc = paddle.add(fc, self.softmax_bias) projection = paddle.reshape(fc, shape=[-1, self.vocab_size]) loss = fluid.layers.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False From bf4d17924477733ddb4bf233732901d681d19561 Mon Sep 17 00:00:00 2001 From: lzy <569782149@qq.com> Date: Tue, 29 Nov 2022 20:52:01 +0800 Subject: [PATCH 045/154] fix mma_tensorcore (#48386) * fix mma_tensorcore (__CUDA_ARCH__) * disable tensorcore by default. 
disable tensorcore by default, because the judgment of __CUDA_ARCH__ will
cause undefined behavior in some environments; it can be enabled manually
on a machine that supports tensorcore.
---
 .../operators/fused/fused_multi_transformer_op.cu.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h
index c36ee69723e452..3c3a59b219615c 100644
--- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h
+++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h
@@ -95,7 +95,7 @@ using float16 = plat::float16;
 #define MMHA_USE_FP32_ACUM_FOR_LOGITS
 #define MMHA_USE_FP32_ACUM_FOR_OUT
 #define MMHA_USE_FP32_ACUM_FOR_FMA
-#define MMHA_USE_HMMA_FOR_REDUCTION
+// #define MMHA_USE_HMMA_FOR_REDUCTION

 template <typename T>
 class PDDataTypeTraits;
@@ -601,7 +601,8 @@ template <int N>
 inline __device__ float qk_hmma_dot_(const uint32_t (&q)[N],
                                      const uint32_t (&k)[N],
                                      float inv_sqrt_dh) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 750
+#if defined(MMHA_USE_HMMA_FOR_REDUCTION) && defined(__CUDA_ARCH__) && \
+    __CUDA_ARCH__ >= 750
 #ifdef MMHA_USE_FP32_ACUM_FOR_FMA
   using K_vec_acum = typename K_vec_acum_fp32_<uint32_t>::Type;
 #else
@@ -641,7 +642,8 @@ struct Qk_dot {
   static inline __device__ float dot(const uint32_t (&q)[N],
                                      const uint32_t (&k)[N],
                                      float inv_sqrt_dh) {
-#if defined(MMHA_USE_HMMA_FOR_REDUCTION) && __CUDA_ARCH__ >= 750
+#if defined(MMHA_USE_HMMA_FOR_REDUCTION) && defined(__CUDA_ARCH__) && \
+    __CUDA_ARCH__ >= 750
     return qk_hmma_dot_(q, k, inv_sqrt_dh);
 #else
     return qk_dot_<4>(q, k, inv_sqrt_dh);
@@ -1104,7 +1106,8 @@ void fmha_launch_kernel(const Masked_multihead_attention_params &params,
   if (params.timestep < 32) {
     MMHA_LAUNCH_KERNEL(T, Dh, Dh_MAX, 4, THREADS_PER_VALUE, 64, stream);
   } else if (params.timestep < 2048) {
-#if defined(MMHA_USE_HMMA_FOR_REDUCTION) && __CUDA_ARCH__ >= 750
+#if defined(MMHA_USE_HMMA_FOR_REDUCTION) && defined(__CUDA_ARCH__) && \
+    __CUDA_ARCH__ >= 750
     MMHA_LAUNCH_KERNEL(T, Dh, Dh_MAX, 4, THREADS_PER_VALUE, 256, stream);
 #else
     MMHA_LAUNCH_KERNEL(T, Dh, Dh_MAX, 2, THREADS_PER_VALUE, 128, stream);

From 7d6263e6e547c2a8cbcc5886129f8a8df246ac97 Mon Sep 17 00:00:00 2001
From: wangzhen38 <41941775+wangzhen38@users.noreply.github.com>
Date: Wed, 30 Nov 2022 10:27:26 +0800
Subject: [PATCH 046/154] [remove fluid] under unittesets (#48212)

* [remove fluid] under unittesets
* [remove fluid] under unittesets
* [remove fluid] under unittesets
* [remove fluid] under unittesets
* [remove fluid] under unittesets
* [remove fluid] under unittesets
* [remove fluid] under unittesets
* [remove fluid] under unittesets
* [remove fluid] under unittesets
* [remove fluid] under unittesets
* [remove fluid] under unittesets
* [remove fluid] under unittesets
* [remove fluid] under unittesets
* [remove fluid] under unittesets
* [remove fluid] under unittesets
* [remove fluid] under unittesets
---
 .../tests/test_imperative_qat_user_defined.py | 5 +-
 .../fleet/dygraph_dist_save_load.py | 1 +
 .../fleet/dygraph_group_sharded_api.py | 1 +
 .../fleet/dygraph_group_sharded_api_eager.py | 1 +
 .../fleet/dygraph_group_sharded_stage2.py | 1 +
 ...graph_group_sharded_stage2_comm_overlap.py | 1 +
 .../fleet/dygraph_group_sharded_stage3.py | 1 +
 .../dygraph_group_sharded_stage3_offload.py | 1 +
 .../fleet/dygraph_save_for_auto_infer.py | 5 +-
 .../dygraph_sharding_optimizer_stage2.py | 1 +
 .../fleet/dygraph_sharding_stage2.py | 1 +
 .../fleet/dygraph_sharding_stage3.py | 1 +
.../fleet/dygraph_sharding_stage3_offload.py | 1 + .../parallel_dygraph_control_flow_same.py | 1 + .../fleet/parallel_dygraph_no_sync.py | 7 +- .../parallel_dygraph_no_sync_control_flow.py | 7 +- ...parallel_dygraph_no_sync_gradient_check.py | 2 +- .../parallel_dygraph_no_sync_unused_params.py | 10 +- .../fleet/parallel_dygraph_se_resnext.py | 37 +++--- .../fleet/test_fleet_hybrid_meta_optimizer.py | 8 +- .../mn_dygraph_group_sharded_stage3.py | 1 + .../multinode/mn_dygraph_sharding_stage2.py | 1 + .../dygraph_to_static/bert_dygraph_model.py | 45 +++---- .../dygraph_to_static/ifelse_simple_func.py | 14 +-- .../seq2seq_dygraph_model.py | 33 +++-- .../dygraph_to_static/simnet_dygraph_model.py | 7 +- .../test_basic_api_transformation.py | 57 ++++----- .../unittests/dygraph_to_static/test_bmn.py | 14 +-- .../dygraph_to_static/test_convert_call.py | 18 +-- .../dygraph_to_static/test_declarative.py | 2 +- .../unittests/dygraph_to_static/test_dict.py | 28 ++--- .../unittests/dygraph_to_static/test_error.py | 2 +- .../dygraph_to_static/test_fetch_feed.py | 17 ++- .../unittests/dygraph_to_static/test_lac.py | 28 +++-- .../unittests/dygraph_to_static/test_mnist.py | 12 +- .../dygraph_to_static/test_mobile_net.py | 24 +++- .../test_program_translator.py | 2 +- .../test_reinforcement_learning.py | 7 +- .../dygraph_to_static/test_resnet.py | 19 ++- .../dygraph_to_static/test_resnet_v2.py | 5 +- .../test_save_inference_model.py | 2 +- .../dygraph_to_static/test_se_resnet.py | 32 +++-- .../dygraph_to_static/test_sentiment.py | 54 ++++---- .../unittests/dygraph_to_static/test_tsm.py | 25 ++-- .../transformer_dygraph_model.py | 31 ++--- .../unittests/fleet_meta_optimizer_base.py | 4 +- .../tests/unittests/mlu/test_pool2d_op_mlu.py | 104 --------------- .../parallel_dygraph_gradient_check.py | 2 +- ...el_dygraph_gradient_check_in_eager_mode.py | 2 +- .../tests/unittests/parallel_dygraph_mnist.py | 15 +-- .../parallel_dygraph_shared_unused_var.py | 3 +- .../fluid/tests/unittests/test_adam_op.py | 6 +- .../fluid/tests/unittests/test_detach.py | 74 +++++------ .../unittests/test_dygraph_mnist_fp16.py | 13 +- .../unittests/test_dygraph_multi_forward.py | 12 +- .../fluid/tests/unittests/test_exception.py | 2 +- .../tests/unittests/test_gradient_clip.py | 6 +- .../unittests/test_imperative_auto_prune.py | 60 +++++---- .../tests/unittests/test_imperative_basic.py | 36 +++--- .../test_imperative_container_layerlist.py | 16 +-- .../test_imperative_container_sequential.py | 29 ++--- .../test_imperative_data_parallel.py | 2 +- .../tests/unittests/test_imperative_deepcf.py | 26 +++- .../unittests/test_imperative_double_grad.py | 2 +- .../unittests/test_imperative_framework.py | 21 ++-- .../tests/unittests/test_imperative_gan.py | 11 +- .../unittests/test_imperative_layer_apply.py | 4 +- .../test_imperative_layer_children.py | 4 +- .../test_imperative_layer_trainable.py | 3 +- .../test_imperative_load_static_param.py | 15 ++- .../tests/unittests/test_imperative_mnist.py | 18 ++- .../test_imperative_named_members.py | 14 +-- .../test_imperative_ocr_attention_model.py | 27 ++-- .../unittests/test_imperative_optimizer.py | 24 ++-- .../unittests/test_imperative_optimizer_v2.py | 24 ++-- .../test_imperative_partitial_backward.py | 5 +- .../test_imperative_reinforcement.py | 7 +- .../tests/unittests/test_imperative_resnet.py | 16 +-- .../unittests/test_imperative_se_resnext.py | 51 ++++---- ...imperative_trace_non_persistable_inputs.py | 5 +- ..._imperative_transformer_sorted_gradient.py | 13 +- 
.../tests/unittests/test_jit_save_load.py | 2 +- .../fluid/tests/unittests/test_layers.py | 119 +++++++++--------- .../unittests/test_learning_rate_scheduler.py | 8 +- .../test_multiprocess_dataloader_dynamic.py | 20 +-- ...ess_dataloader_iterable_dataset_dynamic.py | 22 ++-- .../test_optimizer_in_control_flow.py | 28 ++--- .../fluid/tests/unittests/test_pool2d_op.py | 108 ---------------- .../fluid/tests/unittests/test_regularizer.py | 12 +- .../tests/unittests/test_regularizer_api.py | 12 +- .../fluid/tests/unittests/test_var_base.py | 8 +- 91 files changed, 717 insertions(+), 871 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py index 21c7eda8cfa34c..ead2a89c372a1b 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py @@ -23,7 +23,6 @@ from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass from paddle.nn import Sequential -from paddle.fluid.dygraph import Pool2D from paddle.fluid.dygraph import Linear from paddle.nn.quant.quant_layers import QuantizedConv2DTranspose from paddle.fluid.log_helper import get_logger @@ -132,7 +131,7 @@ def __init__(self, num_classes=10, classifier_activation='softmax'): stride=1, padding=1, ), - Pool2D(pool_size=2, pool_type='max', pool_stride=2), + paddle.nn.MaxPool2D(kernel_size=2, stride=2), paddle.nn.Conv2D( in_channels=6, out_channels=16, @@ -140,7 +139,7 @@ def __init__(self, num_classes=10, classifier_activation='softmax'): stride=1, padding=0, ), - Pool2D(pool_size=2, pool_type='max', pool_stride=2), + paddle.nn.MaxPool2D(kernel_size=2, stride=2), ) self.fc = Sequential( diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_dist_save_load.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_dist_save_load.py index cae578b66d6954..a9c525daae41f9 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_dist_save_load.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_dist_save_load.py @@ -33,6 +33,7 @@ ) from paddle.fluid.dygraph.nn import Linear from paddle.incubate.distributed.utils.io import load, save +from paddle.nn import Linear print(load) epoch = 2 diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_api.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_api.py index eba6bab976bd7d..6ecf1ca72d40af 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_api.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_api.py @@ -26,6 +26,7 @@ ) from paddle.fluid.dygraph.nn import Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear epoch = 10 paddle.seed(2022) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_api_eager.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_api_eager.py index 14272aba8133bf..3849b806085186 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_api_eager.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_api_eager.py @@ -24,6 +24,7 @@ ) from paddle.fluid.dygraph.nn import Linear from paddle.fluid.framework import _test_eager_guard +from 
paddle.nn import Linear epoch = 10 paddle.seed(2022) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage2.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage2.py index ec3baef9a9a263..fc562ffae22360 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage2.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage2.py @@ -30,6 +30,7 @@ ) from paddle.fluid.dygraph.nn import Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear seed = 2022 epoch = 2 diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage2_comm_overlap.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage2_comm_overlap.py index 66d975c18888a8..7e2b626ec45ec4 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage2_comm_overlap.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage2_comm_overlap.py @@ -30,6 +30,7 @@ ) from paddle.fluid.dygraph.nn import Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear seed = 2022 epoch = 2 diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3.py index 28f11d93ef7feb..d462eb339b80df 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3.py @@ -36,6 +36,7 @@ ) from paddle.fluid.dygraph.nn import Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear epoch = 10 paddle.seed(2022) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3_offload.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3_offload.py index d8bc6a3363386e..3bb6ed15a69da6 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3_offload.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_group_sharded_stage3_offload.py @@ -26,6 +26,7 @@ ) from paddle.fluid.dygraph.nn import Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear epoch = 10 paddle.seed(2022) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py index 5b92edb9032bad..f63cfc089ed8a5 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py @@ -39,8 +39,9 @@ from paddle.distributed.sharding.group_sharded import group_sharded_parallel from paddle.distributed.utils.log_utils import get_logger from paddle.fluid.dataloader.dataset import IterableDataset -from paddle.fluid.dygraph.nn import Embedding, Linear +from paddle.fluid.dygraph.nn import Embedding from paddle.incubate.distributed.utils.io import save_for_auto_inference +from paddle.nn import Linear logger = get_logger("INFO", __file__) @@ -76,7 +77,7 @@ def __init__( gather_output=True, has_bias=True, ), - LayerDesc(Linear, input_dim=linear_size, output_dim=10), + LayerDesc(Linear, in_features=linear_size, out_features=10), ] super(MLP_pipe, 
self).__init__( desc, diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_optimizer_stage2.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_optimizer_stage2.py index d8b8eb88680619..2b4237507360ba 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_optimizer_stage2.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_optimizer_stage2.py @@ -25,6 +25,7 @@ from paddle.distributed.fleet.utils.internal_storage import GradStorage from paddle.fluid.dygraph.nn import Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear base_lr = 0.1 momentum_rate = 0.9 diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_stage2.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_stage2.py index 2cf3fd920be041..9a7d1081a633ce 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_stage2.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_stage2.py @@ -31,6 +31,7 @@ ) from paddle.fluid.dygraph.nn import Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear seed = 2022 epoch = 2 diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_stage3.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_stage3.py index 7a37890a1c255b..6822872a39834e 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_stage3.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_stage3.py @@ -37,6 +37,7 @@ ) from paddle.fluid.dygraph.nn import Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear epoch = 10 paddle.seed(2021) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_stage3_offload.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_stage3_offload.py index 18a601c5257f92..1ebcff04c83270 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_stage3_offload.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_sharding_stage3_offload.py @@ -27,6 +27,7 @@ ) from paddle.fluid.dygraph.nn import Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear epoch = 10 paddle.seed(2022) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_control_flow_same.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_control_flow_same.py index 9c4e1750744ac9..9bf0f50676be0c 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_control_flow_same.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_control_flow_same.py @@ -19,6 +19,7 @@ import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.nn import Linear +from paddle.nn import Linear np.random.seed(2021) paddle.seed(1024) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync.py index 5bef19776302e3..970b0cee70e0f4 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync.py @@ -27,6 +27,7 @@ import paddle.distributed as dist import paddle.fluid as fluid from 
paddle.fluid.dygraph.nn import Linear +from paddle.nn import Linear seed = 90 RUN_STEP = 20 @@ -37,9 +38,9 @@ class SimpleNet(fluid.Layer): def __init__(self): super().__init__() - self.net_a = Linear(input_dim=10, output_dim=20) - self.net_b = Linear(input_dim=20, output_dim=5) - self.net_c = Linear(input_dim=5, output_dim=10) + self.net_a = Linear(10, 20) + self.net_b = Linear(20, 5) + self.net_c = Linear(5, 10) def forward(self, x): x = self.net_a(x) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync_control_flow.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync_control_flow.py index f9502244d2fe34..8ac9d6b2113d1f 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync_control_flow.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync_control_flow.py @@ -19,6 +19,7 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.nn import Linear +from paddle.nn import Linear seed = 90 RUN_STEP = 20 @@ -29,9 +30,9 @@ class SimpleNetControlFlow(fluid.Layer): def __init__(self): super().__init__() - self.net_a = Linear(input_dim=10, output_dim=20) - self.net_b = Linear(input_dim=20, output_dim=5) - self.net_c = Linear(input_dim=5, output_dim=10) + self.net_a = Linear(10, 20) + self.net_b = Linear(20, 5) + self.net_c = Linear(5, 10) self.step = 0 def forward(self, x): diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync_gradient_check.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync_gradient_check.py index 4cb32e5109e30a..6ea8c59f806c35 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync_gradient_check.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync_gradient_check.py @@ -19,7 +19,7 @@ import paddle import paddle.distributed as dist import paddle.fluid as fluid -from paddle.fluid.dygraph.nn import Linear +from paddle.nn import Linear paddle.seed(1024) np.random.seed(2021) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync_unused_params.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync_unused_params.py index 2a558953d13e7e..ace0da64b49bfb 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync_unused_params.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_no_sync_unused_params.py @@ -18,7 +18,7 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph.nn import Linear +from paddle.nn import Linear seed = 90 RUN_STEP = 20 @@ -29,11 +29,11 @@ class SimpleNetUnusedParam(fluid.Layer): def __init__(self): super().__init__() - self.net_a = Linear(input_dim=10, output_dim=20) - self.net_b = Linear(input_dim=20, output_dim=5) - self.net_c = Linear(input_dim=5, output_dim=10) + self.net_a = Linear(10, 20) + self.net_b = Linear(20, 5) + self.net_c = Linear(5, 10) - self.net_d = Linear(input_dim=20, output_dim=10) + self.net_d = Linear(20, 10) def forward(self, x): x = self.net_a(x) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py index 95f596c20562c2..13e83741ea6956 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py +++ 
b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py @@ -20,7 +20,8 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import Linear, Pool2D +from paddle.fluid.dygraph.nn import Linear +from paddle.nn import Linear batch_size = 64 momentum_rate = 0.9 @@ -114,31 +115,33 @@ def __init__(self, num_channels, reduction_ratio): super().__init__() self._num_channels = num_channels - self._pool = Pool2D(pool_size=0, pool_type='avg', global_pooling=True) + self._pool = paddle.fluid.dygraph.nn.Pool2D( + pool_size=0, pool_type='avg', global_pooling=True + ) stdv = 1.0 / math.sqrt(num_channels * 1.0) self._squeeze = Linear( num_channels, num_channels // reduction_ratio, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), - act='relu', ) stdv = 1.0 / math.sqrt(num_channels / 16.0 * 1.0) self._excitation = Linear( num_channels // reduction_ratio, num_channels, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), - act='sigmoid', ) def forward(self, input): y = self._pool(input) y = paddle.reshape(y, shape=[-1, self._num_channels]) y = self._squeeze(y) + y = paddle.nn.functional.relu(y) y = self._excitation(y) + y = paddle.nn.functional.sigmoid(y) y = fluid.layers.elementwise_mul(x=input, y=y, axis=0) return y @@ -231,9 +234,7 @@ def __init__(self, layers=50, class_dim=102): stride=2, act='relu', ) - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' - ) + self.pool = paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1) elif layers == 101: cardinality = 32 reduction_ratio = 16 @@ -246,9 +247,7 @@ def __init__(self, layers=50, class_dim=102): stride=2, act='relu', ) - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' - ) + self.pool = paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1) elif layers == 152: cardinality = 64 reduction_ratio = 16 @@ -275,9 +274,7 @@ def __init__(self, layers=50, class_dim=102): stride=1, act='relu', ) - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' - ) + self.pool = paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1) self.bottleneck_block_list = [] num_channels = 64 @@ -299,7 +296,7 @@ def __init__(self, layers=50, class_dim=102): self.bottleneck_block_list.append(bottleneck_block) shortcut = True - self.pool2d_avg = Pool2D( + self.pool2d_avg = paddle.fluid.dygraph.nn.Pool2D( pool_size=7, pool_type='avg', global_pooling=True ) stdv = 1.0 / math.sqrt(2048 * 1.0) @@ -309,8 +306,8 @@ def __init__(self, layers=50, class_dim=102): self.out = Linear( self.pool2d_avg_output, class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), ) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py index cf73e6ad6e1312..46c0b4fc58f65b 100755 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py @@ -994,7 +994,7 @@ def 
test_opt_sharding_with_pp_amp_gclip_boundary(self): [ 'recv_v2', 'cast', - 'matmul', + 'matmul_v2', 'cast', 'reduce_mean', 'elementwise_mul', @@ -1002,7 +1002,7 @@ def test_opt_sharding_with_pp_amp_gclip_boundary(self): 'elementwise_mul_grad', 'reduce_mean_grad', 'cast', - 'matmul_grad', + 'matmul_v2_grad', 'c_sync_calc_stream', 'send_v2', 'fill_constant', @@ -1087,7 +1087,7 @@ def test_opt_sharding_with_pp_amp_gclip_boundary_card1(self): [ 'recv_v2', 'cast', - 'matmul', + 'matmul_v2', 'cast', 'reduce_mean', 'elementwise_mul', @@ -1095,7 +1095,7 @@ def test_opt_sharding_with_pp_amp_gclip_boundary_card1(self): 'elementwise_mul_grad', 'reduce_mean_grad', 'cast', - 'matmul_grad', + 'matmul_v2_grad', 'c_sync_calc_stream', 'send_v2', 'fill_constant', diff --git a/python/paddle/fluid/tests/unittests/collective/multinode/mn_dygraph_group_sharded_stage3.py b/python/paddle/fluid/tests/unittests/collective/multinode/mn_dygraph_group_sharded_stage3.py index 84f7ce37c1cb45..5c328591bda754 100644 --- a/python/paddle/fluid/tests/unittests/collective/multinode/mn_dygraph_group_sharded_stage3.py +++ b/python/paddle/fluid/tests/unittests/collective/multinode/mn_dygraph_group_sharded_stage3.py @@ -36,6 +36,7 @@ ) from paddle.fluid.dygraph.nn import Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear epoch = 10 paddle.seed(2022) diff --git a/python/paddle/fluid/tests/unittests/collective/multinode/mn_dygraph_sharding_stage2.py b/python/paddle/fluid/tests/unittests/collective/multinode/mn_dygraph_sharding_stage2.py index ab7f9c9ea2f7d9..d68a6cb5880dab 100644 --- a/python/paddle/fluid/tests/unittests/collective/multinode/mn_dygraph_sharding_stage2.py +++ b/python/paddle/fluid/tests/unittests/collective/multinode/mn_dygraph_sharding_stage2.py @@ -31,6 +31,7 @@ ) from paddle.fluid.dygraph.nn import Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear seed = 2022 epoch = 2 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py index b41e7614ce83b7..8eb757d87ac4f9 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py @@ -16,7 +16,8 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph import Embedding, Layer, Linear +from paddle.fluid.dygraph import Embedding, Layer +from paddle.nn import Linear from paddle.jit.api import declarative @@ -33,19 +34,18 @@ def __init__( super().__init__() self._i2h = Linear( - input_dim=d_model, - output_dim=d_inner_hid, - param_attr=fluid.ParamAttr( + in_features=d_model, + out_features=d_inner_hid, + weight_attr=fluid.ParamAttr( name=name + '_fc_0.w_0', initializer=param_initializer ), bias_attr=name + '_fc_0.b_0', - act=hidden_act, ) self._h2o = Linear( - input_dim=d_inner_hid, - output_dim=d_model, - param_attr=fluid.ParamAttr( + in_features=d_inner_hid, + out_features=d_model, + weight_attr=fluid.ParamAttr( name=name + '_fc_1.w_0', initializer=param_initializer ), bias_attr=name + '_fc_1.b_0', @@ -234,13 +234,12 @@ def __init__(self, config, return_pooled_out=True, use_fp16=False): ) self.pooled_fc = Linear( - input_dim=self._emb_size, - output_dim=self._emb_size, - param_attr=fluid.ParamAttr( + in_features=self._emb_size, + out_features=self._emb_size, + weight_attr=fluid.ParamAttr( name="pooled_fc.w_0", initializer=self._param_initializer ), 
bias_attr="pooled_fc.b_0", - act="tanh", ) self.pre_process_layer = PrePostProcessLayer( @@ -295,6 +294,8 @@ def forward(self, src_ids, position_ids, sentence_ids, input_mask): input=enc_output, axes=[1], starts=[0], ends=[1] ) next_sent_feat = self.pooled_fc(next_sent_feat) + + next_sent_feat = paddle.tanh(next_sent_feat) next_sent_feat = paddle.reshape( next_sent_feat, shape=[-1, self._emb_size] ) @@ -334,13 +335,12 @@ def __init__( ) self.pooled_fc = Linear( - input_dim=self._emb_size, - output_dim=self._emb_size, - param_attr=fluid.ParamAttr( + in_features=self._emb_size, + out_features=self._emb_size, + weight_attr=fluid.ParamAttr( name="mask_lm_trans_fc.w_0", initializer=self._param_initializer ), bias_attr="mask_lm_trans_fc.b_0", - act="tanh", ) self.mask_lm_out_bias_attr = fluid.ParamAttr( @@ -350,9 +350,9 @@ def __init__( if not self._weight_sharing: self.out_fc = Linear( - input_dim=self._emb_size, - output_dim=self._voc_size, - param_attr=fluid.ParamAttr( + in_features=self._emb_size, + out_features=self._voc_size, + weight_attr=fluid.ParamAttr( name="mask_lm_out_fc.w_0", initializer=self._param_initializer, ), @@ -367,9 +367,9 @@ def __init__( ) self.next_sent_fc = Linear( - input_dim=self._emb_size, - output_dim=2, - param_attr=fluid.ParamAttr( + in_features=self._emb_size, + out_features=2, + weight_attr=fluid.ParamAttr( name="next_sent_fc.w_0", initializer=self._param_initializer ), bias_attr="next_sent_fc.b_0", @@ -397,6 +397,7 @@ def forward( mask_feat = paddle.gather(reshaped_emb_out, index=mask_pos) mask_trans_feat = self.pooled_fc(mask_feat) + mask_trans_feat = paddle.tanh(mask_trans_feat) mask_trans_feat = self.pre_process_layer(mask_trans_feat) if self._weight_sharing: diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py index 3862ab6f9420a4..fd084e06649080 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py @@ -232,14 +232,14 @@ class NetWithControlFlowIf(fluid.dygraph.Layer): def __init__(self, hidden_dim=16): super().__init__() self.hidden_dim = hidden_dim - self.fc = fluid.dygraph.Linear( - input_dim=hidden_dim, - output_dim=5, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + self.fc = paddle.nn.Linear( + in_features=hidden_dim, + out_features=5, + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.99) ), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + bias_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.5) ), ) self.alpha = 10.0 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index e7af14446410f6..cb9e92bf629cea 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -166,10 +166,14 @@ def __init__( ) ) - self.fc = fluid.dygraph.nn.Linear( + self.fc = paddle.nn.Linear( self.hidden_size, self.tar_vocab_size, - param_attr=param_attr, + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Uniform( + low=-self.init_scale, high=self.init_scale + ) + ), bias_attr=False, ) @@ -611,31 +615,38 @@ def __init__( ) ) - self.attn_fc = fluid.dygraph.nn.Linear( + 
self.attn_fc = paddle.nn.Linear( self.hidden_size, self.hidden_size, - param_attr=ParamAttr( + weight_attr=paddle.ParamAttr( name="self_attn_fc", - initializer=uniform_initializer(self.init_scale), + initializer=paddle.nn.initializer.Uniform( + low=-self.init_scale, high=self.init_scale + ), ), bias_attr=False, ) - self.concat_fc = fluid.dygraph.nn.Linear( + self.concat_fc = paddle.nn.Linear( 2 * self.hidden_size, self.hidden_size, - param_attr=ParamAttr( + weight_attr=paddle.ParamAttr( name="self_concat_fc", - initializer=uniform_initializer(self.init_scale), + initializer=paddle.nn.initializer.Uniform( + low=-self.init_scale, high=self.init_scale + ), ), bias_attr=False, ) - self.fc = fluid.dygraph.nn.Linear( + self.fc = paddle.nn.Linear( self.hidden_size, self.tar_vocab_size, - param_attr=ParamAttr( - name="self_fc", initializer=uniform_initializer(self.init_scale) + weight_attr=paddle.ParamAttr( + name="self_fc", + initializer=paddle.nn.initializer.Uniform( + low=-self.init_scale, high=self.init_scale + ), ), bias_attr=False, ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py index b10a5dc55806c8..10e4b9d85ec161 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py @@ -17,7 +17,10 @@ import paddle import paddle.fluid as fluid import paddle.fluid.param_attr as attr -from paddle.fluid.dygraph import Embedding, Layer, Linear + +from functools import reduce + +from paddle.fluid.dygraph import Embedding, Layer from paddle.jit.api import declarative from paddle.static import Variable @@ -490,7 +493,7 @@ def __init__(self, conf_dict): self.emb_layer = EmbeddingLayer( self.dict_size, self.emb_dim, "emb" ).ops() - self.bow_layer = Linear(self.bow_dim, self.bow_dim) + self.bow_layer = paddle.nn.Linear(self.bow_dim, self.bow_dim) self.bow_layer_po = FCLayer(self.bow_dim, None, "fc").ops() self.softmax_layer = FCLayer(2, "softmax", "cos_sim").ops() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py index 69765c1b80f225..34a65913c5ae56 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py @@ -149,15 +149,15 @@ def dyfunc_Conv2D(input): def dyfunc_Conv3D(input): - conv3d = fluid.dygraph.Conv3D( - num_channels=3, - num_filters=2, - filter_size=3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + conv3d = paddle.nn.Conv3D( + in_channels=3, + out_channels=2, + kernel_size=3, + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.99) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + initializer=paddle.nn.initializer.Constant(value=0.5) ), ) res = conv3d(input) @@ -182,16 +182,15 @@ def dyfunc_Conv2DTranspose(input): def dyfunc_Conv3DTranspose(input): - conv3dTranspose = fluid.dygraph.nn.Conv3DTranspose( - num_channels=3, - num_filters=12, - filter_size=12, - use_cudnn=False, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + conv3dTranspose = paddle.nn.Conv3DTranspose( + in_channels=3, + out_channels=12, + kernel_size=12, + weight_attr=paddle.ParamAttr( + 
initializer=paddle.nn.initializer.Constant(value=0.99) ), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + bias_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.5) ), ) ret = conv3dTranspose(input) @@ -199,28 +198,24 @@ def dyfunc_Conv3DTranspose(input): def dyfunc_Linear(input): - fc = fluid.dygraph.Linear( - input_dim=10, - output_dim=5, - act='relu', - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + fc = paddle.nn.Linear( + in_features=10, + out_features=5, + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.99) ), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + bias_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.5) ), ) + m = paddle.nn.ReLU() res = fc(input) - return res + return m(res) def dyfunc_Pool2D(input): - fluid.dygraph.Pool2D( - pool_size=2, pool_type='avg', pool_stride=1, global_pooling=False - ) - pool2d = fluid.dygraph.Pool2D( - pool_size=2, pool_type='avg', pool_stride=1, global_pooling=False - ) + paddle.nn.AvgPool2D(kernel_size=2, stride=1) + pool2d = paddle.nn.AvgPool2D(kernel_size=2, stride=1) res = pool2d(input) return res diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py index c0f5e8c0c3ce10..90a7b4d35efd9d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py @@ -218,15 +218,14 @@ def __init__(self, cfg): self.sample_mask = fluid.dygraph.base.to_variable(sample_mask) self.sample_mask.stop_gradient = True - self.p_conv3d1 = fluid.dygraph.Conv3D( - num_channels=128, - num_filters=self.hidden_dim_3d, - filter_size=(self.num_sample, 1, 1), + self.p_conv3d1 = paddle.nn.Conv3D( + in_channels=128, + out_channels=self.hidden_dim_3d, + kernel_size=(self.num_sample, 1, 1), stride=(self.num_sample, 1, 1), padding=0, - act="relu", - param_attr=ParamAttr(name="PEM_3d1_w"), - bias_attr=ParamAttr(name="PEM_3d1_b"), + weight_attr=paddle.ParamAttr(name="PEM_3d1_w"), + bias_attr=paddle.ParamAttr(name="PEM_3d1_b"), ) self.p_conv2d1 = paddle.nn.Conv2D( @@ -287,6 +286,7 @@ def forward(self, x): xp = paddle.reshape(xp, shape=[0, 0, -1, self.dscale, self.tscale]) xp = self.p_conv3d1(xp) + xp = paddle.tanh(xp) xp = paddle.squeeze(xp, axis=[2]) xp = paddle.nn.functional.relu(self.p_conv2d1(xp)) xp = paddle.nn.functional.relu(self.p_conv2d2(xp)) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py index 3922c60bafc2f0..302045ed4038f2 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py @@ -155,23 +155,23 @@ def __init__(self): super().__init__() self.conv = MyConvLayer() - self.fc = fluid.dygraph.Linear( - input_dim=5, - output_dim=1, - act='relu', - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + self.fc = paddle.nn.Linear( + in_features=5, + out_features=1, + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.99) ), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + bias_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.5) ), ) + self.act = paddle.nn.ReLU() 
@paddle.jit.to_static def forward(self, inputs): h = self.conv(inputs) out = self.fc(h) - return out + return self.act(out) class TestRecursiveCall2(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py index b9ae4c5759d761..4ac7ca9e3c926d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py @@ -36,7 +36,7 @@ class SimpleNet(Layer): def __init__(self): super().__init__() - self.linear = fluid.dygraph.Linear(10, 3) + self.linear = paddle.nn.Linear(10, 3) @declarative(input_spec=[InputSpec(shape=[None, 10], dtype='float32')]) def forward(self, x, a=1, b=2): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py index 2f049581ecec9b..57bd7c2936e8e0 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py @@ -30,27 +30,27 @@ class SubNetWithDict(fluid.dygraph.Layer): def __init__(self, hidden_size=16, output_size=16): super().__init__() - init_weight = lambda x: fluid.ParamAttr( - initializer=fluid.initializer.Constant(x) + init_weight = lambda x: paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(x) ) - self.q_fc = fluid.dygraph.Linear( - input_dim=hidden_size, - output_dim=output_size, + self.q_fc = paddle.nn.Linear( + in_features=hidden_size, + out_features=output_size, bias_attr=False, - param_attr=init_weight(0.6), + weight_attr=init_weight(0.6), ) - self.k_fc = fluid.dygraph.Linear( - input_dim=hidden_size, - output_dim=output_size, + self.k_fc = paddle.nn.Linear( + in_features=hidden_size, + out_features=output_size, bias_attr=False, - param_attr=init_weight(0.5), + weight_attr=init_weight(0.5), ) - self.v_fc = fluid.dygraph.Linear( - input_dim=hidden_size, - output_dim=output_size, + self.v_fc = paddle.nn.Linear( + in_features=hidden_size, + out_features=output_size, bias_attr=False, - param_attr=init_weight(0.2), + weight_attr=init_weight(0.2), ) def forward(self, input, cache=None): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_error.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_error.py index d7a21f3be6b08b..6faed1a61e809d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_error.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_error.py @@ -68,7 +68,7 @@ def func_decorated_by_other_2(): class LayerErrorInCompiletime(fluid.dygraph.Layer): def __init__(self, fc_size=20): super().__init__() - self._linear = fluid.dygraph.Linear(fc_size, fc_size) + self._linear = paddle.nn.Linear(fc_size, fc_size) @paddle.jit.to_static( input_spec=[paddle.static.InputSpec(shape=[20, 20], dtype='float32')] diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_fetch_feed.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_fetch_feed.py index b09ce1eab4e439..74dd84720f50fc 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_fetch_feed.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_fetch_feed.py @@ -27,9 +27,7 @@ class Pool2D(fluid.dygraph.Layer): def __init__(self): super().__init__() - self.pool2d = fluid.dygraph.Pool2D( - pool_size=2, pool_type='avg', pool_stride=1, global_pooling=False - ) + self.pool2d = 
paddle.nn.AvgPool2D(kernel_size=2, stride=1) @declarative def forward(self, x): @@ -44,21 +42,22 @@ def get_result(x): class Linear(fluid.dygraph.Layer): def __init__(self, input_dim=10, output_dim=5): super().__init__() - self.fc = fluid.dygraph.Linear( + self.fc = paddle.nn.Linear( input_dim, output_dim, - act='relu', - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.99) ), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + bias_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.5) ), ) + self.act = paddle.nn.ReLU() @declarative def forward(self, x): pre = self.fc(x) + pre = self.act(pre) loss = paddle.mean(pre) return pre, loss diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py index 1d4b5850e4fd33..dd4e7e6746d1ce 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py @@ -24,8 +24,10 @@ import paddle import paddle.fluid as fluid +from paddle.fluid.dygraph import to_variable +from paddle.fluid.dygraph import Embedding, GRUUnit + from paddle import _legacy_C_ops -from paddle.fluid.dygraph import Embedding, GRUUnit, Linear, to_variable from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.fluid.framework import _non_static_mode from paddle.jit import ProgramTranslator @@ -100,10 +102,10 @@ class BiGRU(fluid.dygraph.Layer): def __init__(self, input_dim, grnn_hidden_dim, init_bound, h_0=None): super().__init__() - self.pre_gru = Linear( - input_dim=input_dim, - output_dim=grnn_hidden_dim * 3, - param_attr=fluid.ParamAttr( + self.pre_gru = paddle.nn.Linear( + in_features=input_dim, + out_features=grnn_hidden_dim * 3, + weight_attr=fluid.ParamAttr( initializer=fluid.initializer.Uniform( low=-init_bound, high=init_bound ), @@ -126,10 +128,10 @@ def __init__(self, input_dim, grnn_hidden_dim, init_bound, h_0=None): ), ) - self.pre_gru_r = Linear( - input_dim=input_dim, - output_dim=grnn_hidden_dim * 3, - param_attr=fluid.ParamAttr( + self.pre_gru_r = paddle.nn.Linear( + in_features=input_dim, + out_features=grnn_hidden_dim * 3, + weight_attr=fluid.ParamAttr( initializer=fluid.initializer.Uniform( low=-init_bound, high=init_bound ), @@ -417,10 +419,10 @@ def __init__(self, args, length=None): ) ) - self.fc = Linear( - input_dim=self.grnn_hidden_dim * 2, - output_dim=self.num_labels, - param_attr=fluid.ParamAttr( + self.fc = paddle.nn.Linear( + in_features=self.grnn_hidden_dim * 2, + out_features=self.num_labels, + weight_attr=fluid.ParamAttr( initializer=fluid.initializer.Uniform( low=-self.init_bound, high=self.init_bound ), diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py index af942cdb8d70d2..fe7e463e1db2f3 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py @@ -23,9 +23,9 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import to_variable +from paddle.nn import Linear from paddle.fluid.dygraph.base import switch_to_static_graph from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX -from paddle.fluid.dygraph.nn import Linear, Pool2D from 
paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import AdamOptimizer @@ -69,7 +69,7 @@ def __init__( bias_attr=None, ) - self._pool2d = Pool2D( + self._pool2d = paddle.fluid.dygraph.nn.Pool2D( pool_size=pool_size, pool_type=pool_type, pool_stride=pool_stride, @@ -102,12 +102,9 @@ def __init__(self): self._fc = Linear( self.pool_2_shape, 10, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale - ) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Normal(mean=0.0, std=scale) ), - act="softmax", ) def forward(self, inputs, label=None): @@ -126,6 +123,7 @@ def inference(self, inputs): x = self._simple_img_conv_pool_2(x) x = paddle.reshape(x, shape=[-1, self.pool_2_shape]) x = self._fc(x) + x = paddle.nn.functional.softmax(x) return x diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index 068046e00bde08..b40eb92753dacc 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -23,11 +23,19 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX -from paddle.fluid.dygraph.nn import BatchNorm, Linear, Pool2D +from paddle.fluid.dygraph.nn import BatchNorm, Linear from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr -from paddle.jit import ProgramTranslator +from paddle.fluid.dygraph.nn import BatchNorm +from paddle.nn import Linear from paddle.jit.api import declarative +from paddle.jit import ProgramTranslator + +from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX + +import unittest + +from predictor_utils import PredictorTools # Note: Set True to eliminate randomness. # 1. For one operation, cuDNN has several algorithms, @@ -255,12 +263,14 @@ def __init__(self, scale=1.0, class_dim=1000): ) self.dwsl.append(dws6) - self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True) + self.pool2d_avg = paddle.fluid.dygraph.nn.Pool2D( + pool_type='avg', global_pooling=True + ) self.out = Linear( int(1024 * scale), class_dim, - param_attr=ParamAttr( + weight_attr=ParamAttr( initializer=MSRA(), name=self.full_name() + "fc7_weights" ), bias_attr=ParamAttr(name="fc7_offset"), @@ -421,14 +431,16 @@ def __init__(self, class_dim=1000, scale=1.0): ) # 4. pool - self._pool2d_avg = Pool2D(pool_type='avg', global_pooling=True) + self._pool2d_avg = paddle.fluid.dygraph.nn.Pool2D( + pool_type='avg', global_pooling=True + ) # 5. 
fc tmp_param = ParamAttr(name=self.full_name() + "fc10_weights") self._fc = Linear( self._out_c, class_dim, - param_attr=tmp_param, + weight_attr=tmp_param, bias_attr=ParamAttr(name="fc10_offset"), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py index 86f5626f344c51..c61fdcccf015ac 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py @@ -204,7 +204,7 @@ def false_fn_3(): class NetWithError(fluid.dygraph.layers.Layer): @declarative def forward(self, x): - linear = fluid.dygraph.Linear(32, 64) + linear = paddle.nn.Linear(32, 64) y = linear(x) return y diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py index 3b6da7e23c1c5c..6423d0d6bbcbf1 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py @@ -21,8 +21,7 @@ import paddle import paddle.fluid as fluid -import paddle.fluid.dygraph.nn as nn -from paddle.fluid.dygraph import Layer, to_variable +from paddle.fluid.dygraph import to_variable, Layer from paddle.jit import ProgramTranslator from paddle.jit.api import declarative @@ -34,8 +33,8 @@ class Policy(Layer): def __init__(self): super().__init__() - self.affine1 = nn.Linear(4, 128) - self.affine2 = nn.Linear(128, 2) + self.affine1 = paddle.nn.Linear(4, 128) + self.affine2 = paddle.nn.Linear(128, 2) self.dropout_ratio = 0.6 self.saved_log_probs = [] diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py index 5851f82630569d..fec0109168b50d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py @@ -17,14 +17,16 @@ import tempfile import time import unittest - +import paddle import numpy as np from predictor_utils import PredictorTools import paddle import paddle.fluid as fluid + +from paddle.fluid.dygraph.nn import BatchNorm +from paddle.jit import ProgramTranslator from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX -from paddle.fluid.dygraph.nn import BatchNorm, Linear, Pool2D from paddle.jit import ProgramTranslator SEED = 2020 @@ -165,9 +167,7 @@ def __init__(self, layers=50, class_dim=102): self.conv = ConvBNLayer( num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu' ) - self.pool2d_max = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' - ) + self.pool2d_max = paddle.nn.MaxPool2D(kernel_size=3, stride=2) self.bottleneck_block_list = [] for block in range(len(depth)): @@ -186,8 +186,7 @@ def __init__(self, layers=50, class_dim=102): ) self.bottleneck_block_list.append(bottleneck_block) shortcut = True - - self.pool2d_avg = Pool2D( + self.pool2d_avg = paddle.fluid.dygraph.nn.Pool2D( pool_size=7, pool_type='avg', global_pooling=True ) @@ -195,11 +194,10 @@ def __init__(self, layers=50, class_dim=102): stdv = 1.0 / math.sqrt(2048 * 1.0) - self.out = Linear( + self.out = paddle.nn.Linear( self.pool2d_avg_output, class_dim, - act='softmax', - param_attr=fluid.param_attr.ParamAttr( + weight_attr=fluid.param_attr.ParamAttr( 
initializer=fluid.initializer.Uniform(-stdv, stdv) ), ) @@ -212,6 +210,7 @@ def forward(self, inputs): y = self.pool2d_avg(y) y = paddle.reshape(y, shape=[-1, self.pool2d_avg_output]) pred = self.out(y) + pred = paddle.nn.functional.softmax(pred) return pred diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py index 112ff2f1d0f95a..00e423d686fab3 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py @@ -163,8 +163,8 @@ def __init__(self, layers=50, class_dim=102): self.conv = ConvBNLayer( num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu' ) - self.pool2d_max = paddle.fluid.dygraph.Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' + self.pool2d_max = paddle.nn.MaxPool2D( + kernel_size=3, stride=2, padding=1 ) self.bottleneck_block_list = [] @@ -184,7 +184,6 @@ def __init__(self, layers=50, class_dim=102): ) self.bottleneck_block_list.append(bottleneck_block) shortcut = True - self.pool2d_avg = paddle.fluid.dygraph.Pool2D( pool_size=7, pool_type='avg', global_pooling=True ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py index 043ad587fe77b3..c22c78fefe3d2f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py @@ -39,7 +39,7 @@ class SimpleFcLayer(fluid.dygraph.Layer): def __init__(self, fc_size): super().__init__() - self._linear = fluid.dygraph.Linear(fc_size, fc_size) + self._linear = paddle.nn.Linear(fc_size, fc_size) @declarative def forward(self, x): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py index 7d3b07a395c907..4b1aad178d0208 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py @@ -25,8 +25,9 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable +from paddle.fluid.dygraph.nn import BatchNorm +from paddle.nn import Linear from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX -from paddle.fluid.dygraph.nn import BatchNorm, Linear, Pool2D from paddle.jit import ProgramTranslator from paddle.jit.api import declarative @@ -126,31 +127,33 @@ def __init__(self, num_channels, reduction_ratio): super().__init__() self._num_channels = num_channels - self._pool = Pool2D(pool_size=0, pool_type='avg', global_pooling=True) + self._pool = paddle.fluid.dygraph.nn.Pool2D( + pool_size=0, pool_type='avg', global_pooling=True + ) stdv = 1.0 / math.sqrt(num_channels * 1.0) self._fc = Linear( num_channels, num_channels // reduction_ratio, - param_attr=fluid.ParamAttr( + weight_attr=fluid.ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv) ), - act='relu', ) stdv = 1.0 / math.sqrt(num_channels / 16.0 * 1.0) self._excitation = Linear( num_channels // reduction_ratio, num_channels, - param_attr=fluid.ParamAttr( + weight_attr=fluid.ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv) ), - act='sigmoid', ) def forward(self, input): y = self._pool(input) y = paddle.reshape(y, shape=[-1, 
self._num_channels]) y = self._fc(y) + y = paddle.nn.functional.relu(y) y = self._excitation(y) + y = paddle.nn.functional.sigmoid(y) y = fluid.layers.elementwise_mul(x=input, y=y, axis=0) return y @@ -243,9 +246,7 @@ def __init__(self, layers=50, class_dim=102): stride=2, act='relu', ) - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' - ) + self.pool = paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1) elif layers == 101: cardinality = 32 reduction_ratio = 16 @@ -258,9 +259,7 @@ def __init__(self, layers=50, class_dim=102): stride=2, act='relu', ) - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' - ) + self.pool = paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1) elif layers == 152: cardinality = 64 reduction_ratio = 16 @@ -287,9 +286,7 @@ def __init__(self, layers=50, class_dim=102): stride=1, act='relu', ) - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' - ) + self.pool = paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1) self.bottleneck_block_list = [] num_channels = 64 @@ -312,8 +309,7 @@ def __init__(self, layers=50, class_dim=102): num_channels = bottleneck_block._num_channels_out self.bottleneck_block_list.append(bottleneck_block) shortcut = True - - self.pool2d_avg = Pool2D( + self.pool2d_avg = paddle.fluid.dygraph.nn.Pool2D( pool_size=7, pool_type='avg', global_pooling=True ) stdv = 1.0 / math.sqrt(2048 * 1.0) @@ -323,7 +319,7 @@ def __init__(self, layers=50, class_dim=102): self.out = Linear( self.pool2d_avg_output, class_dim, - param_attr=fluid.param_attr.ParamAttr( + weight_attr=fluid.param_attr.ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv) ), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py index 98ce0ca7780504..b02f6f418b3afa 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py @@ -19,6 +19,8 @@ import paddle import paddle.fluid as fluid +from paddle.fluid.dygraph.nn import Embedding +from paddle.nn import Linear from paddle.fluid.dygraph import to_variable from paddle.fluid.dygraph.nn import Embedding, Linear from paddle.jit import ProgramTranslator @@ -83,13 +85,11 @@ def __init__(self, dict_dim, batch_size, seq_len): batch_size=self.batch_size, ) self._fc1 = Linear( - input_dim=self.hid_dim * self.seq_len, - output_dim=self.fc_hid_dim, - act="softmax", - ) - self._fc_prediction = Linear( - input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax" + self.hid_dim * self.seq_len, + self.fc_hid_dim, ) + self._fc1_act = paddle.nn.Softmax() + self._fc_prediction = Linear(self.fc_hid_dim, self.class_dim) @declarative def forward(self, inputs, label=None): @@ -104,7 +104,9 @@ def forward(self, inputs, label=None): ) conv_3 = self._simple_conv_pool_1(emb) fc_1 = self._fc1(conv_3) + fc_1 = self._fc1_act(fc_1) prediction = self._fc_prediction(fc_1) + prediction = self._fc1_act(prediction) cost = fluid.layers.cross_entropy(input=prediction, label=label) avg_cost = paddle.mean(x=cost) @@ -127,15 +129,9 @@ def __init__(self, dict_dim, batch_size, seq_len): dtype='float32', is_sparse=False, ) - self._fc1 = Linear( - input_dim=self.hid_dim, output_dim=self.hid_dim, act="tanh" - ) - self._fc2 = Linear( - input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh" - ) - self._fc_prediction = Linear( - 
input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax" - ) + self._fc1 = Linear(self.hid_dim, self.hid_dim) + self._fc2 = Linear(self.hid_dim, self.fc_hid_dim) + self._fc_prediction = Linear(self.fc_hid_dim, self.class_dim) @declarative def forward(self, inputs, label=None): @@ -149,8 +145,11 @@ def forward(self, inputs, label=None): bow_1 = paddle.sum(emb, axis=1) bow_1 = paddle.tanh(bow_1) fc_1 = self._fc1(bow_1) + fc_1 = paddle.tanh(fc_1) fc_2 = self._fc2(fc_1) + fc_2 = paddle.tanh(fc_2) prediction = self._fc_prediction(fc_2) + prediction = paddle.nn.functional.softmax(prediction) cost = fluid.layers.cross_entropy(input=prediction, label=label) avg_cost = paddle.mean(x=cost) @@ -176,13 +175,9 @@ def __init__(self, dict_dim, batch_size, seq_len): ) h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32") h_0 = to_variable(h_0) - self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.hid_dim * 3) - self._fc2 = Linear( - input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh" - ) - self._fc_prediction = Linear( - input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax" - ) + self._fc1 = Linear(self.hid_dim, self.hid_dim * 3) + self._fc2 = Linear(self.hid_dim, self.fc_hid_dim) + self._fc_prediction = Linear(self.fc_hid_dim, self.class_dim) self._gru = DynamicGRU(size=self.hid_dim, h_0=h_0) @declarative @@ -199,8 +194,9 @@ def forward(self, inputs, label=None): gru_hidden = paddle.max(gru_hidden, axis=1) tanh_1 = paddle.tanh(gru_hidden) fc_2 = self._fc2(tanh_1) + fc_2 = paddle.tanh(fc_2) prediction = self._fc_prediction(fc_2) - + prediction = paddle.nn.functional.softmax(prediction) cost = fluid.layers.cross_entropy(input=prediction, label=label) avg_cost = paddle.mean(x=cost) acc = fluid.layers.accuracy(input=prediction, label=label) @@ -225,13 +221,9 @@ def __init__(self, dict_dim, batch_size, seq_len): ) h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32") h_0 = to_variable(h_0) - self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.hid_dim * 3) - self._fc2 = Linear( - input_dim=self.hid_dim * 2, output_dim=self.fc_hid_dim, act="tanh" - ) - self._fc_prediction = Linear( - input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax" - ) + self._fc1 = Linear(self.hid_dim, self.hid_dim * 3) + self._fc2 = Linear(self.hid_dim * 2, self.fc_hid_dim) + self._fc_prediction = Linear(self.fc_hid_dim, self.class_dim) self._gru_forward = DynamicGRU( size=self.hid_dim, h_0=h_0, is_reverse=False ) @@ -259,7 +251,9 @@ def forward(self, inputs, label=None): ) encoded_vector = paddle.max(encoded_vector, axis=1) fc_2 = self._fc2(encoded_vector) + fc_2 = paddle.tanh(fc_2) prediction = self._fc_prediction(fc_2) + prediction = paddle.nn.functional.softmax(prediction) # TODO(Aurelius84): Uncomment the following codes when we support return variable-length vars. 
# if label is not None: cost = fluid.layers.cross_entropy(input=prediction, label=label) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py index d5bd239afd4ad7..0919e4bced39b0 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py @@ -23,10 +23,12 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph import to_variable -from paddle.fluid.dygraph.nn import BatchNorm, Linear, Pool2D -from paddle.jit import ProgramTranslator +from paddle.fluid.dygraph.nn import BatchNorm +from paddle.nn import Linear from paddle.jit.api import declarative +from paddle.jit import ProgramTranslator +from paddle.fluid.dygraph import to_variable +from tsm_config_utils import merge_configs, parse_config, print_configs random.seed(0) np.random.seed(0) @@ -159,8 +161,8 @@ def __init__(self, name_scope, config, mode): self.conv = ConvBNLayer( num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu' ) - self.pool2d_max = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' + self.pool2d_max = paddle.nn.MaxPool2D( + kernel_size=3, stride=2, padding=1 ) self.bottleneck_block_list = [] @@ -182,10 +184,9 @@ def __init__(self, name_scope, config, mode): num_channels = int(bottleneck_block._num_channels_out) self.bottleneck_block_list.append(bottleneck_block) shortcut = True - self.pool2d_avg = Pool2D( + self.pool2d_avg = paddle.fluid.dygraph.nn.Pool2D( pool_size=7, pool_type='avg', global_pooling=True ) - import math stdv = 1.0 / math.sqrt(2048 * 1.0) @@ -193,12 +194,11 @@ def __init__(self, name_scope, config, mode): self.out = Linear( 2048, self.class_dim, - act="softmax", - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), - bias_attr=fluid.param_attr.ParamAttr( - learning_rate=2.0, regularizer=fluid.regularizer.L2Decay(0.0) + bias_attr=paddle.ParamAttr( + learning_rate=2.0, regularizer=paddle.regularizer.L1Decay() ), ) @@ -215,6 +215,7 @@ def forward(self, inputs): y = paddle.mean(y, axis=1) y = paddle.reshape(y, shape=[-1, 2048]) y = self.out(y) + y = paddle.nn.functional.softmax(y) return y diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index d0f329b96cfb81..50d00a653170c1 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -22,9 +22,9 @@ Embedding, Layer, LayerNorm, - Linear, to_variable, ) +from paddle.nn import Linear from paddle.fluid.layers.utils import map_structure from paddle.jit.api import dygraph_to_static_func @@ -107,28 +107,28 @@ def __init__( self.d_model = d_model self.dropout_rate = dropout_rate self.q_fc = Linear( - input_dim=d_model, - output_dim=d_key * n_head, + in_features=d_model, + out_features=d_key * n_head, bias_attr=False, - param_attr=fluid.ParamAttr(initializer=param_initializer), + weight_attr=fluid.ParamAttr(initializer=param_initializer), ) self.k_fc = Linear( - input_dim=d_model, - output_dim=d_key * n_head, + in_features=d_model, + out_features=d_key * n_head, bias_attr=False, - param_attr=fluid.ParamAttr(initializer=param_initializer), + 
weight_attr=fluid.ParamAttr(initializer=param_initializer), ) self.v_fc = Linear( - input_dim=d_model, - output_dim=d_value * n_head, + in_features=d_model, + out_features=d_value * n_head, bias_attr=False, - param_attr=fluid.ParamAttr(initializer=param_initializer), + weight_attr=fluid.ParamAttr(initializer=param_initializer), ) self.proj_fc = Linear( - input_dim=d_value * n_head, - output_dim=d_model, + in_features=d_value * n_head, + out_features=d_model, bias_attr=False, - param_attr=fluid.ParamAttr(initializer=param_initializer), + weight_attr=fluid.ParamAttr(initializer=param_initializer), ) def forward(self, queries, keys, values, attn_bias, cache=None): @@ -174,11 +174,12 @@ class FFN(Layer): def __init__(self, d_inner_hid, d_model, dropout_rate): super().__init__() self.dropout_rate = dropout_rate - self.fc1 = Linear(input_dim=d_model, output_dim=d_inner_hid, act="relu") - self.fc2 = Linear(input_dim=d_inner_hid, output_dim=d_model) + self.fc1 = Linear(d_model, d_inner_hid) + self.fc2 = Linear(d_inner_hid, d_model) def forward(self, x): hidden = self.fc1(x) + hidden = paddle.nn.functional.relu(hidden) if self.dropout_rate: hidden = layers.dropout(hidden, dropout_prob=self.dropout_rate) out = self.fc2(hidden) diff --git a/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py b/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py index 7bc12a99c3abc5..225bff65114bd0 100755 --- a/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py +++ b/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py @@ -117,10 +117,10 @@ def boundary_net(self, main_prog, startup_prog): fleet.init(is_collective=True) x = paddle.static.data(name='x', shape=[-1, 4], dtype='float32') with paddle.static.device_guard('gpu:0'): - linear = fluid.Linear(4, 8, bias_attr=False) + linear = paddle.nn.Linear(4, 8, bias_attr=False) out = linear(x) with paddle.static.device_guard('gpu:1'): - linear = fluid.Linear(8, 5, bias_attr=False) + linear = paddle.nn.Linear(8, 5, bias_attr=False) out = linear(out) avg_cost = paddle.mean(out) strategy = fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/mlu/test_pool2d_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_pool2d_op_mlu.py index 1a7a2f2255145e..1620e6093794d1 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_pool2d_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_pool2d_op_mlu.py @@ -998,110 +998,6 @@ def run_5(): self.assertRaises(ValueError, run_5) -class TestDygraphPool2DAPIError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - # the input of Pool2D must be Variable. 
- data1 = np.random.random((3, 32, 32, 5)).astype('float32') - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='max', - pool_stride=1, - global_pooling=False, - ) - self.assertRaises(TypeError, pool2d, data1) - - # the input dtype of mlu Pool2D must be float16 or float32 - data2 = fluid.layers.data( - name='x1', shape=[3, 32, 32, 5], dtype="int32" - ) - self.assertRaises(TypeError, pool2d, data2) - - def test_data_format_error(self): - with program_guard(Program(), Program()): - # the data_format must be 'NCHW' or 'NHWC' - data1 = np.random.random((3, 32, 32, 5)).astype('float32') - self.assertRaises( - ValueError, - fluid.dygraph.Pool2D, - pool_size=2, - pool_type='max', - pool_stride=1, - global_pooling=False, - data_format='NWHC', - ) - - -class TestDygraphPool2DAPI(unittest.TestCase): - def test_nhwc(self): - with fluid.dygraph.guard(): - data = np.random.random((3, 32, 32, 5)).astype('float32') - x = fluid.dygraph.to_variable(data) - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='max', - pool_stride=1, - pool_padding=[0, 0], - global_pooling=False, - data_format='NHWC', - ) - out1 = pool2d(x) - out2 = pool2D_forward_naive( - data, - [2, 2], - [1, 1], - paddings=[0, 0], - pool_type='max', - data_format='NHWC', - ) - np.testing.assert_allclose(out1.numpy(), out2) - - def test_lower_case(self): - with fluid.dygraph.guard(): - data = np.random.random((3, 32, 32, 5)).astype('float32') - x = fluid.dygraph.to_variable(data) - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='max', - pool_stride=1, - pool_padding=[0, 0], - global_pooling=False, - data_format='nhwc', - ) - out1 = pool2d(x) - out2 = pool2D_forward_naive( - data, - [2, 2], - [1, 1], - paddings=[0, 0], - pool_type='max', - data_format='NHWC', - ) - np.testing.assert_allclose(out1.numpy(), out2) - - def test_upper_case(self): - with fluid.dygraph.guard(): - data = np.random.random((3, 32, 32, 5)).astype('float32') - x = fluid.dygraph.to_variable(data) - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='MAX', - pool_stride=1, - pool_padding=[0, 0], - global_pooling=False, - data_format='nhwc', - ) - out1 = pool2d(x) - out2 = pool2D_forward_naive( - data, - [2, 2], - [1, 1], - paddings=[0, 0], - pool_type='max', - data_format='NHWC', - ) - np.testing.assert_allclose(out1.numpy(), out2) - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check.py index eb98bcd6c09339..036b9d967e8616 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check.py @@ -19,7 +19,7 @@ import paddle import paddle.distributed as dist import paddle.fluid as fluid -from paddle.fluid.dygraph.nn import Linear +from paddle.nn import Linear paddle.seed(1024) np.random.seed(2021) diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check_in_eager_mode.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check_in_eager_mode.py index a5b3c584e5e3b1..5680f7a40e16ca 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check_in_eager_mode.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check_in_eager_mode.py @@ -19,8 +19,8 @@ import paddle import paddle.distributed as dist import paddle.fluid as fluid -from paddle.fluid.dygraph.nn import Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn 
import Linear paddle.seed(1024) np.random.seed(2021) diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py index ec7044e8d51ae7..150abe911e5018 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py @@ -18,7 +18,6 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import Linear, Pool2D class SimpleImgConvPool(fluid.dygraph.Layer): @@ -55,7 +54,7 @@ def __init__( bias_attr=None, ) - self._pool2d = Pool2D( + self._pool2d = paddle.fluid.dygraph.nn.Pool2D( pool_size=pool_size, pool_type=pool_type, pool_stride=pool_stride, @@ -85,23 +84,21 @@ def __init__(self): self.pool_2_shape = 50 * 4 * 4 SIZE = 10 scale = (2.0 / (self.pool_2_shape**2 * SIZE)) ** 0.5 - self._fc = Linear( + self._fc = paddle.nn.Linear( self.pool_2_shape, 10, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale - ) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Normal(mean=0.0, std=scale) ), - act="softmax", ) + self.act = paddle.nn.Softmax() def forward(self, inputs, label): x = self._simple_img_conv_pool_1(inputs) x = self._simple_img_conv_pool_2(x) x = paddle.reshape(x, shape=[-1, self.pool_2_shape]) cost = self._fc(x) - loss = fluid.layers.cross_entropy(cost, label) + loss = fluid.layers.cross_entropy(self.act(cost), label) avg_loss = paddle.mean(loss) return avg_loss diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_shared_unused_var.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_shared_unused_var.py index b5d584327b84c7..1f063f849e3509 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_shared_unused_var.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_shared_unused_var.py @@ -19,6 +19,7 @@ import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.nn import Linear +from paddle.nn import Linear np.random.seed(2021) paddle.seed(1024) @@ -28,7 +29,7 @@ class SimpleNet(fluid.Layer): def __init__(self): # bias is unused parameters, and it share with net_a super().__init__() - self.net_a = Linear(input_dim=10, output_dim=5) + self.net_a = Linear(10, 5) self.net_b = Linear(10, 10) self.bias = self.net_a.bias diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py index 07fef0b4603e88..d84366efdcb698 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_adam_op.py @@ -641,7 +641,7 @@ def test_adam_op_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") a = fluid.dygraph.to_variable(value) - linear = fluid.Linear(13, 5, dtype="float32") + linear = paddle.nn.Linear(13, 5) adam = paddle.optimizer.Adam( learning_rate=0.01, parameters=linear.parameters() @@ -690,7 +690,7 @@ def test_adam_with_grad_clip(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") a = fluid.dygraph.to_variable(value) - linear = fluid.Linear(13, 5, dtype="float32") + linear = paddle.nn.Linear(13, 5) clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0) adam = paddle.optimizer.Adam( 0.1, parameters=linear.parameters(), grad_clip=clip @@ -1095,7 +1095,7 @@ def _adam_optimize_dygraph( trainable=True, ) if use_param_attr: - model = 
paddle.nn.Linear(5, 5, weight_attr) + model = paddle.nn.Linear(5, 5, weight_attr=weight_attr) else: model = paddle.nn.Linear(5, 5) diff --git a/python/paddle/fluid/tests/unittests/test_detach.py b/python/paddle/fluid/tests/unittests/test_detach.py index bd976cf3943477..cf7214b858889b 100644 --- a/python/paddle/fluid/tests/unittests/test_detach.py +++ b/python/paddle/fluid/tests/unittests/test_detach.py @@ -18,7 +18,7 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph import Linear +from paddle.nn import Linear from paddle.fluid.dygraph.base import to_variable @@ -32,40 +32,40 @@ def generate_Data(self): def no_detach_multi(self): data = self.generate_Data() with fluid.dygraph.guard(): - linear_w_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(5.0) + linear_w_param_attrs = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(5.0) ) - linear_b_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(6.0) + linear_b_param_attrs = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(6.0) ) linear = Linear( 4, 10, - param_attr=linear_w_param_attrs, + weight_attr=linear_w_param_attrs, bias_attr=linear_b_param_attrs, ) - linear1_w_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(7.0) + linear1_w_param_attrs = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(7.0) ) - linear1_b_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(8.0) + linear1_b_param_attrs = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(8.0) ) linear1 = Linear( 10, 1, - param_attr=linear1_w_param_attrs, + weight_attr=linear1_w_param_attrs, bias_attr=linear1_b_param_attrs, ) - linear2_w_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(9.0) + linear2_w_param_attrs = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(9.0) ) - linear2_b_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(10.0) + linear2_b_param_attrs = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(10.0) ) linear2 = Linear( 10, 1, - param_attr=linear2_w_param_attrs, + weight_attr=linear2_w_param_attrs, bias_attr=linear2_b_param_attrs, ) data = to_variable(data) @@ -80,28 +80,28 @@ def no_detach_multi(self): def no_detach_single(self): data = self.generate_Data() with fluid.dygraph.guard(): - linear_w_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(5.0) + linear_w_param_attrs = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(5.0) ) - linear_b_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(6.0) + linear_b_param_attrs = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(6.0) ) linear = Linear( 4, 10, - param_attr=linear_w_param_attrs, + weight_attr=linear_w_param_attrs, bias_attr=linear_b_param_attrs, ) - linear1_w_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(7.0) + linear1_w_param_attrs = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(7.0) ) - linear1_b_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(8.0) + linear1_b_param_attrs = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(8.0) ) linear1 = Linear( 10, 1, - param_attr=linear1_w_param_attrs, + weight_attr=linear1_w_param_attrs, bias_attr=linear1_b_param_attrs, ) data = to_variable(data) @@ -115,8 +115,8 @@ def no_detach_single(self): def detach_multi(self): data = self.generate_Data() with fluid.dygraph.guard(): - linear_w_param_attrs = 
fluid.ParamAttr( - initializer=fluid.initializer.Constant(5.0) + linear_w_param_attrs = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(5.0) ) linear_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(6.0) @@ -124,11 +124,11 @@ def detach_multi(self): linear = Linear( 4, 10, - param_attr=linear_w_param_attrs, + weight_attr=linear_w_param_attrs, bias_attr=linear_b_param_attrs, ) - linear1_w_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(7.0) + linear1_w_param_attrs = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(7.0) ) linear1_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(8.0) @@ -136,19 +136,19 @@ def detach_multi(self): linear1 = Linear( 10, 1, - param_attr=linear1_w_param_attrs, + weight_attr=linear1_w_param_attrs, bias_attr=linear1_b_param_attrs, ) - linear2_w_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(9.0) + linear2_w_param_attrs = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(9.0) ) - linear2_b_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(10.0) + linear2_b_param_attrs = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(10.0) ) linear2 = Linear( 10, 1, - param_attr=linear2_w_param_attrs, + weight_attr=linear2_w_param_attrs, bias_attr=linear2_b_param_attrs, ) data = to_variable(data) diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py b/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py index 204361fc92d8b8..b9a130be6bfbb8 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py @@ -18,7 +18,7 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph.nn import Linear, Pool2D +from paddle.nn import Linear from paddle.fluid.framework import _test_eager_guard @@ -57,7 +57,7 @@ def __init__( bias_attr=bias_attr, ) - self._pool2d = Pool2D( + self._pool2d = paddle.fluid.dygraph.nn.Pool2D( pool_size=pool_size, pool_type=pool_type, pool_stride=pool_stride, @@ -104,13 +104,9 @@ def __init__(self, dtype="float32"): self._linear = Linear( self.pool_2_shape, 10, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale - ) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Normal(mean=0.0, std=scale) ), - act="softmax", - dtype=dtype, ) def forward(self, inputs, label): @@ -118,6 +114,7 @@ def forward(self, inputs, label): x = paddle.nn.functional.relu(self._simple_img_conv_pool_2(x)) x = paddle.reshape(x, shape=[-1, self.pool_2_shape]) cost = self._linear(x) + cost = paddle.nn.functional.softmax(cost) loss = fluid.layers.cross_entropy(cost, label) avg_loss = paddle.mean(loss) return avg_loss diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py index 3c099642553035..d8b8c2ac4f0dff 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py @@ -20,8 +20,8 @@ import paddle import paddle.fluid as fluid from paddle.fluid import core +from paddle.nn import Linear from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import Linear, Pool2D from paddle.fluid.optimizer import SGDOptimizer SEED = 123123111 @@ -61,7 +61,7 @@ def __init__( bias_attr=None, ) - self._pool2d = Pool2D( + self._pool2d = 
paddle.fluid.dygraph.nn.Pool2D( pool_size=pool_size, pool_type=pool_type, pool_stride=pool_stride, @@ -94,12 +94,9 @@ def __init__(self): self._fc = Linear( self.pool_2_shape, SIZE, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale - ) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Normal(mean=0.0, std=scale) ), - act="softmax", ) def forward(self, inputs): @@ -107,6 +104,7 @@ def forward(self, inputs): x = self._simple_img_conv_pool_2(x) x = paddle.reshape(x, shape=[-1, self.pool_2_shape]) x = self._fc(x) + x = paddle.nn.functional.softmax(x) return x diff --git a/python/paddle/fluid/tests/unittests/test_exception.py b/python/paddle/fluid/tests/unittests/test_exception.py index a42c2f5bad0f0a..c627f8688a1580 100644 --- a/python/paddle/fluid/tests/unittests/test_exception.py +++ b/python/paddle/fluid/tests/unittests/test_exception.py @@ -66,7 +66,7 @@ def test_exception_in_dynamic_mode(self): place = fluid.CPUPlace() with fluid.dygraph.guard(place): x = numpy.random.random(size=(10, 2)).astype('float32') - linear = fluid.dygraph.Linear(1, 10) + linear = paddle.nn.Linear(1, 10) data = fluid.dygraph.to_variable(x) with self.assertRaises(ValueError): res = linear(data) diff --git a/python/paddle/fluid/tests/unittests/test_gradient_clip.py b/python/paddle/fluid/tests/unittests/test_gradient_clip.py index ec2812e4ff32b7..71952b73f5bdce 100644 --- a/python/paddle/fluid/tests/unittests/test_gradient_clip.py +++ b/python/paddle/fluid/tests/unittests/test_gradient_clip.py @@ -407,7 +407,7 @@ def test_none_grad(self): class TestDygraphGradientClip(unittest.TestCase): def test_gradient_clip(self): with fluid.dygraph.guard(): - linear = fluid.dygraph.Linear(5, 5) + linear = paddle.nn.Linear(5, 5) inputs = fluid.layers.uniform_random( [16, 5], min=-10, max=10 ).astype('float32') @@ -602,8 +602,8 @@ def test_gradient_clip(self): with fluid.dygraph.guard(): inputs = fluid.layers.uniform_random( [16, 5], min=-10, max=10 - ).astype('float64') - linear = fluid.dygraph.Linear(5, 5, dtype="float64") + ).astype('float32') + linear = paddle.nn.Linear(5, 5) out = linear(fluid.dygraph.to_variable(inputs)) loss = fluid.layers.reduce_mean(out) loss.backward() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py index 5889d8299dc389..5fc83145d24f44 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py @@ -24,16 +24,20 @@ class AutoPruneLayer0(fluid.Layer): def __init__(self, input_size): super().__init__() - self.linear1 = fluid.dygraph.Linear( + self.linear1 = paddle.nn.Linear( input_size, 5, - param_attr=fluid.initializer.ConstantInitializer(value=2), + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=2) + ), bias_attr=False, ) - self.linear2 = fluid.dygraph.Linear( + self.linear2 = paddle.nn.Linear( 5, 5, - param_attr=fluid.initializer.ConstantInitializer(value=2), + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=2) + ), bias_attr=False, ) @@ -48,16 +52,20 @@ def forward(self, x, y): class AutoPruneLayer1(fluid.Layer): def __init__(self, input_size): super().__init__() - self.linear1 = fluid.dygraph.Linear( + self.linear1 = paddle.nn.Linear( input_size, 5, - param_attr=fluid.initializer.ConstantInitializer(value=2), + weight_attr=paddle.ParamAttr( + 
initializer=paddle.nn.initializer.Constant(value=2) + ), bias_attr=False, ) - self.linear2 = fluid.dygraph.Linear( + self.linear2 = paddle.nn.Linear( 5, 5, - param_attr=fluid.initializer.ConstantInitializer(value=2), + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=2) + ), bias_attr=False, ) @@ -73,8 +81,8 @@ def forward(self, x, y): class AutoPruneLayer2(fluid.Layer): def __init__(self, input_size): super().__init__() - self.linear = fluid.dygraph.Linear(input_size, 10, act=None) - self.linear2 = fluid.dygraph.Linear(1, 1, act=None) + self.linear = paddle.nn.Linear(input_size, 10) + self.linear2 = paddle.nn.Linear(1, 1) def forward(self, x, label): feature = self.linear(x) @@ -90,7 +98,7 @@ def forward(self, x, label): class AutoPruneLayer3(fluid.Layer): def __init__(self, input_size): super().__init__() - self.linear = fluid.dygraph.Linear(input_size, 20, act=None) + self.linear = paddle.nn.Linear(input_size, 20) def forward(self, x, label, test_num): feature = self.linear(x) @@ -111,8 +119,8 @@ def __init__(self, input_size, vocab_size, size, dtype="float32"): super().__init__(dtype=dtype) self.embed0 = fluid.Embedding(size=(vocab_size, size)) self.embed1 = fluid.Embedding(size=(vocab_size, size)) - self.linear_0 = fluid.Linear(input_size, size, dtype=dtype) - self.linear_1 = fluid.Linear(input_size, size, dtype=dtype) + self.linear_0 = paddle.nn.Linear(input_size, size) + self.linear_1 = paddle.nn.Linear(input_size, size) def forward(self, x): # this method involves only the linear layers @@ -133,8 +141,8 @@ def __init__(self, input_size, vocab_size, size, dtype="float32"): super().__init__(dtype=dtype) self.embed0 = fluid.Embedding(size=(vocab_size, size)) self.embed1 = fluid.Embedding(size=(vocab_size, size)) - self.linear_0 = fluid.Linear(input_size, size, dtype=dtype) - self.linear_1 = fluid.Linear(input_size, size, dtype=dtype) + self.linear_0 = paddle.nn.Linear(input_size, size) + self.linear_1 = paddle.nn.Linear(input_size, size) def forward(self, indices): # mind the difference with MyLayer @@ -253,8 +261,8 @@ def func_auto_prune6(self): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - linear = fluid.Linear(13, 5, dtype="float32") - linear2 = fluid.Linear(3, 3, dtype="float32") + linear = paddle.nn.Linear(13, 5) + linear2 = paddle.nn.Linear(3, 3) a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) @@ -276,8 +284,8 @@ def func_auto_prune7(self): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - linear = fluid.Linear(13, 5, dtype="float32") - linear2 = fluid.Linear(3, 3, dtype="float32") + linear = paddle.nn.Linear(13, 5) + linear2 = paddle.nn.Linear(3, 3) a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) @@ -299,8 +307,8 @@ def func_auto_prune8(self): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - linear = fluid.Linear(13, 5, dtype="float32") - linear2 = fluid.Linear(5, 3, dtype="float32") + linear = paddle.nn.Linear(13, 5) + linear2 = paddle.nn.Linear(5, 3) a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) @@ 
-332,8 +340,8 @@ def func_auto_prune9(self): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - linear = fluid.Linear(13, 5, dtype="float32") - linear2 = fluid.Linear(5, 3, dtype="float32") + linear = paddle.nn.Linear(13, 5) + linear2 = paddle.nn.Linear(5, 3) a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) @@ -367,8 +375,8 @@ def func_auto_prune10(self): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - linear = fluid.Linear(13, 5, dtype="float32") - linear2 = fluid.Linear(3, 3, dtype="float32") + linear = paddle.nn.Linear(13, 5) + linear2 = paddle.nn.Linear(3, 3) a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) @@ -462,7 +470,7 @@ def test_case2_prune_no_grad_branch(self): def func_case3_prune_no_grad_branch2(self): with fluid.dygraph.guard(): value1 = np.arange(1).reshape(1, 1) - linear = fluid.dygraph.Linear(1, 1, act=None) + linear = paddle.nn.Linear(1, 1) label = fluid.dygraph.to_variable(value1).astype("float32") label = linear(label) label = fluid.layers.cast(label, dtype="float32") diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index 595ec4fe3e60a0..d6d40dfc61c73b 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -19,8 +19,8 @@ import paddle import paddle.fluid as fluid +from paddle.fluid import core import paddle.fluid.dygraph_utils as dygraph_utils -from paddle.fluid import Linear, core from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard from paddle.fluid.layer_helper import LayerHelper @@ -41,24 +41,24 @@ def forward(self, inputs): class MLP(fluid.Layer): def __init__(self, input_size): super().__init__() - self._linear1 = Linear( + self._linear1 = paddle.nn.Linear( input_size, 3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.1) ), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1) + bias_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.1) ), ) - self._linear2 = Linear( + self._linear2 = paddle.nn.Linear( 3, 4, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.1) ), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1) + bias_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.1) ), ) @@ -269,9 +269,9 @@ def test_create_varbase(self): def test_no_grad_guard(self): data = np.array([[2, 3], [4, 5]]).astype('float32') with fluid.dygraph.guard(): - l0 = fluid.Linear(2, 2) + l0 = paddle.nn.Linear(2, 2) self.assertIsNone(l0.weight._grad_ivar()) - l1 = fluid.Linear(2, 2) + l1 = paddle.nn.Linear(2, 2) with fluid.dygraph.no_grad(): self.assertTrue(l1.weight.stop_gradient is False) tmp = l1.weight * 2 @@ -287,9 +287,9 @@ def test_no_grad_guard(self): def test_paddle_imperative_no_grad_guard(self): data = np.array([[2, 3], 
[4, 5]]).astype('float32') with fluid.dygraph.guard(): - l0 = fluid.Linear(2, 2) + l0 = paddle.nn.Linear(2, 2) self.assertIsNone(l0.weight._grad_ivar()) - l1 = fluid.Linear(2, 2) + l1 = paddle.nn.Linear(2, 2) with paddle.no_grad(): self.assertTrue(l1.weight.stop_gradient is False) tmp = l1.weight * 2 @@ -305,9 +305,9 @@ def test_paddle_imperative_no_grad_guard(self): def test_paddle_imperative_set_grad_enabled(self): data = np.array([[2, 3], [4, 5]]).astype('float32') with fluid.dygraph.guard(): - l0 = fluid.Linear(2, 2) + l0 = paddle.nn.Linear(2, 2) self.assertIsNone(l0.weight._grad_ivar()) - l1 = fluid.Linear(2, 2) + l1 = paddle.nn.Linear(2, 2) with paddle.set_grad_enabled(False): self.assertTrue(l1.weight.stop_gradient is False) tmp = l1.weight * 2 @@ -863,7 +863,7 @@ def func_layer_attrs(self): self.assertRaises(TypeError, my_layer.__setattr__, 'w1', 'str') my_layer.w1 = None self.assertEqual(len(my_layer.parameters()), 0) - my_layer.l1 = fluid.dygraph.Linear(3, 3) + my_layer.l1 = paddle.nn.Linear(3, 3) self.assertEqual(len(my_layer.sublayers()), 1) self.assertRaises(TypeError, my_layer.__setattr__, 'l1', 'str') my_layer.l1 = None diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_layerlist.py b/python/paddle/fluid/tests/unittests/test_imperative_container_layerlist.py index e90a16def8f5bc..0675a67193781b 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_layerlist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_layerlist.py @@ -35,7 +35,7 @@ def forward(self, x): class TestImperativeContainer(unittest.TestCase): def paddle_imperative_list(self): return paddle.nn.LayerList( - [fluid.dygraph.Linear(2**i, 2 ** (i + 1)) for i in range(6)] + [paddle.nn.Linear(2**i, 2 ** (i + 1)) for i in range(6)] ) def layer_list(self, use_fluid_api): @@ -48,13 +48,13 @@ def layer_list(self, use_fluid_api): model = MyLayer(layerlist) res1 = model(x) self.assertListEqual(res1.shape, [5, 2**size]) - model.layerlist[size - 1] = fluid.dygraph.Linear(2 ** (size - 1), 5) + model.layerlist[size - 1] = paddle.nn.Linear(2 ** (size - 1), 5) res2 = model(x) self.assertListEqual(res2.shape, [5, 5]) del model.layerlist[size - 1] res3 = model(x) self.assertListEqual(res3.shape, [5, 2 ** (size - 1)]) - model.layerlist.append(fluid.dygraph.Linear(2 ** (size - 1), 3)) + model.layerlist.append(paddle.nn.Linear(2 ** (size - 1), 3)) res4 = model(x) self.assertListEqual(res4.shape, [5, 3]) res4.backward() @@ -68,14 +68,14 @@ def layer_list(self, use_fluid_api): res6.backward() model3 = MyLayer(layerlist[:-2]) - model3.layerlist.append(fluid.dygraph.Linear(3, 1)) + model3.layerlist.append(paddle.nn.Linear(3, 1)) model3.layerlist.insert( - size - 2, fluid.dygraph.Linear(2 ** (size - 2), 3) + size - 2, paddle.nn.Linear(2 ** (size - 2), 3) ) res7 = model3(x) self.assertListEqual(res7.shape, [5, 1]) to_be_extended = [ - fluid.dygraph.Linear(3**i, 3 ** (i + 1)) for i in range(3) + paddle.nn.Linear(3**i, 3 ** (i + 1)) for i in range(3) ] model3.layerlist.extend(to_be_extended) res8 = model3(x) @@ -83,13 +83,13 @@ def layer_list(self, use_fluid_api): res8.backward() model4 = MyLayer(layerlist[:3]) - model4.layerlist[-1] = fluid.dygraph.Linear(4, 5) + model4.layerlist[-1] = paddle.nn.Linear(4, 5) res9 = model4(x) self.assertListEqual(res9.shape, [5, 5]) del model4.layerlist[-1] res10 = model4(x) self.assertListEqual(res10.shape, [5, 4]) - model4.layerlist.insert(-1, fluid.dygraph.Linear(2, 2)) + model4.layerlist.insert(-1, paddle.nn.Linear(2, 2)) res11 = model4(x) 
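The Linear replacements in these hunks all follow one pattern: the legacy fluid.Linear / fluid.dygraph.Linear took dtype, param_attr, and an act string, while paddle.nn.Linear takes only in_features, out_features, weight_attr, and bias_attr, so any activation becomes an explicit layer or functional call. A minimal sketch of the equivalence (the tensor values and attribute names below are illustrative, not part of the patch):

import numpy as np
import paddle

# old form: fluid.dygraph.Linear(13, 5, act='relu', param_attr=attr, dtype='float32')
attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(value=0.1))
linear = paddle.nn.Linear(13, 5, weight_attr=attr)  # parameter dtype follows paddle.get_default_dtype()
act = paddle.nn.ReLU()

x = paddle.to_tensor(np.arange(26).reshape(2, 13).astype("float32"))
y = act(linear(x))  # activation applied explicitly after the layer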
self.assertListEqual(res11.shape, [5, 4]) res11.backward() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py index 7ed45d58703c22..eca6e5d81d0109 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py @@ -15,6 +15,7 @@ import unittest import numpy as np +from paddle.nn import Linear import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard @@ -25,19 +26,17 @@ def func_sequential(self): data = np.random.uniform(-1, 1, [5, 10]).astype('float32') with fluid.dygraph.guard(): data = fluid.dygraph.to_variable(data) - model1 = fluid.dygraph.Sequential( - fluid.Linear(10, 1), fluid.Linear(1, 2) - ) + model1 = fluid.dygraph.Sequential(Linear(10, 1), Linear(1, 2)) res1 = model1(data) self.assertListEqual(res1.shape, [5, 2]) - model1[1] = fluid.Linear(1, 3) + model1[1] = Linear(1, 3) res1 = model1(data) self.assertListEqual(res1.shape, [5, 3]) loss1 = fluid.layers.reduce_mean(res1) loss1.backward() - l1 = fluid.Linear(10, 1) - l2 = fluid.Linear(1, 3) + l1 = Linear(10, 1) + l2 = Linear(1, 3) model2 = fluid.dygraph.Sequential(('l1', l1), ('l2', l2)) self.assertEqual(len(model2), 2) res2 = model2(data) @@ -48,8 +47,8 @@ def func_sequential(self): self.assertEqual(len(model2), 1) res2 = model2(data) self.assertListEqual(res2.shape, [5, 1]) - model2.add_sublayer('l3', fluid.Linear(1, 3)) - model2.add_sublayer('l4', fluid.Linear(3, 4)) + model2.add_sublayer('l3', Linear(1, 3)) + model2.add_sublayer('l4', Linear(3, 4)) self.assertEqual(len(model2), 3) res2 = model2(data) self.assertListEqual(res2.shape, [5, 4]) @@ -66,19 +65,17 @@ def func_sequential_list_params(self): data = np.random.uniform(-1, 1, [5, 10]).astype('float32') with fluid.dygraph.guard(): data = fluid.dygraph.to_variable(data) - model1 = fluid.dygraph.Sequential( - fluid.Linear(10, 1), fluid.Linear(1, 2) - ) + model1 = fluid.dygraph.Sequential(Linear(10, 1), Linear(1, 2)) res1 = model1(data) self.assertListEqual(res1.shape, [5, 2]) - model1[1] = fluid.Linear(1, 3) + model1[1] = Linear(1, 3) res1 = model1(data) self.assertListEqual(res1.shape, [5, 3]) loss1 = fluid.layers.reduce_mean(res1) loss1.backward() - l1 = fluid.Linear(10, 1) - l2 = fluid.Linear(1, 3) + l1 = Linear(10, 1) + l2 = Linear(1, 3) model2 = fluid.dygraph.Sequential(['l1', l1], ['l2', l2]) self.assertEqual(len(model2), 2) res2 = model2(data) @@ -89,8 +86,8 @@ def func_sequential_list_params(self): self.assertEqual(len(model2), 1) res2 = model2(data) self.assertListEqual(res2.shape, [5, 1]) - model2.add_sublayer('l3', fluid.Linear(1, 3)) - model2.add_sublayer('l4', fluid.Linear(3, 4)) + model2.add_sublayer('l3', Linear(1, 3)) + model2.add_sublayer('l4', Linear(3, 4)) self.assertEqual(len(model2), 3) res2 = model2(data) self.assertListEqual(res2.shape, [5, 4]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py index b02b5105000163..e3b50aa123b36f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py @@ -19,7 +19,7 @@ import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.dygraph as dygraph -from paddle.fluid.dygraph.nn import Linear +from paddle.nn import Linear class MLP(fluid.Layer): diff --git 
a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py index 38c19677f79c1c..15f9365772b3eb 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py @@ -23,8 +23,8 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.dygraph import Linear from paddle.fluid.dygraph.base import to_variable +from paddle.nn import Linear from paddle.fluid.framework import _test_eager_guard @@ -44,20 +44,30 @@ def __init__(self): Linear( 256 if i == 0 else self._hid_sizes[i - 1], self._hid_sizes[i], - act='relu', ), ) ) + self._user_layers.append( + self.add_sublayer( + 'user_layer_act_%d' % i, + paddle.nn.ReLU(), + ) + ) self._item_layers.append( self.add_sublayer( 'item_layer_%d' % i, Linear( 256 if i == 0 else self._hid_sizes[i - 1], self._hid_sizes[i], - act='relu', ), ) ) + self._item_layers.append( + self.add_sublayer( + 'item_layer_act_%d' % i, + paddle.nn.ReLU(), + ) + ) def forward(self, users, items): users = self._user_latent(users) @@ -83,10 +93,15 @@ def __init__(self): Linear( 256 * 2 if i == 0 else self._hid_sizes[i - 1], self._hid_sizes[i], - act='relu', ), ) ) + self._match_layers.append( + self.add_sublayer( + 'match_layer_act_%d' % i, + paddle.nn.ReLU(), + ) + ) def forward(self, users, items): users = self._user_latent(users) @@ -115,7 +130,7 @@ def __init__(self, num_users, num_items, matrix): self._mlp = MLP() self._dmf = DMF() - self._match_fc = Linear(128, 1, act='sigmoid') + self._match_fc = Linear(128, 1) def forward(self, users, items): # users_emb = self._user_emb(users) @@ -134,6 +149,7 @@ def forward(self, users, items): [mlp_predictive, dmf_predictive], axis=len(mlp_predictive.shape) - 1 ) prediction = self._match_fc(predictive) + prediction = paddle.nn.functional.sigmoid(prediction) return prediction diff --git a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py index ec879d9cf0178c..2c020c0465bb71 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py @@ -586,7 +586,7 @@ def func_compare(self): ) def model_f(input): - linear = fluid.dygraph.Linear(5, 3, bias_attr=False) + linear = paddle.nn.Linear(5, 3) for i in range(10): if i == 0: out = linear(input) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_framework.py b/python/paddle/fluid/tests/unittests/test_imperative_framework.py index fd86679a77a203..4aec8b308a6c8d 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_framework.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_framework.py @@ -20,29 +20,30 @@ import paddle import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard +import paddle class MLP(fluid.Layer): def __init__(self, input_size): super().__init__() - self._linear1 = fluid.dygraph.Linear( + self._linear1 = paddle.nn.Linear( input_size, 3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.1) ), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1) + bias_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.1) ), ) - self._linear2 = fluid.dygraph.Linear( + self._linear2 = paddle.nn.Linear( 3, 4, - 
param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.1) ), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1) + bias_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.1) ), ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gan.py b/python/paddle/fluid/tests/unittests/test_imperative_gan.py index 781253449d58a8..0b1ee16d32f583 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gan.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gan.py @@ -20,7 +20,7 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid import Linear +from paddle.nn import Linear from paddle.fluid.dygraph.base import to_variable from paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import SGDOptimizer @@ -29,11 +29,12 @@ class Discriminator(fluid.Layer): def __init__(self): super().__init__() - self._fc1 = Linear(1, 32, act='elu') + self._fc1 = Linear(1, 32) self._fc2 = Linear(32, 1) def forward(self, inputs): x = self._fc1(inputs) + x = paddle.nn.functional.elu(x) x = self._fc2(x) return x @@ -41,13 +42,15 @@ def forward(self, inputs): class Generator(fluid.Layer): def __init__(self): super().__init__() - self._fc1 = Linear(2, 64, act='elu') - self._fc2 = Linear(64, 64, act='elu') + self._fc1 = Linear(2, 64) + self._fc2 = Linear(64, 64) self._fc3 = Linear(64, 1) def forward(self, inputs): x = self._fc1(inputs) + x = paddle.nn.functional.elu(x) x = self._fc2(x) + x = paddle.nn.functional.elu(x) x = self._fc3(x) return x diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py b/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py index 57756945523364..5354b6c403096f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_layer_apply.py @@ -29,10 +29,10 @@ def __init__(self, num_classes=10, classifier_activation='softmax'): self.features = nn.Sequential( nn.Conv2D(1, 6, 3, stride=1, padding=1), nn.ReLU(), - paddle.fluid.dygraph.Pool2D(2, 'max', 2), + paddle.nn.MaxPool2D(2, 2), nn.Conv2D(6, 16, 5, stride=1, padding=0), nn.ReLU(), - paddle.fluid.dygraph.Pool2D(2, 'max', 2), + paddle.nn.MaxPool2D(2, 2), ) if num_classes > 0: diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py b/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py index 59717d48949332..410ed77de26e83 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_layer_children.py @@ -28,10 +28,10 @@ def __init__(self): self.features = nn.Sequential( nn.Conv2D(1, 6, 3, stride=1, padding=1), nn.ReLU(), - paddle.fluid.dygraph.Pool2D(2, 'max', 2), + paddle.nn.MaxPool2D(2, 2), nn.Conv2D(6, 16, 5, stride=1, padding=0), nn.ReLU(), - paddle.fluid.dygraph.Pool2D(2, 'max', 2), + paddle.nn.MaxPool2D(2, 2), ) def forward(self, inputs): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py b/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py index abcb811f671a6c..335db28d70c2d0 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py @@ -19,6 +19,7 @@ import paddle.fluid as fluid import paddle.fluid.dygraph 
as dygraph from paddle.fluid.framework import _test_eager_guard +import paddle class TestImperativeLayerTrainable(unittest.TestCase): @@ -28,7 +29,7 @@ def func_set_trainable(self): label = dygraph.to_variable(label) - linear = dygraph.Linear(10, 10) + linear = paddle.nn.Linear(10, 10) y = linear(label) self.assertFalse(y.stop_gradient) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py index b3e23565799b18..8e9b6c7f2ff8ef 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -24,13 +24,16 @@ from paddle.fluid.dygraph.nn import ( NCE, BatchNorm, - Conv3D, Embedding, GroupNorm, LayerNorm, - Linear, + NCE, PRelu, ) +from paddle.nn import Linear +import numpy as np +import os +import tempfile class TestDygraphLoadStatic(unittest.TestCase): @@ -200,11 +203,11 @@ def __init__(self): in_channels=10, out_channels=10, kernel_size=5 ) - self.conv3d_1 = Conv3D( - num_channels=3, num_filters=2, filter_size=3, act="relu" + self.conv3d_1 = paddle.nn.Conv3D( + in_channels=3, out_channels=2, kernel_size=3 ) - self.conv3d_2 = Conv3D( - num_channels=3, num_filters=2, filter_size=3, act="relu" + self.conv3d_2 = paddle.nn.Conv3D( + in_channels=3, out_channels=2, kernel_size=3 ) self.batch_norm_1 = BatchNorm(10) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py index 2ca175390d5516..69796f69c6b23c 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py @@ -15,15 +15,16 @@ import unittest import numpy as np -from test_imperative_base import new_program_scope from utils import DyGraphProgramDescTracerTestHelper import paddle import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid.dygraph.nn import Linear, Pool2D -from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard from paddle.fluid.optimizer import SGDOptimizer +from paddle.nn import Linear +from test_imperative_base import new_program_scope +from utils import DyGraphProgramDescTracerTestHelper +from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph class SimpleImgConvPool(fluid.dygraph.Layer): @@ -59,8 +60,7 @@ def __init__( weight_attr=None, bias_attr=None, ) - - self._pool2d = Pool2D( + self._pool2d = paddle.fluid.dygraph.nn.Pool2D( pool_size=pool_size, pool_type=pool_type, pool_stride=pool_stride, @@ -93,12 +93,9 @@ def __init__(self): self._fc = Linear( self.pool_2_shape, 10, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale - ) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Normal(mean=0.0, std=scale) ), - act="softmax", ) def forward(self, inputs): @@ -106,6 +103,7 @@ def forward(self, inputs): x = self._simple_img_conv_pool_2(x) x = paddle.reshape(x, shape=[-1, self.pool_2_shape]) x = self._fc(x) + x = paddle.nn.functional.softmax(x) return x diff --git a/python/paddle/fluid/tests/unittests/test_imperative_named_members.py b/python/paddle/fluid/tests/unittests/test_imperative_named_members.py index 622839253d2ab2..faaa02ea46a5d0 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_named_members.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_named_members.py @@ -24,7 +24,7 @@ class 
MyLayer(fluid.Layer): def __init__(self, num_channel, dim, num_filter=5): super().__init__() - self.fc = fluid.dygraph.Linear(dim, dim) + self.fc = paddle.nn.Linear(dim, dim) self.conv = paddle.nn.Conv2D(num_channel, num_channel, num_filter) def forward(self, x): @@ -36,8 +36,8 @@ def forward(self, x): class TestImperativeNamedSubLayers(unittest.TestCase): def func_test_named_sublayers(self): with fluid.dygraph.guard(): - fc1 = fluid.Linear(10, 3) - fc2 = fluid.Linear(3, 10, bias_attr=False) + fc1 = paddle.nn.Linear(10, 3) + fc2 = paddle.nn.Linear(3, 10, bias_attr=False) custom = MyLayer(3, 10) model = paddle.nn.Sequential(fc1, fc2, custom) named_sublayers = model.named_sublayers() @@ -71,8 +71,8 @@ def test_named_sublayers(self): class TestImperativeNamedParameters(unittest.TestCase): def func_test_named_parameters(self): with fluid.dygraph.guard(): - fc1 = fluid.Linear(10, 3) - fc2 = fluid.Linear(3, 10, bias_attr=False) + fc1 = paddle.nn.Linear(10, 3) + fc2 = paddle.nn.Linear(3, 10, bias_attr=False) custom = MyLayer(3, 10) model = paddle.nn.Sequential(fc1, fc2, custom) @@ -98,8 +98,8 @@ def func_test_dir_layer(self): class Mymodel(fluid.dygraph.Layer): def __init__(self): super().__init__() - self.linear1 = fluid.dygraph.Linear(10, 10) - self.linear2 = fluid.dygraph.Linear(5, 5) + self.linear1 = paddle.nn.Linear(10, 10) + self.linear2 = paddle.nn.Linear(5, 5) self.conv2d = paddle.nn.Conv2D(3, 2, 3) self.embedding = fluid.dygraph.Embedding(size=[128, 16]) self.h_0 = fluid.dygraph.to_variable( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index fcaafc72b8cce0..d89d7d6b258b24 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -26,8 +26,8 @@ Embedding, GRUUnit, Linear, - Pool2D, ) +from paddle.nn import Linear from paddle.fluid.framework import _test_eager_guard @@ -112,11 +112,9 @@ def __init__( self.bn_1_layer = BatchNorm(out_ch[1], act=act, is_test=is_test) if self.pool: - self.pool_layer = Pool2D( - pool_size=2, - pool_type='max', - pool_stride=2, - use_cudnn=use_cudnn, + self.pool_layer = paddle.nn.MaxPool2D( + kernel_size=2, + stride=2, ceil_mode=True, ) @@ -232,10 +230,10 @@ def __init__( self.ocr_convs = OCRConv(is_test=is_test, use_cudnn=use_cudnn) self.fc_1_layer = Linear( - 32, rnn_hidden_size * 3, param_attr=para_attr, bias_attr=False + 32, rnn_hidden_size * 3, weight_attr=para_attr, bias_attr=False ) self.fc_2_layer = Linear( - 32, rnn_hidden_size * 3, param_attr=para_attr, bias_attr=False + 32, rnn_hidden_size * 3, weight_attr=para_attr, bias_attr=False ) self.gru_forward_layer = DynamicGRU( size=rnn_hidden_size, @@ -295,10 +293,8 @@ class SimpleAttention(fluid.dygraph.Layer): def __init__(self, decoder_size): super().__init__() - self.fc_1 = Linear( - decoder_size, decoder_size, act=None, bias_attr=False - ) - self.fc_2 = Linear(decoder_size, 1, act=None, bias_attr=False) + self.fc_1 = Linear(decoder_size, decoder_size, bias_attr=False) + self.fc_2 = Linear(decoder_size, 1, bias_attr=False) def forward(self, encoder_vec, encoder_proj, decoder_state): @@ -344,9 +340,7 @@ def __init__(self, decoder_size, num_classes): self.gru_unit = GRUUnit( size=decoder_size * 3, param_attr=None, bias_attr=None ) - self.out_layer = Linear( - decoder_size, num_classes + 2, bias_attr=None, act='softmax' - ) + self.out_layer = Linear(decoder_size, num_classes + 
2, bias_attr=None) self.decoder_size = decoder_size @@ -373,6 +367,7 @@ def forward( h, _, _ = self.gru_unit(decoder_inputs, hidden_mem) hidden_mem = h out = self.out_layer(h) + out = paddle.nn.functional.softmax(out) res.append(out) res1 = fluid.layers.concat(res, axis=1) @@ -388,7 +383,6 @@ def __init__(self): Config.encoder_size, Config.decoder_size, bias_attr=False, - act='relu', ) self.embedding = Embedding( [Config.num_classes + 2, Config.word_vector_dim], dtype='float32' @@ -406,6 +400,7 @@ def forward(self, inputs, label_in): backward_first, [-1, backward_first.shape[2]] ) decoder_boot = self.fc(backward_first) + decoder_boot = paddle.nn.functional.relu(decoder_boot) label_in = paddle.reshape(label_in, [-1]) trg_embedding = self.embedding(label_in) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py index 521ff77d58f358..1650532e49d7a9 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py @@ -16,14 +16,10 @@ import unittest import numpy as np -from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid -from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer from paddle.fluid import core -from paddle.fluid.dygraph import Linear -from paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import ( AdadeltaOptimizer, AdagradOptimizer, @@ -43,6 +39,10 @@ RMSPropOptimizer, SGDOptimizer, ) +from test_imperative_base import new_program_scope +from paddle.fluid.framework import _test_eager_guard + +from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer # Note(wangzhongpu) # In dygraph, don't support ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer. 
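The optimizer tests above keep the legacy paddle.fluid.optimizer classes; only the layers they drive change. In dygraph mode these optimizers receive the parameters explicitly through parameter_list, so after the migration they are simply fed paddle.nn.Linear parameters. A hedged sketch of that usage (learning rate and shapes are illustrative):

import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import SGDOptimizer

with fluid.dygraph.guard():
    linear_1 = paddle.nn.Linear(10, 10)
    linear_2 = paddle.nn.Linear(10, 10)
    # legacy fluid optimizers still take the parameter list explicitly in dygraph
    sgd = SGDOptimizer(
        1.0, parameter_list=linear_1.parameters() + linear_2.parameters()
    )
    x = fluid.dygraph.to_variable(np.random.randn(3, 10).astype("float32"))
    loss = paddle.mean(linear_2(linear_1(x)))
    loss.backward()
    sgd.minimize(loss)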
@@ -52,8 +52,8 @@ class MLP(fluid.Layer): def __init__(self, param_attr=None, bias_attr=None): super().__init__() - self._fc1 = Linear(784, 10) - self._fc2 = Linear(10, 10) + self._fc1 = paddle.nn.Linear(784, 10) + self._fc2 = paddle.nn.Linear(10, 10) def forward(self, inputs): y = self._fc1(inputs) @@ -473,7 +473,7 @@ def func_test_constant_lr(self): with fluid.dygraph.guard(): a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = fluid.dygraph.nn.Linear(10, 10) + linear = paddle.nn.Linear(10, 10) a = fluid.dygraph.to_variable(a) @@ -504,7 +504,7 @@ def func_test_lr_decay(self): with fluid.dygraph.guard(): a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = fluid.dygraph.nn.Linear(10, 10) + linear = paddle.nn.Linear(10, 10) a = fluid.dygraph.to_variable(a) @@ -540,7 +540,7 @@ def func_test_lr_decay_natural_exp(self): with fluid.dygraph.guard(): a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = fluid.dygraph.nn.Linear(10, 10) + linear = paddle.nn.Linear(10, 10) a = fluid.dygraph.to_variable(a) @@ -579,7 +579,7 @@ def func_test_set_lr(self): with fluid.dygraph.guard(): a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = fluid.dygraph.nn.Linear(10, 10) + linear = paddle.nn.Linear(10, 10) a = fluid.dygraph.to_variable(a) @@ -951,8 +951,8 @@ def test_recompute(self): class TestImperativeOptimizerList(unittest.TestCase): def func_test_parameter_list(self): with fluid.dygraph.guard(): - linear_1 = Linear(10, 10) - linear_2 = Linear(10, 10) + linear_1 = paddle.nn.Linear(10, 10) + linear_2 = paddle.nn.Linear(10, 10) sgd = SGDOptimizer( 1.0, diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py index 8bc9a953aaf297..48ee814b4ddc91 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py @@ -16,14 +16,10 @@ import unittest import numpy as np -from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid -from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer from paddle.fluid import core -from paddle.fluid.dygraph import Linear -from paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import ( AdadeltaOptimizer, AdagradOptimizer, @@ -40,6 +36,10 @@ RecomputeOptimizer, RMSPropOptimizer, ) +from test_imperative_base import new_program_scope +from paddle.fluid.framework import _test_eager_guard + +from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer # Note(wangzhongpu) # In dygraph, don't support ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer. 
@@ -49,8 +49,8 @@ class MLP(fluid.Layer): def __init__(self, param_attr=None, bias_attr=None): super().__init__() - self._fc1 = Linear(784, 10) - self._fc2 = Linear(10, 10) + self._fc1 = paddle.nn.Linear(784, 10) + self._fc2 = paddle.nn.Linear(10, 10) def forward(self, inputs): y = self._fc1(inputs) @@ -611,7 +611,7 @@ def func_test_constant_lr(self): with fluid.dygraph.guard(): a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = fluid.dygraph.nn.Linear(10, 10) + linear = paddle.nn.Linear(10, 10) a = fluid.dygraph.to_variable(a) @@ -640,7 +640,7 @@ def func_test_lr_decay(self): with fluid.dygraph.guard(): a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = fluid.dygraph.nn.Linear(10, 10) + linear = paddle.nn.Linear(10, 10) a = fluid.dygraph.to_variable(a) @@ -674,7 +674,7 @@ def func_test_lr_scheduler_natural_exp(self): with fluid.dygraph.guard(): a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = fluid.dygraph.nn.Linear(10, 10) + linear = paddle.nn.Linear(10, 10) a = fluid.dygraph.to_variable(a) b = linear(a) @@ -704,7 +704,7 @@ def func_test_set_lr(self): with fluid.dygraph.guard(): a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") - linear = fluid.dygraph.nn.Linear(10, 10) + linear = paddle.nn.Linear(10, 10) a = fluid.dygraph.to_variable(a) @@ -1071,8 +1071,8 @@ def test_recompute(self): class TestImperativeOptimizerList(unittest.TestCase): def func_test_parameter_list(self): with fluid.dygraph.guard(): - linear_1 = Linear(10, 10) - linear_2 = Linear(10, 10) + linear_1 = paddle.nn.Linear(10, 10) + linear_2 = paddle.nn.Linear(10, 10) sgd = paddle.optimizer.SGD( 1.0, diff --git a/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py b/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py index 042fb294ff9fd1..29d42076e2c739 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py @@ -18,6 +18,7 @@ import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard +import paddle class TestImperativePartitialBackward(unittest.TestCase): @@ -25,8 +26,8 @@ def func_partitial_backward(self): with fluid.dygraph.guard(): x = np.random.randn(2, 4, 5).astype("float32") x = fluid.dygraph.to_variable(x) - linear1 = fluid.dygraph.Linear(5, 10) - linear2 = fluid.dygraph.Linear(5, 10) + linear1 = paddle.nn.Linear(5, 10) + linear2 = paddle.nn.Linear(5, 10) y = linear1(x[:, :2]) z = linear2(x[:, 2:]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py index 0181c7a431c369..bea24aa2739327 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py @@ -15,12 +15,11 @@ import unittest import numpy as np -from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid -import paddle.fluid.dygraph.nn as nn from paddle.fluid import core +from test_imperative_base import new_program_scope from paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import SGDOptimizer @@ -29,8 +28,8 @@ class Policy(fluid.dygraph.Layer): def __init__(self, input_size): super().__init__() - self.affine1 = nn.Linear(input_size, 128) - self.affine2 = nn.Linear(128, 2) + self.affine1 = paddle.nn.Linear(input_size, 128) + self.affine2 = paddle.nn.Linear(128, 
2) self.dropout_ratio = 0.6 self.saved_log_probs = [] diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index eca1e2d8cce263..6c3fdf77a2d1c7 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -20,7 +20,8 @@ import paddle import paddle.fluid as fluid -from paddle.fluid import BatchNorm, Linear, Pool2D, core +from paddle.fluid import core +from paddle.fluid import BatchNorm from paddle.fluid.dygraph.base import to_variable from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard from paddle.fluid.layer_helper import LayerHelper @@ -193,8 +194,8 @@ def __init__(self, layers=50, class_dim=102, use_cudnn=True): act='relu', use_cudnn=use_cudnn, ) - self.pool2d_max = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' + self.pool2d_max = paddle.nn.MaxPool2D( + kernel_size=3, stride=2, padding=1 ) self.bottleneck_block_list = [] @@ -215,8 +216,7 @@ def __init__(self, layers=50, class_dim=102, use_cudnn=True): ) self.bottleneck_block_list.append(bottleneck_block) shortcut = True - - self.pool2d_avg = Pool2D( + self.pool2d_avg = paddle.fluid.dygraph.nn.Pool2D( pool_size=7, pool_type='avg', global_pooling=True ) @@ -226,11 +226,10 @@ def __init__(self, layers=50, class_dim=102, use_cudnn=True): stdv = 1.0 / math.sqrt(2048 * 1.0) - self.out = Linear( + self.out = paddle.nn.Linear( self.pool2d_avg_output, class_dim, - act='softmax', - param_attr=fluid.param_attr.ParamAttr( + weight_attr=fluid.param_attr.ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv) ), ) @@ -243,6 +242,7 @@ def forward(self, inputs): y = self.pool2d_avg(y) y = paddle.reshape(y, shape=[-1, self.pool2d_avg_output]) y = self.out(y) + y = paddle.nn.functional.softmax(y) return y diff --git a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py index 1970c63bace057..265f3720680088 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py @@ -20,7 +20,8 @@ import paddle import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid.dygraph.nn import BatchNorm, Linear, Pool2D +from paddle.fluid.dygraph.nn import BatchNorm +from test_imperative_base import new_program_scope from paddle.fluid.framework import _test_eager_guard from paddle.fluid.layer_helper import LayerHelper @@ -104,29 +105,34 @@ def __init__(self, num_channels, reduction_ratio): super().__init__() self._num_channels = num_channels - self._pool = Pool2D(pool_size=0, pool_type='avg', global_pooling=True) - self._squeeze = Linear( + self._pool = paddle.fluid.dygraph.nn.Pool2D( + pool_size=0, pool_type='avg', global_pooling=True + ) + self._squeeze = paddle.nn.Linear( num_channels, num_channels // reduction_ratio, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.05) ), - act='relu', ) - self._excitation = Linear( + self.act_1 = paddle.nn.ReLU() + self._excitation = paddle.nn.Linear( num_channels // reduction_ratio, num_channels, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.05) ), - act='sigmoid', ) + self.act_2 = 
paddle.nn.Softmax() + def forward(self, input): y = self._pool(input) y = paddle.reshape(y, shape=[-1, self._num_channels]) y = self._squeeze(y) + y = self.act_1(y) y = self._excitation(y) + y = self.act_2(y) y = fluid.layers.elementwise_mul(x=input, y=y, axis=0) return y @@ -218,9 +224,7 @@ def __init__(self, layers=50, class_dim=102): stride=2, act='relu', ) - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' - ) + self.pool = paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1) elif layers == 101: cardinality = 32 reduction_ratio = 16 @@ -233,9 +237,7 @@ def __init__(self, layers=50, class_dim=102): stride=2, act='relu', ) - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' - ) + self.pool = paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1) elif layers == 152: cardinality = 64 reduction_ratio = 16 @@ -262,9 +264,7 @@ def __init__(self, layers=50, class_dim=102): stride=1, act='relu', ) - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max' - ) + self.pool = paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1) self.bottleneck_block_list = [] num_channels = 64 @@ -287,8 +287,7 @@ def __init__(self, layers=50, class_dim=102): num_channels = bottleneck_block._num_channels_out self.bottleneck_block_list.append(bottleneck_block) shortcut = True - - self.pool2d_avg = Pool2D( + self.pool2d_avg = paddle.fluid.dygraph.nn.Pool2D( pool_size=7, pool_type='avg', global_pooling=True ) import math @@ -297,14 +296,14 @@ def __init__(self, layers=50, class_dim=102): self.pool2d_avg_output = num_filters[-1] * 4 * 1 * 1 - self.out = Linear( + self.out = paddle.nn.Linear( self.pool2d_avg_output, class_dim, - act='softmax', - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), ) + self.out_act = paddle.nn.Softmax() def forward(self, inputs): if self.layers == 50 or self.layers == 101: @@ -321,7 +320,7 @@ def forward(self, inputs): y = self.pool2d_avg(y) y = paddle.reshape(y, shape=[-1, self.pool2d_avg_output]) y = self.out(y) - return y + return self.out_act(y) class TestImperativeResneXt(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py b/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py index 72d987b0d4c5fd..959ed8bbfec38c 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py @@ -16,14 +16,15 @@ import unittest import numpy as np - +import os +import paddle import paddle.fluid as fluid class SimpleFCLayer(fluid.dygraph.Layer): def __init__(self, feature_size, batch_size, fc_size): super().__init__() - self._linear = fluid.dygraph.Linear(feature_size, fc_size) + self._linear = paddle.nn.Linear(feature_size, fc_size) self._offset = fluid.dygraph.to_variable( np.random.random((batch_size, fc_size)).astype('float32') ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py index ccdf99f0f783dd..f5737956c3b600 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py @@ -19,10 +19,14 @@ import paddle 
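The pooling changes follow the same idea: Pool2D(pool_size=k, pool_type='max', pool_stride=s, pool_padding=p) becomes paddle.nn.MaxPool2D(kernel_size=k, stride=s, padding=p), while the global average pooling cases stay on the legacy paddle.fluid.dygraph.nn.Pool2D for now. A minimal sketch of the max-pooling equivalence (input shape is illustrative):

import numpy as np
import paddle

x = paddle.to_tensor(np.random.rand(2, 3, 8, 8).astype("float32"))  # NCHW layout

# old form: Pool2D(pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
pool = paddle.nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
y = pool(x)
print(y.shape)  # [2, 3, 4, 4]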
import paddle.fluid as fluid -import paddle.nn.functional as F -from paddle.fluid import Embedding, Layer, LayerNorm, Linear, core -from paddle.fluid.dygraph import guard, to_variable +from paddle.fluid import Embedding, LayerNorm, Layer +from paddle.nn import Linear +from paddle.fluid.dygraph import to_variable, guard +from test_imperative_base import new_program_scope from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard +from paddle.fluid import core +import numpy as np +import paddle.nn.functional as F from paddle.jit import TracedLayer np.set_printoptions(suppress=True) @@ -428,12 +432,13 @@ def forward(self, prev_out, out, process_cmd, dropout_rate=0.0): class PositionwiseFeedForwardLayer(Layer): def __init__(self, d_inner_hid, d_hid, dropout_rate): super().__init__() - self._i2h = Linear(d_hid, d_inner_hid, act="relu") + self._i2h = Linear(d_hid, d_inner_hid) self._h2o = Linear(d_inner_hid, d_hid) self._dropout_rate = dropout_rate def forward(self, x): hidden = self._i2h(x) + hidden = paddle.nn.functional.relu(hidden) if self._dropout_rate: hidden = fluid.layers.dropout( hidden, diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py index faf6a61df3f390..eac87065048954 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py @@ -23,8 +23,8 @@ import paddle import paddle.fluid as fluid +from paddle.nn import Linear from paddle.fluid import unique_name -from paddle.fluid.dygraph import Linear from paddle.fluid.dygraph.io import INFER_PARAMS_INFO_SUFFIX from paddle.fluid.layers.utils import flatten from paddle.jit.api import declarative diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index f3f5598f52c733..f08c0d1176cfd5 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -91,10 +91,12 @@ def test_custom_layer_with_kwargs(self): class CustomLayer(fluid.Layer): def __init__(self, input_size, linear1_size=4): super().__init__() - self.linear1 = nn.Linear( + self.linear1 = paddle.nn.Linear( input_size, linear1_size, bias_attr=False ) - self.linear2 = nn.Linear(linear1_size, 1, bias_attr=False) + self.linear2 = paddle.nn.Linear( + linear1_size, 1, bias_attr=False + ) def forward(self, x, do_linear2=False): ret = self.linear1(x) @@ -172,7 +174,7 @@ def test_linear(self): dtype='float32', append_batch_size=False, ) - linear = nn.Linear( + linear = paddle.nn.Linear( 32, 4, bias_attr=fluid.initializer.ConstantInitializer(value=1) ) ret = linear(t) @@ -182,7 +184,7 @@ def test_linear(self): with self.dynamic_graph(): with _test_eager_guard(): t = base.to_variable(inp) - linear = nn.Linear( + linear = paddle.nn.Linear( 32, 4, bias_attr=fluid.initializer.ConstantInitializer(value=1), @@ -191,7 +193,7 @@ def test_linear(self): dy_eager_ret_value = dy_eager_ret.numpy() t = base.to_variable(inp) - linear = nn.Linear( + linear = paddle.nn.Linear( 32, 4, bias_attr=fluid.initializer.ConstantInitializer(value=1) ) dy_ret = linear(t) @@ -205,7 +207,7 @@ def test_linear(self): # the input of Linear must be Variable. 
def test_Variable(): inp = np.ones([3, 32, 32], dtype='float32') - linear = nn.Linear( + linear = paddle.nn.Linear( 32, 4, bias_attr=fluid.initializer.ConstantInitializer(value=1), @@ -218,7 +220,7 @@ def test_Variable(): # float16 only can be set on GPU place def test_type(): inp = np.ones([3, 32, 32], dtype='int32') - linear = nn.Linear( + linear = paddle.nn.Linear( 32, 4, bias_attr=fluid.initializer.ConstantInitializer(value=1), @@ -261,7 +263,7 @@ def test_Flatten(self): # the input of Linear must be Variable. def test_Variable(): inp = np.ones([3, 32, 32], dtype='float32') - linear = nn.Linear( + linear = paddle.nn.Linear( 32, 4, bias_attr=fluid.initializer.ConstantInitializer(value=1), @@ -274,7 +276,7 @@ def test_Variable(): # float16 only can be set on GPU place def test_type(): inp = np.ones([3, 32, 32], dtype='int32') - linear = nn.Linear( + linear = paddle.nn.Linear( 32, 4, bias_attr=fluid.initializer.ConstantInitializer(value=1), @@ -1703,7 +1705,9 @@ def test_conv3d(self): images = layers.data( name='pixel', shape=[3, 6, 6, 6], dtype='float32' ) - conv3d = nn.Conv3D(num_channels=3, num_filters=3, filter_size=2) + conv3d = paddle.nn.Conv3D( + in_channels=3, out_channels=3, kernel_size=2 + ) ret = conv3d(images) static_ret2 = self.get_static_graph_result( feed={'pixel': np.ones([2, 3, 6, 6, 6], dtype='float32')}, @@ -1713,12 +1717,16 @@ def test_conv3d(self): with self.dynamic_graph(): with _test_eager_guard(): images = np.ones([2, 3, 6, 6, 6], dtype='float32') - conv3d = nn.Conv3D(num_channels=3, num_filters=3, filter_size=2) + conv3d = paddle.nn.Conv3D( + in_channels=3, out_channels=3, kernel_size=2 + ) dy_eager_ret = conv3d(base.to_variable(images)) dy_eager_rlt_value = dy_eager_ret.numpy() images = np.ones([2, 3, 6, 6, 6], dtype='float32') - conv3d = nn.Conv3D(num_channels=3, num_filters=3, filter_size=2) + conv3d = paddle.nn.Conv3D( + in_channels=3, out_channels=3, kernel_size=2 + ) dy_ret = conv3d(base.to_variable(images)) dy_rlt_value = dy_ret.numpy() @@ -1735,14 +1743,14 @@ def test_conv3d(self): custom_weight ) ) - conv3d1 = nn.Conv3D( - num_channels=3, num_filters=3, filter_size=2 + conv3d1 = paddle.nn.Conv3D( + in_channels=3, out_channels=3, kernel_size=2 ) - conv3d2 = nn.Conv3D( - num_channels=3, - num_filters=3, - filter_size=2, - param_attr=weight_attr, + conv3d2 = paddle.nn.Conv3D( + in_channels=3, + out_channels=3, + kernel_size=2, + weight_attr=weight_attr, ) dy_ret1 = conv3d1(base.to_variable(images)) dy_ret2 = conv3d2(base.to_variable(images)) @@ -1780,12 +1788,14 @@ def test_conv3d(self): custom_weight ) ) - conv3d1 = nn.Conv3D(num_channels=3, num_filters=3, filter_size=2) - conv3d2 = nn.Conv3D( - num_channels=3, - num_filters=3, - filter_size=2, - param_attr=weight_attr, + conv3d1 = paddle.nn.Conv3D( + in_channels=3, out_channels=3, kernel_size=2 + ) + conv3d2 = paddle.nn.Conv3D( + in_channels=3, + out_channels=3, + kernel_size=2, + weight_attr=weight_attr, ) dy_ret1 = conv3d1(base.to_variable(images)) dy_ret2 = conv3d2(base.to_variable(images)) @@ -2277,15 +2287,15 @@ def test_conv3d_transpose(self): with self.static_graph(): img = layers.data(name='pixel', shape=[3, 2, 2, 2], dtype='float32') out = paddle.static.nn.conv3d_transpose( - input=img, num_filters=12, filter_size=12, use_cudnn=False + input=img, num_filters=12, filter_size=12, use_cudnn=True ) static_rlt = self.get_static_graph_result( feed={'pixel': input_array}, fetch_list=[out] )[0] with self.static_graph(): img = layers.data(name='pixel', shape=[3, 2, 2, 2], dtype='float32') - 
conv3d_transpose = nn.Conv3DTranspose( - num_channels=3, num_filters=12, filter_size=12, use_cudnn=False + conv3d_transpose = paddle.nn.Conv3DTranspose( + in_channels=3, out_channels=12, kernel_size=12 ) out = conv3d_transpose(img) static_rlt2 = self.get_static_graph_result( @@ -2293,17 +2303,16 @@ def test_conv3d_transpose(self): )[0] with self.dynamic_graph(): with _test_eager_guard(): - conv3d_transpose = nn.Conv3DTranspose( - num_channels=3, - num_filters=12, - filter_size=12, - use_cudnn=False, + conv3d_transpose = paddle.nn.Conv3DTranspose( + in_channels=3, + out_channels=12, + kernel_size=12, ) dy_eager_rlt = conv3d_transpose(base.to_variable(input_array)) dy_eager_rlt_value = dy_eager_rlt.numpy() - conv3d_transpose = nn.Conv3DTranspose( - num_channels=3, num_filters=12, filter_size=12, use_cudnn=False + conv3d_transpose = paddle.nn.Conv3DTranspose( + in_channels=3, out_channels=12, kernel_size=12 ) dy_rlt = conv3d_transpose(base.to_variable(input_array)) dy_rlt_value = dy_rlt.numpy() @@ -2320,20 +2329,18 @@ def test_conv3d_transpose(self): custom_weight ) ) - conv3d1 = nn.Conv3DTranspose( - num_channels=3, - num_filters=3, - filter_size=2, + conv3d1 = paddle.nn.Conv3DTranspose( + in_channels=3, + out_channels=3, + kernel_size=2, bias_attr='eager_conv3d1_b', - use_cudnn=False, ) - conv3d2 = nn.Conv3DTranspose( - num_channels=3, - num_filters=3, - filter_size=2, - param_attr=weight_attr, + conv3d2 = paddle.nn.Conv3DTranspose( + in_channels=3, + out_channels=3, + kernel_size=2, + weight_attr=weight_attr, bias_attr='eager_conv3d2_b', - use_cudnn=False, ) dy_ret1 = conv3d1(base.to_variable(images)) dy_ret2 = conv3d2(base.to_variable(images)) @@ -2371,20 +2378,18 @@ def test_conv3d_transpose(self): custom_weight ) ) - conv3d1 = nn.Conv3DTranspose( - num_channels=3, - num_filters=3, - filter_size=2, + conv3d1 = paddle.nn.Conv3DTranspose( + in_channels=3, + out_channels=3, + kernel_size=2, bias_attr='conv3d1_b', - use_cudnn=False, ) - conv3d2 = nn.Conv3DTranspose( - num_channels=3, - num_filters=3, - filter_size=2, - param_attr=weight_attr, + conv3d2 = paddle.nn.Conv3DTranspose( + in_channels=3, + out_channels=3, + kernel_size=2, + weight_attr=weight_attr, bias_attr='conv3d2_b', - use_cudnn=False, ) dy_ret1 = conv3d1(base.to_variable(images)) dy_ret2 = conv3d2(base.to_variable(images)) diff --git a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py index ffe45f3ec312e9..dd37c2bff66a7a 100644 --- a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py @@ -16,7 +16,7 @@ import math import numpy as np import unittest - +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers import paddle.fluid.framework as framework @@ -120,7 +120,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase): def test_LR_state_dict(self): with fluid.dygraph.guard(): x = np.random.uniform(-1, 1, [3, 10]).astype("float32") - linear = fluid.dygraph.Linear(10, 10) + linear = paddle.nn.Linear(10, 10) input = fluid.dygraph.to_variable(x) Exponential_scheduler = fluid.dygraph.ExponentialDecay( @@ -291,7 +291,7 @@ def test_MultiStepDecay(self): learning_rate = 0.5 milestones = [2, 4, 8] decay_rate = 0.2 - linear = fluid.dygraph.Linear(10, 10) + linear = paddle.nn.Linear(10, 10) scheduler = fluid.dygraph.MultiStepDecay( learning_rate, milestones, decay_rate @@ -364,7 +364,7 @@ def test_LambdaDecay(self): lr_lambda = lambda 
x: 0.95**x scheduler = fluid.dygraph.LambdaDecay(learning_rate, lr_lambda) - linear = fluid.dygraph.nn.Linear(10, 10) + linear = paddle.nn.Linear(10, 10) adam = fluid.optimizer.Adam( scheduler, parameter_list=linear.parameters() ) diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py index b2b0d32d72a2ca..2ea1b4faf8d24a 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py @@ -15,6 +15,9 @@ import sys import time import unittest +import numpy as np +import paddle +from paddle.nn import Linear import numpy as np from test_multiprocess_dataloader_static import ( @@ -29,7 +32,6 @@ ) import paddle.fluid as fluid -from paddle.fluid.dygraph.nn import Linear from paddle.io import DataLoader @@ -37,11 +39,11 @@ class SimpleFCNet(fluid.dygraph.Layer): def __init__(self): super().__init__() - param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.8) + param_attr = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.8) ) - bias_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + bias_attr = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.5) ) self._fcs = [] in_channel = IMAGE_SIZE @@ -50,21 +52,21 @@ def __init__(self): Linear( in_channel, hidden_size, - act='tanh', - param_attr=param_attr, + weight_attr=param_attr, bias_attr=bias_attr, ) ) + self._fcs.append(paddle.nn.Tanh()) in_channel = hidden_size self._fcs.append( Linear( in_channel, CLASS_NUM, - act='softmax', - param_attr=param_attr, + weight_attr=param_attr, bias_attr=bias_attr, ) ) + self._fcs.append(paddle.nn.Softmax()) def forward(self, image): out = image diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py index 1ce77249ea5787..1f15241b26c6a0 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py @@ -15,6 +15,10 @@ import sys import time import unittest +import numpy as np + +import paddle +from paddle.nn import Linear import numpy as np from test_multiprocess_dataloader_iterable_dataset_static import ( @@ -29,7 +33,6 @@ ) import paddle.fluid as fluid -from paddle.fluid.dygraph.nn import Linear from paddle.io import DataLoader @@ -37,11 +40,11 @@ class SimpleFCNet(fluid.dygraph.Layer): def __init__(self): super().__init__() - param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.8) + param_attr = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.8) ) - bias_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + bias_attr = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.5) ) self._fcs = [] in_channel = IMAGE_SIZE @@ -50,21 +53,22 @@ def __init__(self): Linear( in_channel, hidden_size, - act='tanh', - param_attr=param_attr, + weight_attr=param_attr, bias_attr=bias_attr, ) ) + self._fcs.append(paddle.nn.Tanh()) + in_channel = hidden_size self._fcs.append( Linear( in_channel, CLASS_NUM, - act='softmax', - param_attr=param_attr, + weight_attr=param_attr, bias_attr=bias_attr, ) ) + self._fcs.append(paddle.nn.Softmax()) def forward(self, image): 
out = image diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py index de9c02f776835d..3294b6f37067ce 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py @@ -138,34 +138,34 @@ def fn_2(opt, avg_loss=None, pred=None, label=None): class DygraphLayer(fluid.dygraph.Layer): def __init__(self): super().__init__() - self.fc_1 = fluid.dygraph.nn.Linear( + self.fc_1 = paddle.nn.Linear( INPUT_SIZE, FC_SIZE, - act='relu', - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.99) ), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + bias_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.5) ), ) - - self.fc_2 = fluid.dygraph.nn.Linear( + self.act_1 = paddle.nn.ReLU() + self.fc_2 = paddle.nn.Linear( FC_SIZE, CLASS_NUM, - act='softmax', - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.2) + weight_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=1.2) ), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.8) + bias_attr=paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.8) ), ) + self.act_2 = paddle.nn.Softmax() + def forward(self, inputs): hidden = self.fc_1(inputs) prediction = self.fc_2(hidden) - return hidden, prediction + return self.act_1(hidden), self.act_2(prediction) def dynamic(train_data, use_cuda=False, use_parallel_exe=False): diff --git a/python/paddle/fluid/tests/unittests/test_pool2d_op.py b/python/paddle/fluid/tests/unittests/test_pool2d_op.py index fb802b60b87274..5285b7767a35a5 100644 --- a/python/paddle/fluid/tests/unittests/test_pool2d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pool2d_op.py @@ -18,7 +18,6 @@ import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid import Program, program_guard from paddle.fluid.tests.unittests.op_test import OpTest @@ -1460,112 +1459,5 @@ def run_5(): self.assertRaises(ValueError, run_5) -class TestDygraphPool2DAPIError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - # the input of Pool2D must be Variable. 
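The deleted TestDygraphPool2DAPI* cases below exercised the legacy fluid.dygraph.Pool2D layer on NHWC data. With that layer removed from the tests, an equivalent check can be written against paddle.nn.MaxPool2D; one safe way, sketched here, is to transpose NHWC input to NCHW first (this helper usage is illustrative, not part of the patch):

import numpy as np
import paddle

data = np.random.random((3, 32, 32, 5)).astype('float32')        # NHWC, as in the removed test
x = paddle.transpose(paddle.to_tensor(data), perm=[0, 3, 1, 2])   # NHWC -> NCHW

pool = paddle.nn.MaxPool2D(kernel_size=2, stride=1, padding=0)
out = paddle.transpose(pool(x), perm=[0, 2, 3, 1])                # back to NHWC
print(out.shape)  # [3, 31, 31, 5]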
- data1 = np.random.random((3, 32, 32, 5)).astype('float32') - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='max', - pool_stride=1, - global_pooling=False, - ) - self.assertRaises(TypeError, pool2d, data1) - - # the input dtype of Pool2D must be uint8 or int8 or float16 or float32 or float64 - # uint8 and int8 only can be set on mkldnn - # float16 only can be set on GPU place - data2 = fluid.layers.data( - name='x1', shape=[3, 32, 32, 5], dtype="int32" - ) - self.assertRaises(TypeError, pool2d, data2) - - def test_data_format_error(self): - with program_guard(Program(), Program()): - # the data_format must be 'NCHW' or 'NHWC' - data1 = np.random.random((3, 32, 32, 5)).astype('float32') - self.assertRaises( - ValueError, - fluid.dygraph.Pool2D, - pool_size=2, - pool_type='max', - pool_stride=1, - global_pooling=False, - data_format='NWHC', - ) - - -class TestDygraphPool2DAPI(unittest.TestCase): - def test_nhwc(self): - with fluid.dygraph.guard(): - data = np.random.random((3, 32, 32, 5)).astype('float32') - x = fluid.dygraph.to_variable(data) - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='max', - pool_stride=1, - pool_padding=[0, 0], - global_pooling=False, - data_format='NHWC', - ) - out1 = pool2d(x) - out2 = pool2D_forward_naive( - data, - [2, 2], - [1, 1], - paddings=[0, 0], - pool_type='max', - data_format='NHWC', - ) - np.testing.assert_allclose(out1.numpy(), out2, rtol=1e-05) - - def test_lower_case(self): - with fluid.dygraph.guard(): - data = np.random.random((3, 32, 32, 5)).astype('float32') - x = fluid.dygraph.to_variable(data) - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='max', - pool_stride=1, - pool_padding=[0, 0], - global_pooling=False, - data_format='nhwc', - ) - out1 = pool2d(x) - out2 = pool2D_forward_naive( - data, - [2, 2], - [1, 1], - paddings=[0, 0], - pool_type='max', - data_format='NHWC', - ) - np.testing.assert_allclose(out1.numpy(), out2, rtol=1e-05) - - def test_upper_case(self): - with fluid.dygraph.guard(): - data = np.random.random((3, 32, 32, 5)).astype('float32') - x = fluid.dygraph.to_variable(data) - pool2d = fluid.dygraph.Pool2D( - pool_size=2, - pool_type='MAX', - pool_stride=1, - pool_padding=[0, 0], - global_pooling=False, - data_format='nhwc', - ) - out1 = pool2d(x) - out2 = pool2D_forward_naive( - data, - [2, 2], - [1, 1], - paddings=[0, 0], - pool_type='max', - data_format='NHWC', - ) - np.testing.assert_allclose(out1.numpy(), out2, rtol=1e-05) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_regularizer.py b/python/paddle/fluid/tests/unittests/test_regularizer.py index ba81625a04d085..f162a8e829fe81 100644 --- a/python/paddle/fluid/tests/unittests/test_regularizer.py +++ b/python/paddle/fluid/tests/unittests/test_regularizer.py @@ -259,7 +259,9 @@ def test_l2(self): def test_repeated_regularization(self): l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1) l2 = fluid.regularizer.L2Decay(regularization_coeff=0.01) - fc_param_attr = fluid.ParamAttr(regularizer=l1) + fc_param_attr = paddle.ParamAttr( + regularizer=paddle.regularizer.L1Decay() + ) with fluid.program_guard(fluid.Program(), fluid.Program()): x = fluid.layers.uniform_random([2, 2, 3]) out = fluid.layers.fc(x, 5, param_attr=fc_param_attr) @@ -273,11 +275,11 @@ def test_repeated_regularization(self): paddle.seed(1) paddle.framework.random._manual_program_seed(1) - linear1 = fluid.dygraph.Linear( - 2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr + linear1 = paddle.nn.Linear( + 2, 2, 
weight_attr=fc_param_attr, bias_attr=fc_param_attr ) - linear2 = fluid.dygraph.Linear( - 2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr + linear2 = paddle.nn.Linear( + 2, 2, weight_attr=fc_param_attr, bias_attr=fc_param_attr ) loss1 = linear1(input) diff --git a/python/paddle/fluid/tests/unittests/test_regularizer_api.py b/python/paddle/fluid/tests/unittests/test_regularizer_api.py index aee1e8c25eee6a..c3adc0cf0b3591 100644 --- a/python/paddle/fluid/tests/unittests/test_regularizer_api.py +++ b/python/paddle/fluid/tests/unittests/test_regularizer_api.py @@ -169,7 +169,9 @@ def test_repeated_regularization(self): paddle.enable_static() l1 = paddle.regularizer.L1Decay(0.1) l2 = paddle.regularizer.L2Decay(0.01) - fc_param_attr = fluid.ParamAttr(regularizer=l1) + fc_param_attr = paddle.ParamAttr( + regularizer=paddle.regularizer.L1Decay() + ) with fluid.program_guard(fluid.Program(), fluid.Program()): x = fluid.layers.uniform_random([2, 2, 3]) out = fluid.layers.fc(x, 5, param_attr=fc_param_attr) @@ -183,11 +185,11 @@ def test_repeated_regularization(self): paddle.seed(1) paddle.framework.random._manual_program_seed(1) - linear1 = fluid.dygraph.Linear( - 2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr + linear1 = paddle.nn.Linear( + 2, 2, weight_attr=fc_param_attr, bias_attr=fc_param_attr ) - linear2 = fluid.dygraph.Linear( - 2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr + linear2 = paddle.nn.Linear( + 2, 2, weight_attr=fc_param_attr, bias_attr=fc_param_attr ) loss1 = linear1(input) diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index e4db05ecc34231..439ff3d2b82226 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -354,7 +354,7 @@ def func_test_to_variable(self): var = fluid.dygraph.to_variable("test", name="abc") # test to_variable of LayerObjectHelper(LayerHelperBase) with self.assertRaises(TypeError): - linear = fluid.dygraph.Linear(32, 64) + linear = paddle.nn.Linear(32, 64) var = linear._helper.to_variable("test", name="abc") def test_to_variable(self): @@ -1170,13 +1170,13 @@ def func_test_to_static_var(self): self._assert_to_static(var_base, static_param, True) # Convert ParamBase into Parameter - fc = fluid.dygraph.Linear( + fc = paddle.nn.Linear( 10, 20, - param_attr=fluid.ParamAttr( + weight_attr=paddle.ParamAttr( learning_rate=0.001, do_model_average=True, - regularizer=fluid.regularizer.L1Decay(), + regularizer=paddle.regularizer.L1Decay(), ), ) weight = fc.parameters()[0] From 7bf7e6e0f97b40e739858b10e353a3a9998458d8 Mon Sep 17 00:00:00 2001 From: zhangyikun02 <48021248+zhangyk0314@users.noreply.github.com> Date: Wed, 30 Nov 2022 10:29:37 +0800 Subject: [PATCH 047/154] optimize for argsort with xpu, test=kunlun (#48440) --- paddle/phi/kernels/xpu/argsort_kernel.cc | 221 ++++++++++++++---- .../unittests/xpu/test_argsort_op_xpu.py | 85 ++++++- .../tests/unittests/xpu/test_pad3d_op_xpu.py | 164 +++++++++++++ 3 files changed, 424 insertions(+), 46 deletions(-) diff --git a/paddle/phi/kernels/xpu/argsort_kernel.cc b/paddle/phi/kernels/xpu/argsort_kernel.cc index 9a1cdd763b9e8c..0a71ec71463d41 100644 --- a/paddle/phi/kernels/xpu/argsort_kernel.cc +++ b/paddle/phi/kernels/xpu/argsort_kernel.cc @@ -20,6 +20,149 @@ namespace phi { +template +static inline void xpu_argsort(xpu::Context* ctx, + const T* input_data, + T* output_data, + TID* indices_data, + int m, + int n, + bool descending) { + int ret = + 
xpu::sort(ctx, input_data, output_data, indices_data, m, n, descending); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "sort"); +} + +template +static inline void xpu_transpose(xpu::Context* ctx, + const T* x, + T* y, + const std::vector& xshape, + const std::vector& permute) { + int ret = xpu::transpose(ctx, x, y, xshape, permute); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "transpose"); +} + +template +static inline void xpu_cast(xpu::Context* ctx, const TX* x, TY* y, int len) { + int ret = xpu::cast(ctx, x, y, len); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "cast"); +} + +template +struct XPUArgsort { + void operator()(xpu::Context* ctx, + const T* input_data, + T* output_data, + int64_t* indices_data, + const std::vector& data_shape, + const std::vector& permute, + bool descending) { + xpu::ctx_guard RAII_GUARD(ctx); + int m = data_shape[0] * data_shape[2]; + int n = data_shape[1]; + int len = data_shape[0] * data_shape[1] * data_shape[2]; + std::vector trans_data_shape{ + data_shape[0], data_shape[2], data_shape[1]}; + + T* input_data_trans = RAII_GUARD.alloc_l3_or_gm(len); + T* output_data_trans = RAII_GUARD.alloc_l3_or_gm(len); + int64_t* indices_data_trans = RAII_GUARD.alloc_l3_or_gm(len); + + xpu_transpose(ctx, input_data, input_data_trans, data_shape, permute); + xpu_argsort(ctx, + input_data_trans, + output_data_trans, + indices_data_trans, + m, + n, + descending); + xpu_transpose( + ctx, output_data_trans, output_data, trans_data_shape, permute); + xpu_transpose( + ctx, indices_data_trans, indices_data, trans_data_shape, permute); + } +}; + +template +struct XPUArgsort { + void operator()(xpu::Context* ctx, + const T* input_data, + T* output_data, + int64_t* indices_data, + const std::vector& data_shape, + const std::vector& permute, + bool descending) { + xpu::ctx_guard RAII_GUARD(ctx); + int m = data_shape[0] * data_shape[2]; + int n = data_shape[1]; + int len = data_shape[0] * data_shape[1] * data_shape[2]; + std::vector trans_data_shape{ + data_shape[0], data_shape[2], data_shape[1]}; + + T* input_data_trans = RAII_GUARD.alloc_l3_or_gm(len); + T* output_data_trans = RAII_GUARD.alloc_l3_or_gm(len); + int* indices_data_trans = RAII_GUARD.alloc_l3_or_gm(len); + int64_t* cast_data_int64 = RAII_GUARD.alloc_l3_or_gm(len); + + xpu_transpose(ctx, input_data, input_data_trans, data_shape, permute); + xpu_argsort(ctx, + input_data_trans, + output_data_trans, + indices_data_trans, + m, + n, + descending); + xpu_transpose( + ctx, output_data_trans, output_data, trans_data_shape, permute); + xpu_cast(ctx, indices_data_trans, cast_data_int64, len); + xpu_transpose( + ctx, cast_data_int64, indices_data, trans_data_shape, permute); + } +}; + +template <> +struct XPUArgsort { + void operator()(xpu::Context* ctx, + const int64_t* input_data, + int64_t* output_data, + int64_t* indices_data, + const std::vector& data_shape, + const std::vector& permute, + bool descending) { + xpu::ctx_guard RAII_GUARD(ctx); + int m = data_shape[0] * data_shape[2]; + int n = data_shape[1]; + int len = data_shape[0] * data_shape[1] * data_shape[2]; + std::vector trans_data_shape{ + data_shape[0], data_shape[2], data_shape[1]}; + + int* input_data_trans = RAII_GUARD.alloc_l3_or_gm(len); + int* output_data_trans = RAII_GUARD.alloc_l3_or_gm(len); + int* indices_data_trans = RAII_GUARD.alloc_l3_or_gm(len); + int* cast_data_int = RAII_GUARD.alloc_l3_or_gm(len); + int64_t* cast_data_int64 = RAII_GUARD.alloc_l3_or_gm(len); + + xpu_cast(ctx, input_data, cast_data_int, len); + xpu_transpose(ctx, cast_data_int, input_data_trans, data_shape, 
permute); + xpu_argsort(ctx, + input_data_trans, + output_data_trans, + indices_data_trans, + m, + n, + descending); + + xpu_cast(ctx, output_data_trans, cast_data_int64, len); + xpu_transpose(ctx, cast_data_int64, output_data, trans_data_shape, permute); + xpu_cast(ctx, indices_data_trans, cast_data_int64, len); + xpu_transpose( + ctx, cast_data_int64, indices_data, trans_data_shape, permute); + } +}; + template void ArgsortKernel(const Context& dev_ctx, const DenseTensor& input, @@ -35,63 +178,51 @@ void ArgsortKernel(const Context& dev_ctx, auto output_data = dev_ctx.template Alloc(output); auto indices_data = dev_ctx.template Alloc(indices); - bool is_need_transpose = true; - if (axis == -1 || axis + 1 == in_dims.size()) { - is_need_transpose = false; - } int len_before = phi::product(phi::slice_ddim(in_dims, 0, axis)); int len_after = phi::product(phi::slice_ddim(in_dims, axis + 1, in_dims.size())); - int m = len_before * len_after; - int len = m * n; std::vector permute_vec{0, 2, 1}; std::vector data_shape{len_before, n, len_after}; - std::vector data_shape_trans{len_before, len_after, n}; - xpu::ctx_guard RAII_GUARD(dev_ctx.x_context()); - if (is_need_transpose) { - T* input_data_trans = RAII_GUARD.alloc_l3_or_gm(len); - PADDLE_ENFORCE_XDNN_NOT_NULL(input_data_trans); - T* output_data_trans = RAII_GUARD.alloc_l3_or_gm(len); - PADDLE_ENFORCE_XDNN_NOT_NULL(output_data_trans); - int64_t* indices_data_trans = RAII_GUARD.alloc_l3_or_gm(len); - PADDLE_ENFORCE_XDNN_NOT_NULL(indices_data_trans); - - int r = xpu::transpose(dev_ctx.x_context(), - input_data, - input_data_trans, - data_shape, - permute_vec); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose"); - - input_data = input_data_trans; - output_data = output_data_trans; - indices_data = indices_data_trans; + bool int64_need_cast = false; + bool index_need_cast = false; + if (std::is_same::value) { + if ((n > 10240) && (n <= 16384)) { + int64_need_cast = true; + } + if ((n > 8192) && (n <= 10240)) { + index_need_cast = true; + } + } else { + if ((n > 10240) && (n <= 16384)) { + index_need_cast = true; + } } - int ret = xpu::sort(dev_ctx.x_context(), + if (int64_need_cast) { + XPUArgsort()(dev_ctx.x_context(), + input_data, + output_data, + indices_data, + data_shape, + permute_vec, + descending); + } else if (index_need_cast) { + XPUArgsort()(dev_ctx.x_context(), + input_data, + output_data, + indices_data, + data_shape, + permute_vec, + descending); + } else { + XPUArgsort()(dev_ctx.x_context(), input_data, output_data, indices_data, - m, - n, + data_shape, + permute_vec, descending); - PADDLE_ENFORCE_XDNN_SUCCESS(ret, "sort"); - - if (is_need_transpose) { - int r = xpu::transpose(dev_ctx.x_context(), - output_data, - output->data(), - data_shape_trans, - permute_vec); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose"); - - r = xpu::transpose(dev_ctx.x_context(), - indices_data, - indices->data(), - data_shape_trans, - permute_vec); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose"); } } diff --git a/python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py index 12227622e65973..70b988dcd1b034 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_argsort_op_xpu.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -100,9 +100,92 @@ def test_check_grad(self): self.check_grad_with_place(self.place, {'X'}, 'Out') +class XPUTestArgsortOp_LargeN(XPUOpTestWrapper): + def __init__(self): + self.op_name = 'argsort' + self.use_dynamic_create_class = False + + class TestArgsortOpCase1(XPUOpTest): + def setUp(self): + self.set_xpu() + self.op_type = "argsort" + self.place = paddle.XPUPlace(0) + self.dtype = self.in_type + self.axis = -1 if not hasattr(self, 'init_axis') else self.init_axis + self.init_test_case() + self.descending = ( + False + if not hasattr(self, 'init_descending') + else self.init_descending + ) + + np.random.seed(100) + if self.dtype == np.float32: + self.x = np.random.random(self.input_shape).astype(self.dtype) + else: + self.x = np.random.choice( + 1000000, self.input_shape, replace=False + ).astype(self.dtype) + + self.inputs = {"X": self.x} + self.attrs = {"axis": self.axis, "descending": self.descending} + self.get_output() + self.outputs = {"Out": self.sorted_x, "Indices": self.indices} + + def get_output(self): + if self.descending: + self.indices = np.flip( + np.argsort(self.x, kind='heapsort', axis=self.axis), + self.axis, + ) + self.sorted_x = np.flip( + np.sort(self.x, kind='heapsort', axis=self.axis), self.axis + ) + else: + self.indices = np.argsort( + self.x, kind='heapsort', axis=self.axis + ) + self.sorted_x = np.sort(self.x, kind='heapsort', axis=self.axis) + + def set_xpu(self): + self.__class__.use_xpu = True + + def init_test_case(self): + self.input_shape = [2, 8732] # test for 8192 < n <= 10240 + + def test_check_output(self): + self.check_output_with_place(self.place) + + def test_check_grad(self): + self.check_grad_with_place(self.place, {'X'}, 'Out') + + class TestArgsortOpCase2(TestArgsortOpCase1): + def init_test_case(self): + self.input_shape = [2, 10241] # test for 10240 < n <= 16384 + + class TestArgsortOpCase3(TestArgsortOpCase1): + def init_test_case(self): + self.input_shape = [ + 2, + 8732, + 1, + ] # test for 8192 < n <= 10240 + nees_transpose + self.axis = 1 + + class TestArgsortOpCase4(TestArgsortOpCase1): + def init_test_case(self): + self.input_shape = [ + 2, + 10241, + 1, + ] # test for 10240 < n <= 16384 + nees_transpose + self.axis = 1 + + support_types = get_xpu_op_support_types('argsort') for stype in support_types: create_test_class(globals(), XPUTestArgsortOp, stype) + create_test_class(globals(), XPUTestArgsortOp_LargeN, stype) if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_pad3d_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_pad3d_op_xpu.py index 2522fa9f6cecd9..4ecb8878ba9025 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_pad3d_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_pad3d_op_xpu.py @@ -457,6 +457,170 @@ def test_dygraph_3(self): np.testing.assert_allclose(y2.numpy(), np_out2, rtol=1e-05) np.testing.assert_allclose(y3.numpy(), np_out3, rtol=1e-05) + class TestPad1dAPI(unittest.TestCase): + def _get_numpy_out( + self, input_data, pad, mode, value=0.0, data_format="NCL" + ): + if data_format == "NCL": + pad = [ + (0, 0), + (0, 0), + (pad[0], pad[1]), + ] + else: + pad = [ + (0, 0), + (pad[0], pad[1]), + (0, 0), + ] + + if mode == "constant": + out = np.pad(input_data, pad, mode=mode, constant_values=value) + elif mode == "reflect": + out = np.pad(input_data, pad, mode=mode) + elif mode == "replicate": + out = 
np.pad(input_data, pad, mode="edge") + elif mode == "circular": + out = np.pad(input_data, pad, mode="wrap") + + return out + + def setUp(self): + self.places = [paddle.XPUPlace(0)] + self.dtype = self.in_type + + def test_class(self): + paddle.disable_static() + for place in self.places: + input_shape = (3, 4, 5) + pad = [1, 2] + pad_int = 1 + value = 100 + input_data = np.random.rand(*input_shape).astype(self.dtype) + + pad_reflection = nn.Pad1D(padding=pad, mode="reflect") + pad_replication = nn.Pad1D(padding=pad, mode="replicate") + pad_constant = nn.Pad1D( + padding=pad, mode="constant", value=value + ) + pad_constant_int = nn.Pad1D( + padding=pad_int, mode="constant", value=value + ) + pad_circular = nn.Pad1D(padding=pad, mode="circular") + + data = paddle.to_tensor(input_data) + + output = pad_reflection(data) + np_out = self._get_numpy_out( + input_data, pad, "reflect", data_format="NCL" + ) + np.testing.assert_allclose(output.numpy(), np_out, rtol=1e-05) + + output = pad_replication(data) + np_out = self._get_numpy_out( + input_data, pad, "replicate", data_format="NCL" + ) + np.testing.assert_allclose(output.numpy(), np_out, rtol=1e-05) + + output = pad_constant(data) + np_out = self._get_numpy_out( + input_data, pad, "constant", value=value, data_format="NCL" + ) + np.testing.assert_allclose(output.numpy(), np_out, rtol=1e-05) + + output = pad_constant_int(data) + np_out = self._get_numpy_out( + input_data, + [pad_int] * 2, + "constant", + value=value, + data_format="NCL", + ) + np.testing.assert_allclose(output.numpy(), np_out, rtol=1e-05) + + class TestPad2dAPI(unittest.TestCase): + def _get_numpy_out( + self, input_data, pad, mode, value=0.0, data_format="NCHW" + ): + if data_format == "NCHW": + pad = [ + (0, 0), + (0, 0), + (pad[2], pad[3]), + (pad[0], pad[1]), + ] + else: + pad = [ + (0, 0), + (pad[2], pad[3]), + (pad[0], pad[1]), + (0, 0), + ] + + if mode == "constant": + out = np.pad(input_data, pad, mode=mode, constant_values=value) + elif mode == "reflect": + out = np.pad(input_data, pad, mode=mode) + elif mode == "replicate": + out = np.pad(input_data, pad, mode="edge") + elif mode == "circular": + out = np.pad(input_data, pad, mode="wrap") + + return out + + def setUp(self): + self.places = [paddle.XPUPlace(0)] + self.dtype = self.in_type + + def test_class(self): + paddle.disable_static() + for place in self.places: + input_shape = (3, 4, 5, 6) + pad = [1, 2, 2, 1] + pad_int = 1 + value = 100 + input_data = np.random.rand(*input_shape).astype(self.dtype) + + pad_reflection = nn.Pad2D(padding=pad, mode="reflect") + pad_replication = nn.Pad2D(padding=pad, mode="replicate") + pad_constant = nn.Pad2D( + padding=pad, mode="constant", value=value + ) + pad_constant_int = nn.Pad2D( + padding=pad_int, mode="constant", value=value + ) + pad_circular = nn.Pad2D(padding=pad, mode="circular") + + data = paddle.to_tensor(input_data) + + output = pad_reflection(data) + np_out = self._get_numpy_out( + input_data, pad, "reflect", data_format="NCHW" + ) + np.testing.assert_allclose(output.numpy(), np_out, rtol=1e-05) + + output = pad_replication(data) + np_out = self._get_numpy_out( + input_data, pad, "replicate", data_format="NCHW" + ) + np.testing.assert_allclose(output.numpy(), np_out, rtol=1e-05) + + output = pad_constant(data) + np_out = self._get_numpy_out( + input_data, pad, "constant", value=value, data_format="NCHW" + ) + np.testing.assert_allclose(output.numpy(), np_out, rtol=1e-05) + + output = pad_constant_int(data) + np_out = self._get_numpy_out( + input_data, + [pad_int] 
* 4, + "constant", + value=value, + data_format="NCHW", + ) + np.testing.assert_allclose(output.numpy(), np_out, rtol=1e-05) + class TestPad3dAPI(unittest.TestCase): def _get_numpy_out( self, input_data, pad, mode, value=0.0, data_format="NCDHW" From 16562a9d4446e0df92f54f399bfdd803ac9b26aa Mon Sep 17 00:00:00 2001 From: james Date: Wed, 30 Nov 2022 10:30:16 +0800 Subject: [PATCH 048/154] use correct xpu stream for synchronization (#48470) some legacy code still use xpu_wait() for stream sync -- it only syncs default stream. this PR replaces them with dev_ctx.Wait() to ensure that correct stream is always used --- paddle/fluid/operators/controlflow/logical_op_xpu.h | 2 +- paddle/fluid/platform/device/xpu/xpu_info.cc | 2 +- paddle/phi/backends/xpu/xpu_info.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/operators/controlflow/logical_op_xpu.h b/paddle/fluid/operators/controlflow/logical_op_xpu.h index 5e1a24116b0801..8afefd68374493 100644 --- a/paddle/fluid/operators/controlflow/logical_op_xpu.h +++ b/paddle/fluid/operators/controlflow/logical_op_xpu.h @@ -151,7 +151,7 @@ class BinaryLogicalOpXPUKernel : public framework::OpKernel { XpuLogicalType2Str(xpu_type))); if (need_broad_cast && dev_ctx.x_context()->xpu_stream != nullptr) { - xpu_wait(); + dev_ctx.Wait(); } } }; diff --git a/paddle/fluid/platform/device/xpu/xpu_info.cc b/paddle/fluid/platform/device/xpu/xpu_info.cc index 0ee8963b26afa9..548fe89dc5ceab 100644 --- a/paddle/fluid/platform/device/xpu/xpu_info.cc +++ b/paddle/fluid/platform/device/xpu/xpu_info.cc @@ -72,7 +72,7 @@ void MemcpySyncD2H(void* dst, } // if src.device == dst.device and you need sync , after call this function, -// need to call xpu_wait() +// need to call dev_ctx.Wait() void MemcpySyncD2D(void* dst, const platform::XPUPlace& dst_place, const void* src, diff --git a/paddle/phi/backends/xpu/xpu_info.cc b/paddle/phi/backends/xpu/xpu_info.cc index 978680e958370d..d084afee2285c2 100644 --- a/paddle/phi/backends/xpu/xpu_info.cc +++ b/paddle/phi/backends/xpu/xpu_info.cc @@ -160,7 +160,7 @@ void MemcpySyncD2H(void* dst, } // if src.device == dst.device and you need sync , after call this function, -// need to call xpu_wait() +// need to call dev_ctx.Wait() void MemcpySyncD2D(void* dst, const phi::XPUPlace& dst_place, const void* src, From 41da96c8b95b714370c1702c8a440f92fa4d9239 Mon Sep 17 00:00:00 2001 From: Aurelius84 Date: Wed, 30 Nov 2022 10:48:49 +0800 Subject: [PATCH 049/154] [Fluid clean]Migrate base/call/print et.al transformer into paddle.jit (#48513) * [Fluid clean]Migrate base/call/print et.al transformer into paddle.jit * fix phi kernel * Revert "fix phi kernel" This reverts commit eff8891c7efda6e49799edbcaef2ca50379d50ef. 
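
For downstream imports, this move is a path change only; a minimal sketch of the before/after, derived solely from the renames and import edits in the diff below:

    # before this patch
    from paddle.fluid.dygraph.dygraph_to_static.base_transformer import BaseTransformer
    from paddle.fluid.dygraph.dygraph_to_static.logical_transformer import cmpop_node_to_str

    # after this patch
    from paddle.jit.dy2static.base_transformer import BaseTransformer
    from paddle.jit.dy2static.logical_transformer import cmpop_node_to_str

    # modules that now live inside paddle/jit/dy2static switch to relative imports, e.g.
    from .base_transformer import BaseTransformer
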
--- .../dygraph_to_static/test_logical.py | 3 ++- .../jit/dy2static/assert_transformer.py | 2 +- .../paddle/jit/dy2static/ast_transformer.py | 20 ++++++++--------- .../dy2static}/base_transformer.py | 22 ++++++------------- .../dy2static}/basic_api_transformer.py | 2 +- .../dy2static/break_continue_transformer.py | 4 ++-- .../dy2static}/call_transformer.py | 2 +- .../dy2static}/cast_transformer.py | 2 +- .../dy2static}/create_variable_transformer.py | 3 +-- .../dy2static}/decorator_transformer.py | 8 +++---- .../jit/dy2static/early_return_transformer.py | 2 +- .../jit/dy2static/ifelse_transformer.py | 2 +- .../dy2static}/logical_transformer.py | 2 +- .../paddle/jit/dy2static/loop_transformer.py | 7 ++---- .../dy2static}/print_transformer.py | 2 +- .../jit/dy2static/return_transformer.py | 2 +- .../dy2static}/tensor_shape_transformer.py | 2 +- .../dy2static}/typehint_transformer.py | 5 +---- 18 files changed, 38 insertions(+), 54 deletions(-) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/base_transformer.py (95%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/basic_api_transformer.py (98%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/call_transformer.py (97%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/cast_transformer.py (96%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/create_variable_transformer.py (94%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/decorator_transformer.py (96%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/logical_transformer.py (98%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/print_transformer.py (96%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/tensor_shape_transformer.py (96%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/typehint_transformer.py (89%) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py index 97213982ba6f78..97454a8cefb5a6 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py @@ -21,7 +21,8 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph.dygraph_to_static.logical_transformer import ( +from paddle.jit import ProgramTranslator +from paddle.jit.dy2static.logical_transformer import ( cmpop_node_to_str, ) from paddle.jit import ProgramTranslator diff --git a/python/paddle/jit/dy2static/assert_transformer.py b/python/paddle/jit/dy2static/assert_transformer.py index 96b2abca217cda..3a8a8b01aecc8b 100644 --- a/python/paddle/jit/dy2static/assert_transformer.py +++ b/python/paddle/jit/dy2static/assert_transformer.py @@ -18,7 +18,7 @@ AstNodeWrapper, ) from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( BaseTransformer, ) diff --git a/python/paddle/jit/dy2static/ast_transformer.py b/python/paddle/jit/dy2static/ast_transformer.py index f99c85c94a58e1..b23a6dc368ad37 100644 --- a/python/paddle/jit/dy2static/ast_transformer.py +++ b/python/paddle/jit/dy2static/ast_transformer.py @@ -18,7 +18,7 @@ # See details in https://github.com/serge-sans-paille/gast/ import os -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from 
.base_transformer import ( BaseTransformer, ) from .early_return_transformer import ( @@ -27,47 +27,47 @@ from .assert_transformer import ( AssertTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.basic_api_transformer import ( +from .basic_api_transformer import ( BasicApiTransformer, ) from .break_continue_transformer import ( BreakContinueTransformer, BreakTransformOptimizer, ) -from paddle.fluid.dygraph.dygraph_to_static.call_transformer import ( +from .call_transformer import ( CallTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.cast_transformer import ( +from .cast_transformer import ( CastTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.typehint_transformer import ( +from .typehint_transformer import ( TypeHintTransformer, ) from .ifelse_transformer import ( IfElseTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.logical_transformer import ( +from .logical_transformer import ( LogicalTransformer, ) from .loop_transformer import ( LoopTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.print_transformer import ( +from .print_transformer import ( PrintTransformer, ) from .return_transformer import ( ReturnTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.create_variable_transformer import ( +from .create_variable_transformer import ( CreateVariableTransformer, ) from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( StaticAnalysisVisitor, ) -from paddle.fluid.dygraph.dygraph_to_static.tensor_shape_transformer import ( +from .tensor_shape_transformer import ( TensorShapeTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.decorator_transformer import ( +from .decorator_transformer import ( DecoratorTransformer, ) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/base_transformer.py b/python/paddle/jit/dy2static/base_transformer.py similarity index 95% rename from python/paddle/fluid/dygraph/dygraph_to_static/base_transformer.py rename to python/paddle/jit/dy2static/base_transformer.py index 5f082acca5d401..518805250de93a 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/base_transformer.py +++ b/python/paddle/jit/dy2static/base_transformer.py @@ -14,25 +14,17 @@ from paddle.utils import gast from paddle.fluid import unique_name -from paddle.fluid.dygraph.dygraph_to_static.utils import get_attribute_full_name -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code -from paddle.fluid.dygraph.dygraph_to_static.utils import create_assign_node -from paddle.fluid.dygraph.dygraph_to_static.utils import ORIGI_INFO -from paddle.fluid.dygraph.dygraph_to_static.utils import FOR_ITER_INDEX_PREFIX -from paddle.fluid.dygraph.dygraph_to_static.utils import FOR_ITER_TUPLE_PREFIX -from paddle.fluid.dygraph.dygraph_to_static.utils import ( - FOR_ITER_TUPLE_INDEX_PREFIX, -) -from paddle.fluid.dygraph.dygraph_to_static.utils import FOR_ITER_VAR_LEN_PREFIX from paddle.fluid.dygraph.dygraph_to_static.utils import ( + ORIGI_INFO, + FOR_ITER_INDEX_PREFIX, + FOR_ITER_VAR_LEN_PREFIX, + FOR_ITER_TARGET_PREFIX, FOR_ITER_VAR_NAME_PREFIX, -) -from paddle.fluid.dygraph.dygraph_to_static.utils import ( FOR_ITER_ZIP_TO_LIST_PREFIX, -) -from paddle.fluid.dygraph.dygraph_to_static.utils import FOR_ITER_TARGET_PREFIX -from paddle.fluid.dygraph.dygraph_to_static.utils import ( FOR_ITER_ITERATOR_PREFIX, + create_assign_node, + ast_to_source_code, + get_attribute_full_name, ) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/basic_api_transformer.py 
b/python/paddle/jit/dy2static/basic_api_transformer.py similarity index 98% rename from python/paddle/fluid/dygraph/dygraph_to_static/basic_api_transformer.py rename to python/paddle/jit/dy2static/basic_api_transformer.py index d6c32a1fc2bd12..8a80fc4e6ed641 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/basic_api_transformer.py +++ b/python/paddle/jit/dy2static/basic_api_transformer.py @@ -19,7 +19,7 @@ AstNodeWrapper, ) from paddle.fluid.dygraph.dygraph_to_static import utils -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( BaseTransformer, ) from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code diff --git a/python/paddle/jit/dy2static/break_continue_transformer.py b/python/paddle/jit/dy2static/break_continue_transformer.py index 7ee451b736158f..23576fdf9849ec 100644 --- a/python/paddle/jit/dy2static/break_continue_transformer.py +++ b/python/paddle/jit/dy2static/break_continue_transformer.py @@ -20,10 +20,10 @@ from paddle.fluid.dygraph.dygraph_to_static.variable_trans_func import ( create_bool_node, ) -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( BaseTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( ForNodeVisitor, ) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/call_transformer.py b/python/paddle/jit/dy2static/call_transformer.py similarity index 97% rename from python/paddle/fluid/dygraph/dygraph_to_static/call_transformer.py rename to python/paddle/jit/dy2static/call_transformer.py index 043102b0f661aa..7380934d47e16a 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/call_transformer.py +++ b/python/paddle/jit/dy2static/call_transformer.py @@ -19,7 +19,7 @@ ) from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code from paddle.fluid.dygraph.dygraph_to_static.utils import is_paddle_api -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( BaseTransformer, ) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/cast_transformer.py b/python/paddle/jit/dy2static/cast_transformer.py similarity index 96% rename from python/paddle/fluid/dygraph/dygraph_to_static/cast_transformer.py rename to python/paddle/jit/dy2static/cast_transformer.py index 7a5821e12f5bf2..ca1bf11c43897f 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/cast_transformer.py +++ b/python/paddle/jit/dy2static/cast_transformer.py @@ -18,7 +18,7 @@ AstNodeWrapper, ) from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( BaseTransformer, ) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/create_variable_transformer.py b/python/paddle/jit/dy2static/create_variable_transformer.py similarity index 94% rename from python/paddle/fluid/dygraph/dygraph_to_static/create_variable_transformer.py rename to python/paddle/jit/dy2static/create_variable_transformer.py index 3432765191fdda..feccbfe594e137 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/create_variable_transformer.py +++ b/python/paddle/jit/dy2static/create_variable_transformer.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from paddle.utils import gast from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( AstNodeWrapper, ) @@ -22,7 +21,7 @@ from paddle.fluid.dygraph.dygraph_to_static.variable_trans_func import ( create_undefined_var, ) -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( BaseTransformer, ) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/decorator_transformer.py b/python/paddle/jit/dy2static/decorator_transformer.py similarity index 96% rename from python/paddle/fluid/dygraph/dygraph_to_static/decorator_transformer.py rename to python/paddle/jit/dy2static/decorator_transformer.py index 55ca1f9d9aca1f..f802db72de3bc7 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/decorator_transformer.py +++ b/python/paddle/jit/dy2static/decorator_transformer.py @@ -17,19 +17,17 @@ from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( AstNodeWrapper, ) -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( BaseTransformer, ) from paddle.fluid.dygraph.dygraph_to_static.utils import ( - create_funcDef_node, + RE_PYNAME, + RE_PYMODULE, ast_to_source_code, - is_paddle_api, - Dygraph2StaticException, ) import warnings import re -from paddle.fluid.dygraph.dygraph_to_static.utils import RE_PYNAME, RE_PYMODULE IGNORE_NAMES = [ 'declarative', diff --git a/python/paddle/jit/dy2static/early_return_transformer.py b/python/paddle/jit/dy2static/early_return_transformer.py index 864ca52155d72e..53bb2394e80373 100644 --- a/python/paddle/jit/dy2static/early_return_transformer.py +++ b/python/paddle/jit/dy2static/early_return_transformer.py @@ -16,7 +16,7 @@ from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( AstNodeWrapper, ) -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( BaseTransformer, ) diff --git a/python/paddle/jit/dy2static/ifelse_transformer.py b/python/paddle/jit/dy2static/ifelse_transformer.py index 86d4f4d4054d6b..8bae82c11a16fd 100644 --- a/python/paddle/jit/dy2static/ifelse_transformer.py +++ b/python/paddle/jit/dy2static/ifelse_transformer.py @@ -39,7 +39,7 @@ create_get_args_node, create_set_args_node, ) -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( BaseTransformer, ) from paddle.fluid.dygraph.dygraph_to_static.utils import ( diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/logical_transformer.py b/python/paddle/jit/dy2static/logical_transformer.py similarity index 98% rename from python/paddle/fluid/dygraph/dygraph_to_static/logical_transformer.py rename to python/paddle/jit/dy2static/logical_transformer.py index 3ad623a8ff0850..1ab4491d8d9412 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/logical_transformer.py +++ b/python/paddle/jit/dy2static/logical_transformer.py @@ -14,7 +14,7 @@ from paddle.utils import gast from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( BaseTransformer, ) diff --git a/python/paddle/jit/dy2static/loop_transformer.py b/python/paddle/jit/dy2static/loop_transformer.py index 3d109398c81e7d..7d42638b9e0f6b 100644 --- a/python/paddle/jit/dy2static/loop_transformer.py +++ b/python/paddle/jit/dy2static/loop_transformer.py @@ -35,15 +35,12 @@ FunctionNameLivenessAnalysis, ) from .ifelse_transformer import ARGS_NAME -from 
paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( BaseTransformer, -) -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( ForLoopTuplePreTransformer, -) -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( ForNodeVisitor, ) + from paddle.fluid.dygraph.dygraph_to_static.utils import ( GetterSetterHelper, create_name_str, diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/print_transformer.py b/python/paddle/jit/dy2static/print_transformer.py similarity index 96% rename from python/paddle/fluid/dygraph/dygraph_to_static/print_transformer.py rename to python/paddle/jit/dy2static/print_transformer.py index fdbd585a71bfb4..cd4f13f019bc8f 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/print_transformer.py +++ b/python/paddle/jit/dy2static/print_transformer.py @@ -18,7 +18,7 @@ AstNodeWrapper, StaticAnalysisVisitor, ) -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( BaseTransformer, ) diff --git a/python/paddle/jit/dy2static/return_transformer.py b/python/paddle/jit/dy2static/return_transformer.py index cb18dfd33adf39..c8114e078d592e 100644 --- a/python/paddle/jit/dy2static/return_transformer.py +++ b/python/paddle/jit/dy2static/return_transformer.py @@ -20,7 +20,7 @@ ForToWhileTransformer, ) from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( BaseTransformer, ) from paddle.fluid.dygraph.dygraph_to_static.utils import Dygraph2StaticException diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/tensor_shape_transformer.py b/python/paddle/jit/dy2static/tensor_shape_transformer.py similarity index 96% rename from python/paddle/fluid/dygraph/dygraph_to_static/tensor_shape_transformer.py rename to python/paddle/jit/dy2static/tensor_shape_transformer.py index e7a882b28a296d..9dae08b123eebc 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/tensor_shape_transformer.py +++ b/python/paddle/jit/dy2static/tensor_shape_transformer.py @@ -18,7 +18,7 @@ from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( AstNodeWrapper, ) -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( BaseTransformer, ) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/typehint_transformer.py b/python/paddle/jit/dy2static/typehint_transformer.py similarity index 89% rename from python/paddle/fluid/dygraph/dygraph_to_static/typehint_transformer.py rename to python/paddle/jit/dy2static/typehint_transformer.py index 3fddc0bcf58c20..dd272f76477f47 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/typehint_transformer.py +++ b/python/paddle/jit/dy2static/typehint_transformer.py @@ -12,14 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from paddle.utils import gast -import warnings from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( AstNodeWrapper, ) -from paddle.fluid.dygraph.dygraph_to_static import utils -from paddle.fluid.dygraph.dygraph_to_static.base_transformer import ( +from .base_transformer import ( BaseTransformer, ) From 9ff99e9e61297135b272a713bf873dfa87653b3f Mon Sep 17 00:00:00 2001 From: yuehuayingxueluo <867460659@qq.com> Date: Wed, 30 Nov 2022 10:59:49 +0800 Subject: [PATCH 050/154] clear fluid api: sigmoid_cross_entropy_with_logits (#48146) * clear fluid api: sigmoid_cross_entropy_with_logits * fix loss.py * change paddle.nn.functional.sigmoid_cross_entropy_with_logits * delete sigmoid_cross_entropy_with_logits * fix binary_cross_entropy_with_logits * fix ci bug * fix ci buf --- python/paddle/fluid/layers/loss.py | 64 ----------- .../dygraph_to_static/test_word2vec.py | 5 +- .../tests/unittests/ipu/test_dy2static_ipu.py | 2 +- ...igmoid_cross_entropy_with_logits_op_ipu.py | 100 ------------------ .../tests/unittests/test_dist_transpiler.py | 4 +- .../tests/unittests/test_imperative_gan.py | 40 +++---- ...perative_star_gan_with_gradient_penalty.py | 4 +- .../fluid/tests/unittests/test_layers.py | 11 -- ...st_sigmoid_cross_entropy_with_logits_op.py | 49 ++++----- python/paddle/nn/functional/loss.py | 13 ++- python/paddle/nn/layer/loss.py | 2 - 11 files changed, 61 insertions(+), 233 deletions(-) delete mode 100644 python/paddle/fluid/tests/unittests/ipu/test_sigmoid_cross_entropy_with_logits_op_ipu.py diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py index 306437f754ff61..65a52415f15c6b 100644 --- a/python/paddle/fluid/layers/loss.py +++ b/python/paddle/fluid/layers/loss.py @@ -38,7 +38,6 @@ 'cross_entropy', 'square_error_cost', 'softmax_with_cross_entropy', - 'sigmoid_cross_entropy_with_logits', ] kIgnoreIndex = -100 @@ -292,66 +291,3 @@ def softmax_with_cross_entropy( return_softmax, axis, ) - - -@templatedoc() -def sigmoid_cross_entropy_with_logits( - x, label, ignore_index=kIgnoreIndex, name=None, normalize=False -): - """ - - ${comment} - - Args: - x(Tensor): a 2-D tensor with shape N x D, where N is the batch size and - D is the number of classes. This input is a tensor of logits computed - by the previous operator. Logits are unscaled log probabilities given - as log(p/(1-p)) The data type should be float32 or float64. - label (Tensor): a 2-D tensor of the same type and shape as X. - This input is a tensor of probabalistic labels for each logit. - ignore_index(int): Specifies a target value that is ignored and - does not contribute to the input gradient. - name(str|None): The default value is None. Normally there is - no need for user to set this property. For more information, - please refer to :ref:`api_guide_Name` - normalize(bool): If true, divide the output by the number of - targets != ignore_index. - - Returns: - out(Tensor): ${out_comment} - - Examples: - .. 
code-block:: python - - - import paddle - - input = paddle.rand(shape=[10], dtype='float32') - label = paddle.rand(shape=[10], dtype='float32') - loss = paddle.fluid.layers.sigmoid_cross_entropy_with_logits(input, label, - ignore_index=-1, normalize=True) - print(loss) - """ - - if in_dygraph_mode(): - return _C_ops.sigmoid_cross_entropy_with_logits( - x, label, normalize, int(ignore_index) - ) - check_variable_and_dtype( - x, - 'input', - ['float16', 'float32', 'float64'], - 'sigmoid_cross_entropy_with_logits', - ) - - helper = LayerHelper("sigmoid_cross_entropy_with_logits", **locals()) - - out = helper.create_variable_for_type_inference(dtype=x.dtype) - - helper.append_op( - type="sigmoid_cross_entropy_with_logits", - inputs={"X": x, "Label": label}, - attrs={"ignore_index": ignore_index, 'normalize': normalize}, - outputs={"Out": out}, - ) - return out diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py index 9b444aecae50c7..fb7027e88be601 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py @@ -14,6 +14,7 @@ import math import random +import paddle import numpy as np import paddle import paddle.fluid as fluid @@ -262,7 +263,9 @@ def forward(self, center_words, target_words, label): pred = paddle.nn.functional.sigmoid(word_sim) - loss = fluid.layers.sigmoid_cross_entropy_with_logits(word_sim, label) + loss = paddle.nn.functional.binary_cross_entropy_with_logits( + word_sim, label + ) loss = fluid.layers.reduce_mean(loss) return pred, loss diff --git a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py index 77b68a9dee6bd7..73ddadc0ac4174 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py @@ -271,7 +271,7 @@ def create_model(self, use_ipu=False): class TestWithoutIdentityLoss5(TestBase): def set_op_attrs(self): - self.loss_op = paddle.fluid.layers.sigmoid_cross_entropy_with_logits + self.loss_op = paddle.nn.functional.binary_cross_entropy_with_logits def set_data_feed(self): self.data = paddle.uniform((8, 3, 10, 10), dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/ipu/test_sigmoid_cross_entropy_with_logits_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_sigmoid_cross_entropy_with_logits_op_ipu.py deleted file mode 100644 index 1eda7088533b43..00000000000000 --- a/python/paddle/fluid/tests/unittests/ipu/test_sigmoid_cross_entropy_with_logits_op_ipu.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np - -import paddle -import paddle.static -from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest - - -class TestBase(IPUOpTest): - def setUp(self): - self.set_atol() - self.set_training() - self.set_data_feed() - self.set_feed_attr() - self.set_op_attrs() - - def set_data_feed(self): - x = np.random.uniform(size=[10]) - label = np.arange(10).reshape([10]) - self.feed_fp32 = { - "x": x.astype(np.float32), - "label": label.astype(np.float32), - } - self.feed_fp16 = { - "x": x.astype(np.float16), - "label": label.astype(np.float16), - } - - def set_feed_attr(self): - self.feed_shape = [x.shape for x in self.feed_fp32.values()] - self.feed_list = list(self.feed_fp32.keys()) - - def set_op_attrs(self): - self.attrs = { - 'ignore_index': -100, - } - - @IPUOpTest.static_graph - def build_model(self, on_ipu): - x = paddle.static.data( - name=self.feed_list[0], shape=self.feed_shape[0], dtype="float32" - ) - label = paddle.static.data( - name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32' - ) - out = paddle.fluid.layers.sigmoid_cross_entropy_with_logits( - x, label, **self.attrs - ) - self.fetch_list = [out.name] - - def run_model(self, exec_mode): - self.run_op_test(exec_mode) - - def test(self): - for m in IPUOpTest.ExecutionMode: - if not self.skip_mode(m): - self.build_model(self.is_ipu_mode(m)) - self.run_model(m) - self.check() - - -class TestCase1(TestBase): - def set_op_attrs(self): - self.attrs = { - 'ignore_index': 1, - } - - -class TestCase2(TestBase): - def set_atol(self): - # epsilon is added when normalize is True, use larger atol. - self.atol = 1e-6 - self.rtol = 1e-5 - self.atol_fp16 = 1e-3 - self.rtol_fp16 = 1e-3 - - def set_op_attrs(self): - self.attrs = { - 'ignore_index': 1, - 'normalize': True, - } - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index cdeb7619e7b9d8..04ed91fb0565c7 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -427,10 +427,10 @@ def net_conf(self): true_logits, shape=[-1, neg_num], value=0.0, dtype='float32' ) - true_xent = fluid.layers.sigmoid_cross_entropy_with_logits( + true_xent = paddle.nn.functional.binary_cross_entropy_with_logits( true_logits, label_ones ) - neg_xent = fluid.layers.sigmoid_cross_entropy_with_logits( + neg_xent = paddle.nn.functional.binary_cross_entropy_with_logits( neg_logits, label_zeros ) cost = fluid.layers.elementwise_add( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gan.py b/python/paddle/fluid/tests/unittests/test_imperative_gan.py index 0b1ee16d32f583..6b0e4fb66f5748 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gan.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gan.py @@ -80,8 +80,8 @@ def func_test_gan_float32(self): d_real = discriminator(img) d_loss_real = fluid.layers.reduce_mean( - fluid.layers.sigmoid_cross_entropy_with_logits( - x=d_real, + paddle.nn.functional.binary_cross_entropy_with_logits( + logit=d_real, label=fluid.layers.fill_constant( shape=[2, 1], dtype='float32', value=1.0 ), @@ -90,8 +90,8 @@ def func_test_gan_float32(self): d_fake = discriminator(generator(noise)) d_loss_fake = fluid.layers.reduce_mean( - fluid.layers.sigmoid_cross_entropy_with_logits( - x=d_fake, + paddle.nn.functional.binary_cross_entropy_with_logits( + logit=d_fake, 
label=fluid.layers.fill_constant( shape=[2, 1], dtype='float32', value=0.0 ), @@ -113,8 +113,8 @@ def func_test_gan_float32(self): d_fake = discriminator(generator(noise)) g_loss = fluid.layers.reduce_mean( - fluid.layers.sigmoid_cross_entropy_with_logits( - x=d_fake, + paddle.nn.functional.binary_cross_entropy_with_logits( + logit=d_fake, label=fluid.layers.fill_constant( shape=[2, 1], dtype='float32', value=1.0 ), @@ -165,8 +165,8 @@ def func_test_gan_float32(self): d_real = discriminator(to_variable(np.ones([2, 1], np.float32))) d_loss_real = fluid.layers.reduce_mean( - fluid.layers.sigmoid_cross_entropy_with_logits( - x=d_real, label=to_variable(np.ones([2, 1], np.float32)) + paddle.nn.functional.binary_cross_entropy_with_logits( + logit=d_real, label=to_variable(np.ones([2, 1], np.float32)) ) ) @@ -174,8 +174,9 @@ def func_test_gan_float32(self): generator(to_variable(np.ones([2, 2], np.float32))) ) d_loss_fake = fluid.layers.reduce_mean( - fluid.layers.sigmoid_cross_entropy_with_logits( - x=d_fake, label=to_variable(np.zeros([2, 1], np.float32)) + paddle.nn.functional.binary_cross_entropy_with_logits( + logit=d_fake, + label=to_variable(np.zeros([2, 1], np.float32)), ) ) @@ -189,8 +190,8 @@ def func_test_gan_float32(self): generator(to_variable(np.ones([2, 2], np.float32))) ) g_loss = fluid.layers.reduce_mean( - fluid.layers.sigmoid_cross_entropy_with_logits( - x=d_fake, label=to_variable(np.ones([2, 1], np.float32)) + paddle.nn.functional.binary_cross_entropy_with_logits( + logit=d_fake, label=to_variable(np.ones([2, 1], np.float32)) ) ) g_loss.backward() @@ -219,8 +220,9 @@ def func_test_gan_float32(self): d_real2 = discriminator2(to_variable(np.ones([2, 1], np.float32))) d_loss_real2 = fluid.layers.reduce_mean( - fluid.layers.sigmoid_cross_entropy_with_logits( - x=d_real2, label=to_variable(np.ones([2, 1], np.float32)) + paddle.nn.functional.binary_cross_entropy_with_logits( + logit=d_real2, + label=to_variable(np.ones([2, 1], np.float32)), ) ) @@ -228,8 +230,9 @@ def func_test_gan_float32(self): generator2(to_variable(np.ones([2, 2], np.float32))) ) d_loss_fake2 = fluid.layers.reduce_mean( - fluid.layers.sigmoid_cross_entropy_with_logits( - x=d_fake2, label=to_variable(np.zeros([2, 1], np.float32)) + paddle.nn.functional.binary_cross_entropy_with_logits( + logit=d_fake2, + label=to_variable(np.zeros([2, 1], np.float32)), ) ) @@ -243,8 +246,9 @@ def func_test_gan_float32(self): generator2(to_variable(np.ones([2, 2], np.float32))) ) g_loss2 = fluid.layers.reduce_mean( - fluid.layers.sigmoid_cross_entropy_with_logits( - x=d_fake2, label=to_variable(np.ones([2, 1], np.float32)) + paddle.nn.functional.binary_cross_entropy_with_logits( + logit=d_fake2, + label=to_variable(np.ones([2, 1], np.float32)), ) ) g_loss2.backward() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py index 9b52dc0a04731b..5e3ecf8b6cc3bf 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py @@ -381,7 +381,9 @@ def loss_cls(cls, label, cfg): cls_shape = cls.shape cls = paddle.reshape(cls, [-1, cls_shape[1] * cls_shape[2] * cls_shape[3]]) return ( - paddle.sum(fluid.layers.sigmoid_cross_entropy_with_logits(cls, label)) + paddle.sum( + paddle.nn.functional.binary_cross_entropy_with_logits(cls, label) + ) / cfg.batch_size ) diff --git 
a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index f08c0d1176cfd5..eaf7acaba59963 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -3152,17 +3152,6 @@ def make_word_embedding(self): avg_cost = paddle.mean(cost) return avg_cost - def make_sigmoid_cross_entropy(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - dat = self._get_data(name='data', shape=[10], dtype='float32') - lbl = self._get_data(name='label', shape=[10], dtype='float32') - ignore_index = -1 - return layers.sigmoid_cross_entropy_with_logits( - x=dat, label=lbl, ignore_index=ignore_index - ) - def make_pool2d(self): with program_guard( fluid.default_main_program(), fluid.default_startup_program() diff --git a/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py b/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py index 1d09aefd2d8a8e..aa0e0e36ff6134 100644 --- a/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py +++ b/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py @@ -22,18 +22,11 @@ import paddle -def test_fluid_sigmoid(x, label, normalize=False, ignore_index=-100): - return paddle.fluid.layers.sigmoid_cross_entropy_with_logits( - x, label, int(ignore_index), normalize=normalize - ) - - class TestSigmoidCrossEntropyWithLogitsOp1(OpTest): """Test sigmoid_cross_entropy_with_logit_op with binary label""" def setUp(self): self.op_type = "sigmoid_cross_entropy_with_logits" - self.python_api = test_fluid_sigmoid batch_size = 64 num_classes = 20 self.inputs = { @@ -56,10 +49,10 @@ def setUp(self): self.outputs = {'Out': -term1 - term2} def test_check_output(self): - self.check_output(check_eager=True) + self.check_output(check_eager=False) def test_check_grad(self): - self.check_grad(['X'], 'Out', check_eager=True) + self.check_grad(['X'], 'Out', check_eager=False) class TestSigmoidCrossEntropyWithLogitsOp2(OpTest): @@ -67,7 +60,6 @@ class TestSigmoidCrossEntropyWithLogitsOp2(OpTest): def setUp(self): self.op_type = "sigmoid_cross_entropy_with_logits" - self.python_api = test_fluid_sigmoid batch_size = 64 num_classes = 20 ignore_index = -1 @@ -95,10 +87,10 @@ def setUp(self): self.outputs = {'Out': out} def test_check_output(self): - self.check_output(check_eager=True) + self.check_output(check_eager=False) def test_check_grad(self): - self.check_grad(['X'], 'Out', check_eager=True) + self.check_grad(['X'], 'Out', check_eager=False) class TestSigmoidCrossEntropyWithLogitsOp3(OpTest): @@ -106,7 +98,6 @@ class TestSigmoidCrossEntropyWithLogitsOp3(OpTest): def setUp(self): self.op_type = "sigmoid_cross_entropy_with_logits" - self.python_api = test_fluid_sigmoid batch_size = 64 num_classes = 20 self.inputs = { @@ -129,16 +120,15 @@ def setUp(self): self.outputs = {'Out': -term1 - term2} def test_check_output(self): - self.check_output(check_eager=True) + self.check_output(check_eager=False) def test_check_grad(self): - self.check_grad(['X'], 'Out', check_eager=True) + self.check_grad(['X'], 'Out', check_eager=False) class TestSigmoidCrossEntropyWithNorm(OpTest): def setUp(self): self.op_type = "sigmoid_cross_entropy_with_logits" - self.python_api = test_fluid_sigmoid batch_size = 64 num_classes = 20 ignore_index = -1 @@ -165,10 +155,10 @@ def setUp(self): self.outputs = {'Out': out} def test_check_output(self): - 
self.check_output(check_eager=True) + self.check_output(check_eager=False) def test_check_grad(self): - self.check_grad(['X'], 'Out', check_eager=True) + self.check_grad(['X'], 'Out', check_eager=False) class TestSigmoidCrossEntropyWithLogitsOp5(OpTest): @@ -176,7 +166,6 @@ class TestSigmoidCrossEntropyWithLogitsOp5(OpTest): def setUp(self): self.op_type = "sigmoid_cross_entropy_with_logits" - self.python_api = test_fluid_sigmoid batch_size = [10, 10] num_classes = 20 self.inputs = { @@ -199,16 +188,15 @@ def setUp(self): self.outputs = {'Out': -term1 - term2} def test_check_output(self): - self.check_output(check_eager=True) + self.check_output(check_eager=False) def test_check_grad(self): - self.check_grad(['X'], 'Out', check_eager=True) + self.check_grad(['X'], 'Out', check_eager=False) class TestSigmoidCrossEntropyWithNorm2(OpTest): def setUp(self): self.op_type = "sigmoid_cross_entropy_with_logits" - self.python_api = test_fluid_sigmoid batch_size = [10, 10] num_classes = 20 ignore_index = -1 @@ -235,17 +223,16 @@ def setUp(self): self.outputs = {'Out': out} def test_check_output(self): - self.check_output(check_eager=True) + self.check_output(check_eager=False) def test_check_grad(self): - self.check_grad(['X'], 'Out', check_eager=True) + self.check_grad(['X'], 'Out', check_eager=False) class TestSigmoidCrossEntropyWithLogitsOp6(OpTest): """Test sigmoid_cross_entropy_with_logit_op with binary label""" def setUp(self): self.op_type = "sigmoid_cross_entropy_with_logits" - self.python_api = test_fluid_sigmoid batch_size = [10, 10] num_classes = 20 self.inputs = { @@ -268,10 +255,10 @@ def setUp(self): self.outputs = {'Out': -term1 - term2} def test_check_output(self): - self.check_output(check_eager=True) + self.check_output(check_eager=False) def test_check_grad(self): - self.check_grad(['X'], 'Out', check_eager=True) + self.check_grad(['X'], 'Out', check_eager=False) class TestSigmoidCrossEntropyWithLogitsOpError(unittest.TestCase): def test_errors(self): @@ -289,7 +276,9 @@ def test_Variable(): [[1, 1, 1, 1]], fluid.CPUPlace(), ) - fluid.layers.sigmoid_cross_entropy_with_logits(x1, lab1) + paddle.nn.functional.binary_cross_entropy_with_logits( + x1, lab1 + ) self.assertRaises(TypeError, test_Variable) @@ -302,7 +291,9 @@ def test_dtype(): lab2 = fluid.layers.data( name='lab2', shape=[3, 4, 5, 6], dtype="int32" ) - fluid.layers.sigmoid_cross_entropy_with_logits(x2, lab2) + paddle.nn.functional.binary_cross_entropy_with_logits( + x2, lab2 + ) self.assertRaises(TypeError, test_dtype) diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index 9a99a6ac9804df..481598ab95858a 100755 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -729,8 +729,6 @@ def binary_cross_entropy_with_logits( ): r""" This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer. - Also, we can see it as the combine of ``sigmoid_cross_entropy_with_logits`` - layer and some reduce operations. This measures the element-wise probability error in classification tasks in which each class is independent. 
@@ -885,8 +883,15 @@ def binary_cross_entropy_with_logits( if reduction == 'none' and pos_weight is None and weight is None: sigmoid_name = name - out = paddle.fluid.layers.sigmoid_cross_entropy_with_logits( - logit, label, name=sigmoid_name + helper = LayerHelper("sigmoid_cross_entropy_with_logits", **locals()) + + out = helper.create_variable_for_type_inference(dtype=logit.dtype) + + helper.append_op( + type="sigmoid_cross_entropy_with_logits", + inputs={"X": logit, "Label": label}, + attrs={"ignore_index": kIgnoreIndex, 'normalize': False}, + outputs={"Out": out}, ) one = paddle.full(shape=[1], fill_value=1.0, dtype=logit.dtype) diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py index 95db0d9acd7fcf..cf9f9762aa6088 100644 --- a/python/paddle/nn/layer/loss.py +++ b/python/paddle/nn/layer/loss.py @@ -30,8 +30,6 @@ class BCEWithLogitsLoss(Layer): r""" This operator combines the sigmoid layer and the :ref:`api_paddle_nn_BCELoss` layer. - Also, we can see it as the combine of ``sigmoid_cross_entropy_with_logits`` - layer and some reduce operations. This measures the element-wise probability error in classification tasks in which each class is independent. From 1248671228549d69480ba45acf065ec325c42a5d Mon Sep 17 00:00:00 2001 From: RichardWooSJTU <37864677+RichardWooSJTU@users.noreply.github.com> Date: Wed, 30 Nov 2022 11:00:27 +0800 Subject: [PATCH 051/154] Add int8 support in fused_multi_transformer_pass and fuse_multi_transformer_layer_pass (#48209) * delete unnecessary shape and slice op Co-authored-by: Your Name --- paddle/fluid/framework/ir/CMakeLists.txt | 2 + .../ir/delete_quant_dequant_linear_op_pass.cc | 17 +- ...e_weight_dequant_linear_op_decoder_pass.cc | 373 ++++++++++ ...te_weight_dequant_linear_op_decoder_pass.h | 34 + ...e_weight_dequant_linear_op_encoder_pass.cc | 370 ++++++++++ ...te_weight_dequant_linear_op_encoder_pass.h | 34 + .../ir/fuse_multi_transformer_layer_pass.cc | 29 +- ...use_multi_transformer_layer_pass_tester.cc | 1 + .../fused_multi_transformer_decoder_pass.cc | 255 ++++++- ...d_multi_transformer_decoder_pass_tester.cc | 3 + .../fused_multi_transformer_encoder_pass.cc | 687 ++++++++++++++++-- ...d_multi_transformer_encoder_pass_tester.cc | 3 + .../framework/ir/graph_pattern_detector.cc | 67 ++ .../framework/ir/graph_pattern_detector.h | 33 + paddle/fluid/framework/ir/pass.cc | 5 +- .../inference/api/paddle_pass_builder.cc | 12 +- paddle/fluid/operators/fused/attn_gemm_int8.h | 24 +- paddle/fluid/operators/fused/cublaslt.h | 96 ++- .../operators/fused/fused_dropout_act_bias.h | 55 +- .../operators/fused/fused_dropout_helper.h | 8 - .../fused_layernorm_residual_dropout_bias.h | 10 +- .../fused/fused_multi_transformer_int8_op.cc | 20 +- .../fused/fused_multi_transformer_int8_op.cu | 82 +-- .../fused/fused_residual_dropout_bias.h | 13 +- .../operators/fused/quant_dequant_kernel.h | 71 +- .../test_fused_multi_transformer_int8_op.py | 106 ++- 26 files changed, 2115 insertions(+), 295 deletions(-) create mode 100644 paddle/fluid/framework/ir/delete_weight_dequant_linear_op_decoder_pass.cc create mode 100644 paddle/fluid/framework/ir/delete_weight_dequant_linear_op_decoder_pass.h create mode 100644 paddle/fluid/framework/ir/delete_weight_dequant_linear_op_encoder_pass.cc create mode 100644 paddle/fluid/framework/ir/delete_weight_dequant_linear_op_encoder_pass.h diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 06ea7acb3315e1..85deab25dee44b 100644 --- 
a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -96,6 +96,8 @@ pass_library(shuffle_channel_detect_pass inference) pass_library(delete_quant_dequant_op_pass inference) pass_library(delete_quant_dequant_filter_op_pass inference) pass_library(delete_weight_dequant_linear_op_pass inference) +pass_library(delete_weight_dequant_linear_op_encoder_pass inference) +pass_library(delete_weight_dequant_linear_op_decoder_pass inference) pass_library(delete_quant_dequant_linear_op_pass inference) pass_library(delete_dropout_op_pass inference) pass_library(delete_c_identity_op_pass inference) diff --git a/paddle/fluid/framework/ir/delete_quant_dequant_linear_op_pass.cc b/paddle/fluid/framework/ir/delete_quant_dequant_linear_op_pass.cc index 9057f3450453ae..e5ecbea39061ab 100644 --- a/paddle/fluid/framework/ir/delete_quant_dequant_linear_op_pass.cc +++ b/paddle/fluid/framework/ir/delete_quant_dequant_linear_op_pass.cc @@ -121,14 +121,27 @@ void DeleteQuantDequantLinearOpPass::ApplyImpl(ir::Graph* graph) const { true, platform::errors::InvalidArgument( "Input scale tensor's place should be CPU.")); - const float* input_scale_data = input_scale_tensor.data(); - float input_scale = input_scale_data[0]; + + float input_scale; + if (input_scale_tensor.dtype() == paddle::experimental::DataType::FLOAT32) { + const float* input_scale_data = input_scale_tensor.data(); + input_scale = input_scale_data[0]; + } else if (input_scale_tensor.dtype() == + paddle::experimental::DataType::FLOAT16) { + const phi::dtype::float16* input_scale_data = + input_scale_tensor.data(); + input_scale = static_cast(input_scale_data[0]); + } else { + PADDLE_THROW(platform::errors::Unimplemented("%d is not supported.", + input_scale_tensor.dtype())); + } int nums_any_ops = dequantize_linear_op_out->outputs.size(); for (int i = 0; i < nums_any_ops; ++i) { auto* any_op_desc = dequantize_linear_op_out->outputs[i]->Op(); any_op_desc->SetAttr("Input_scale_" + quantize_linear_op_x->Var()->Name(), input_scale); + // link x to any_op2 any_op_desc->RenameInput(dequantize_linear_op_out->Var()->Name(), quantize_linear_op_x->Var()->Name()); diff --git a/paddle/fluid/framework/ir/delete_weight_dequant_linear_op_decoder_pass.cc b/paddle/fluid/framework/ir/delete_weight_dequant_linear_op_decoder_pass.cc new file mode 100644 index 00000000000000..fe692d01928f72 --- /dev/null +++ b/paddle/fluid/framework/ir/delete_weight_dequant_linear_op_decoder_pass.cc @@ -0,0 +1,373 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
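The quantize_linear/dequantize_linear pair that delete_quant_dequant_linear_op_pass removes above is the standard symmetric fake-quant round trip, which is why only the recorded scale has to survive as an Input_scale_* attribute on the consumer op. A rough NumPy sketch, assuming the scale tensor holds the activation's abs-max (consistent with the /127 factors used later in this patch):

    import numpy as np

    def fake_quant_dequant(x, abs_max, qmax=127):
        # quantize_linear followed by dequantize_linear, symmetric per-tensor int8
        q = np.clip(np.round(x / abs_max * qmax), -qmax, qmax)
        return q * abs_max / qmax

    x = np.array([-1.7, 0.01, 0.9], dtype=np.float32)
    print(fake_quant_dequant(x, abs_max=2.0))  # close to x, up to int8 rounding error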
+ +#include "paddle/fluid/framework/ir/delete_weight_dequant_linear_op_decoder_pass.h" + +#include +#include +#include +#include +#include + +namespace paddle { +namespace framework { +namespace ir { + +#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern); +#define GET_NODES \ + GET_IR_NODE(weight_dequantize_linear_op_x); \ + GET_IR_NODE(weight_dequantize_linear_op_scale); \ + GET_IR_NODE(weight_dequantize_linear_op); \ + GET_IR_NODE(weight_dequantize_linear_op_out); \ + GET_IR_NODE(any_op2); + +DeleteWeightDequantLinearOpDecoderPass:: + DeleteWeightDequantLinearOpDecoderPass() { + AddOpCompat(OpCompat("quantize_linear")) + .AddInput("X") + .IsTensor() + .End() + .AddInput("Scale") + .IsTensor() + .End() + .AddInput("ZeroPoint") + .IsTensor() + .IsOptional() + .End() + .AddOutput("Y") + .IsTensor() + .End() + .AddAttr("bit_length") + .IsType() + .End() + .AddAttr("quant_axis") + .IsType() + .End() + .AddAttr("round_type") + .IsOptional() + .IsType() + .End(); + AddOpCompat(OpCompat("dequantize_linear")) + .AddInput("X") + .IsTensor() + .End() + .AddInput("Scale") + .IsTensor() + .End() + .AddInput("ZeroPoint") + .IsTensor() + .IsOptional() + .End() + .AddOutput("Y") + .IsTensor() + .End() + .AddAttr("bit_length") + .IsType() + .End() + .AddAttr("quant_axis") + .IsType() + .End() + .AddAttr("round_type") + .IsOptional() + .IsType() + .End(); + AddOpCompat(OpCompat("conv2d")) + .AddInput("Input") + .IsTensor() + .End() + .AddInput("Filter") + .IsTensor() + .End() + .AddInput("Bias") + .IsTensor() + .IsOptional() + .End() + .AddInput("ResidualData") + .IsTensor() + .IsOptional() + .End() + .AddOutput("Output") + .IsTensor() + .End() + .AddAttr("strides") + .IsType>() + .End() + .AddAttr("paddings") + .IsType>() + .End() + .AddAttr("padding_algorithm") + .IsOptional() + .IsStringIn({"EXPLICIT", "SAME", "VALID"}) + .End() + .AddAttr("groups") + .IsNumGE(1) + .End() + .AddAttr("dilations") + .IsType>() + .End() + .AddAttr("data_format") + .IsStringIn({"NCHW", "NHWC", "AnyLayout"}) + .End(); + AddOpCompat(OpCompat("depthwise_conv2d")) + .AddInput("Input") + .IsTensor() + .End() + .AddInput("Filter") + .IsTensor() + .End() + .AddInput("Bias") + .IsTensor() + .IsOptional() + .End() + .AddInput("ResidualData") + .IsTensor() + .IsOptional() + .End() + .AddOutput("Output") + .IsTensor() + .End() + .AddAttr("strides") + .IsType>() + .End() + .AddAttr("paddings") + .IsType>() + .End() + .AddAttr("padding_algorithm") + .IsOptional() + .IsStringIn({"EXPLICIT", "SAME", "VALID"}) + .End() + .AddAttr("groups") + .IsNumGE(1) + .End() + .AddAttr("dilations") + .IsType>() + .End() + .AddAttr("data_format") + .IsStringIn({"NCHW", "NHWC", "AnyLayout"}) + .End(); + AddOpCompat(OpCompat("mul")) + .AddInput("X") + .IsTensor() + .End() + .AddInput("Y") + .IsTensor() + .End() + .AddOutput("Out") + .IsTensor() + .End() + .AddAttr("x_num_col_dims") + .IsNumGE(1) + .End() + .AddAttr("y_num_col_dims") + .IsNumEQ(1) + .End(); + AddOpCompat(OpCompat("matmul_v2")) + .AddInput("X") + .IsTensor() + .End() + .AddInput("Y") + .IsTensor() + .End() + .AddOutput("Out") + .IsTensor() + .End() + .AddAttr("trans_x") + .IsBoolEQ(false) + .End() + .AddAttr("trans_y") + .IsBoolEQ(false) + .End(); + AddOpCompat(OpCompat("matmul")) + .AddInput("X") + .IsTensor() + .End() + .AddInput("Y") + .IsTensor() + .End() + .AddOutput("Out") + .IsTensor() + .End() + .AddAttr("alpha") + .IsNumGE(0.99f) + .IsNumLE(1.01f) + .End() + .AddAttr("transpose_X") + .IsBoolEQ(false) + .End() + .AddAttr("transpose_Y") + .IsBoolEQ(false) 
+ .End();
+ AddOpCompat(OpCompat("fc"))
+ .AddInput("Input")
+ .IsTensor()
+ .End()
+ .AddInput("W")
+ .IsTensor()
+ .End()
+ .AddInput("Bias")
+ .IsTensor()
+ .End()
+ .AddOutput("Out")
+ .IsTensor()
+ .End()
+ .AddAttr("in_num_col_dims")
+ .IsNumGE(1)
+ .End()
+ .AddAttr("activation_type")
+ .IsStringIn({"relu", ""})
+ .End();
+ AddOpCompat(OpCompat("conv2d_transpose"))
+ .AddInput("Input")
+ .IsTensor()
+ .End()
+ .AddInput("Filter")
+ .IsTensor()
+ .End()
+ .AddInput("Bias")
+ .IsTensor()
+ .IsOptional()
+ .End()
+ .AddOutput("Output")
+ .IsTensor()
+ .End()
+ .AddAttr("output_padding")
+ .IsType>()
+ .IsOptional()
+ .End()
+ .AddAttr("output_size")
+ .IsType>()
+ .IsOptional()
+ .End()
+ .AddAttr("groups")
+ .IsNumGE(1)
+ .End()
+ .AddAttr("dilations")
+ .IsType>()
+ .End()
+ .AddAttr("strides")
+ .IsType>()
+ .End()
+ .AddAttr("paddings")
+ .IsType>()
+ .End()
+ .AddAttr("padding_algorithm")
+ .IsOptional()
+ .IsStringIn({"EXPLICIT", "SAME", "VALID"})
+ .End()
+ .AddAttr("data_format")
+ .IsStringIn({"NCHW", "NHWC", "AnyLayout"})
+ .End();
+}
+// Delete dequantize_linear_op, then dequantize weight
+void DeleteWeightDequantLinearOpDecoderPass::ApplyImpl(ir::Graph* graph) const {
+ const std::string pattern_name =
+ "delete_weight_dequant_linear_op_decoder_pattern";
+ FusePassBase::Init(pattern_name, graph);
+
+ GraphPatternDetector gpd;
+ auto* scope = param_scope();
+ PADDLE_ENFORCE_NOT_NULL(scope,
+ platform::errors::InvalidArgument(
+ "Scope in DeleteWeightDequantLinearOpDecoderPass "
+ "should not be null."));
+ // Create pattern
+ patterns::DeleteWeightDequantLinearOpDecoderPattern pattern(
+ gpd.mutable_pattern(), pattern_name);
+ pattern();
+ int found_count = 0;
+ bool is_int8 = false;
+
+ auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+ Graph* g) {
+ GET_NODES;
+ /*
+ if (!IsCompat(subgraph, g)) {
+ LOG(WARNING) << "delete_weight_dequant_linear_op_pass "
+ "compat check failed.";
+ return;
+ }
+ */
+ is_int8 = true;
+ std::unordered_set nodes2rm = {};
+
+ auto* any_op2_desc = any_op2->Op();
+
+ // Get weight scale
+ std::vector weight_scale;
+ auto* weight_scale_tensor =
+ scope->GetVar(weight_dequantize_linear_op_scale->Name())
+ ->GetMutable();
+ auto weight_scale_nums = weight_scale_tensor->numel();
+
+ if (weight_scale_tensor->dtype() ==
+ paddle::experimental::DataType::FLOAT32) {
+ float* weight_scale_data = weight_scale_tensor->data();
+ for (int i = 0; i < weight_scale_nums; i++) {
+ weight_scale.push_back(weight_scale_data[i]);
+ }
+ } else if (weight_scale_tensor->dtype() ==
+ paddle::experimental::DataType::FLOAT16) {
+ phi::dtype::float16* weight_scale_data =
+ weight_scale_tensor->data();
+ for (int i = 0; i < weight_scale_nums; i++) {
+ weight_scale.push_back(static_cast(weight_scale_data[i]));
+ }
+ } else {
+ PADDLE_THROW(platform::errors::Unimplemented(
+ "%d is not supported.", weight_scale_tensor->dtype()));
+ }
+
+ int quant_axis = PADDLE_GET_CONST(
+ int, weight_dequantize_linear_op->Op()->GetAttr("quant_axis"));
+ if (quant_axis == -1) { // per_layer quant_dequant: all OP
+ PADDLE_ENFORCE_EQ(weight_scale_nums,
+ 1,
+ platform::errors::InvalidArgument(
+ "When quant_axis == -1, per_layer quant_dequant "
+ "is used and the number of weight_scale values "
+ "should be 1."));
+
+ // Add attr to any_op2
+ any_op2_desc->SetAttr("weight_scale", weight_scale[0]);
+ } else {
+ PADDLE_THROW(platform::errors::Unimplemented(
+ "Delete Weight Dequant Linear Op Decoder Pass is not supported for "
+ "per-channel quantization"));
+ }
+
+
nodes2rm.insert(weight_dequantize_linear_op_scale); + nodes2rm.insert(weight_dequantize_linear_op); + nodes2rm.insert(weight_dequantize_linear_op_out); + + // relink weight to any_op2 + any_op2_desc->RenameInput(weight_dequantize_linear_op_out->Var()->Name(), + weight_dequantize_linear_op_x->Var()->Name()); + any_op2_desc->Flush(); + IR_NODE_LINK_TO(weight_dequantize_linear_op_x, any_op2); + GraphSafeRemoveNodes(graph, nodes2rm); + found_count++; + }; + gpd(graph, handler); + if (is_int8) { + auto& enable_int8 = graph->Get("enable_int8"); + enable_int8 = true; + } + AddStatis(found_count); +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(delete_weight_dequant_linear_op_decoder_pass, + paddle::framework::ir::DeleteWeightDequantLinearOpDecoderPass); diff --git a/paddle/fluid/framework/ir/delete_weight_dequant_linear_op_decoder_pass.h b/paddle/fluid/framework/ir/delete_weight_dequant_linear_op_decoder_pass.h new file mode 100644 index 00000000000000..866bfb7b736543 --- /dev/null +++ b/paddle/fluid/framework/ir/delete_weight_dequant_linear_op_decoder_pass.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/framework/ir/fuse_pass_base.h" + +namespace paddle { +namespace framework { +namespace ir { + +class DeleteWeightDequantLinearOpDecoderPass : public FusePassBase { + public: + DeleteWeightDequantLinearOpDecoderPass(); + virtual ~DeleteWeightDequantLinearOpDecoderPass() {} + + protected: + void ApplyImpl(ir::Graph* graph) const override; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/delete_weight_dequant_linear_op_encoder_pass.cc b/paddle/fluid/framework/ir/delete_weight_dequant_linear_op_encoder_pass.cc new file mode 100644 index 00000000000000..0cffcd38b3466a --- /dev/null +++ b/paddle/fluid/framework/ir/delete_weight_dequant_linear_op_encoder_pass.cc @@ -0,0 +1,370 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
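The decoder pass above (and its encoder twin that follows) keeps the weight in int8 and records a single per-layer weight_scale, rejecting per-channel scales (quant_axis != -1). Under the same abs-max convention, recovering the float weight would look roughly like the sketch below; this is purely illustrative, since the fused int8 kernel never materializes the float weight:

    import numpy as np

    def dequantize_weight(w_int8, weight_scale, qmax=127):
        # quant_axis == -1: one abs-max scale for the whole tensor (per-layer)
        return w_int8.astype(np.float32) * weight_scale / qmax

    w_int8 = np.random.randint(-127, 128, size=(4, 4), dtype=np.int8)
    print(dequantize_weight(w_int8, weight_scale=0.05))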
+ +#include "paddle/fluid/framework/ir/delete_weight_dequant_linear_op_encoder_pass.h" + +#include +#include +#include +#include +#include + +namespace paddle { +namespace framework { +namespace ir { + +#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern); +#define GET_NODES \ + GET_IR_NODE(weight_dequantize_linear_op_x); \ + GET_IR_NODE(weight_dequantize_linear_op_scale); \ + GET_IR_NODE(weight_dequantize_linear_op); \ + GET_IR_NODE(weight_dequantize_linear_op_out); \ + GET_IR_NODE(any_op2); + +DeleteWeightDequantLinearOpEncoderPass:: + DeleteWeightDequantLinearOpEncoderPass() { + AddOpCompat(OpCompat("quantize_linear")) + .AddInput("X") + .IsTensor() + .End() + .AddInput("Scale") + .IsTensor() + .End() + .AddInput("ZeroPoint") + .IsTensor() + .IsOptional() + .End() + .AddOutput("Y") + .IsTensor() + .End() + .AddAttr("bit_length") + .IsType() + .End() + .AddAttr("quant_axis") + .IsType() + .End() + .AddAttr("round_type") + .IsOptional() + .IsType() + .End(); + AddOpCompat(OpCompat("dequantize_linear")) + .AddInput("X") + .IsTensor() + .End() + .AddInput("Scale") + .IsTensor() + .End() + .AddInput("ZeroPoint") + .IsTensor() + .IsOptional() + .End() + .AddOutput("Y") + .IsTensor() + .End() + .AddAttr("bit_length") + .IsType() + .End() + .AddAttr("quant_axis") + .IsType() + .End() + .AddAttr("round_type") + .IsOptional() + .IsType() + .End(); + AddOpCompat(OpCompat("conv2d")) + .AddInput("Input") + .IsTensor() + .End() + .AddInput("Filter") + .IsTensor() + .End() + .AddInput("Bias") + .IsTensor() + .IsOptional() + .End() + .AddInput("ResidualData") + .IsTensor() + .IsOptional() + .End() + .AddOutput("Output") + .IsTensor() + .End() + .AddAttr("strides") + .IsType>() + .End() + .AddAttr("paddings") + .IsType>() + .End() + .AddAttr("padding_algorithm") + .IsOptional() + .IsStringIn({"EXPLICIT", "SAME", "VALID"}) + .End() + .AddAttr("groups") + .IsNumGE(1) + .End() + .AddAttr("dilations") + .IsType>() + .End() + .AddAttr("data_format") + .IsStringIn({"NCHW", "NHWC", "AnyLayout"}) + .End(); + AddOpCompat(OpCompat("depthwise_conv2d")) + .AddInput("Input") + .IsTensor() + .End() + .AddInput("Filter") + .IsTensor() + .End() + .AddInput("Bias") + .IsTensor() + .IsOptional() + .End() + .AddInput("ResidualData") + .IsTensor() + .IsOptional() + .End() + .AddOutput("Output") + .IsTensor() + .End() + .AddAttr("strides") + .IsType>() + .End() + .AddAttr("paddings") + .IsType>() + .End() + .AddAttr("padding_algorithm") + .IsOptional() + .IsStringIn({"EXPLICIT", "SAME", "VALID"}) + .End() + .AddAttr("groups") + .IsNumGE(1) + .End() + .AddAttr("dilations") + .IsType>() + .End() + .AddAttr("data_format") + .IsStringIn({"NCHW", "NHWC", "AnyLayout"}) + .End(); + AddOpCompat(OpCompat("mul")) + .AddInput("X") + .IsTensor() + .End() + .AddInput("Y") + .IsTensor() + .End() + .AddOutput("Out") + .IsTensor() + .End() + .AddAttr("x_num_col_dims") + .IsNumGE(1) + .End() + .AddAttr("y_num_col_dims") + .IsNumEQ(1) + .End(); + AddOpCompat(OpCompat("matmul_v2")) + .AddInput("X") + .IsTensor() + .End() + .AddInput("Y") + .IsTensor() + .End() + .AddOutput("Out") + .IsTensor() + .End() + .AddAttr("trans_x") + .IsBoolEQ(false) + .End() + .AddAttr("trans_y") + .IsBoolEQ(false) + .End(); + AddOpCompat(OpCompat("matmul")) + .AddInput("X") + .IsTensor() + .End() + .AddInput("Y") + .IsTensor() + .End() + .AddOutput("Out") + .IsTensor() + .End() + .AddAttr("alpha") + .IsNumGE(0.99f) + .IsNumLE(1.01f) + .End() + .AddAttr("transpose_X") + .IsBoolEQ(false) + .End() + .AddAttr("transpose_Y") + .IsBoolEQ(false) 
+ .End(); + AddOpCompat(OpCompat("fc")) + .AddInput("Input") + .IsTensor() + .End() + .AddInput("W") + .IsTensor() + .End() + .AddInput("Bias") + .IsTensor() + .End() + .AddOutput("Out") + .IsTensor() + .End() + .AddAttr("in_num_col_dims") + .IsNumGE(1) + .End() + .AddAttr("activation_type") + .IsStringIn({"relu", ""}) + .End(); + AddOpCompat(OpCompat("conv2d_transpose")) + .AddInput("Input") + .IsTensor() + .End() + .AddInput("Filter") + .IsTensor() + .End() + .AddInput("Bias") + .IsTensor() + .IsOptional() + .End() + .AddOutput("Output") + .IsTensor() + .End() + .AddAttr("output_padding") + .IsType>() + .IsOptional() + .End() + .AddAttr("output_size") + .IsType>() + .IsOptional() + .End() + .AddAttr("groups") + .IsNumGE(1) + .End() + .AddAttr("dilations") + .IsType>() + .End() + .AddAttr("strides") + .IsType>() + .End() + .AddAttr("paddings") + .IsType>() + .End() + .AddAttr("padding_algorithm") + .IsOptional() + .IsStringIn({"EXPLICIT", "SAME", "VALID"}) + .End() + .AddAttr("data_format") + .IsStringIn({"NCHW", "NHWC", "AnyLayout"}) + .End(); +} +// Delete dequantize_linear_op, then dequantize weight +void DeleteWeightDequantLinearOpEncoderPass::ApplyImpl(ir::Graph* graph) const { + const std::string pattern_name = + "delete_weight_dequant_linear_op_encoder_pattern"; + FusePassBase::Init(pattern_name, graph); + + GraphPatternDetector gpd; + auto* scope = param_scope(); + PADDLE_ENFORCE_NOT_NULL(scope, + platform::errors::InvalidArgument( + "Scope in DeleteWeightDequantLinearOpEncoderPass " + "should not be null.")); + // Create pattern + patterns::DeleteWeightDequantLinearOpEncoderPattern pattern( + gpd.mutable_pattern(), pattern_name); + pattern(); + int found_count = 0; + bool is_int8 = false; + + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* g) { + GET_NODES; + /* + if (!IsCompat(subgraph, g)) { + LOG(WARNING) << "delete_weight_dequant_linear_op_pass " + "compat check failed."; + return; + } + */ + is_int8 = true; + std::unordered_set nodes2rm = {}; + + auto* any_op2_desc = any_op2->Op(); + + // Get weight scale + std::vector weight_scale; + auto* weight_scale_tensor = + scope->GetVar(weight_dequantize_linear_op_scale->Name()) + ->GetMutable(); + auto weight_scale_nums = weight_scale_tensor->numel(); + + if (weight_scale_tensor->dtype() == + paddle::experimental::DataType::FLOAT32) { + float* weight_scale_data = weight_scale_tensor->data(); + for (int i = 0; i < weight_scale_nums; i++) { + weight_scale.push_back(weight_scale_data[i]); + } + } else if (weight_scale_tensor->dtype() == + paddle::experimental::DataType::FLOAT16) { + phi::dtype::float16* weight_scale_data = + weight_scale_tensor->data(); + for (int i = 0; i < weight_scale_nums; i++) { + weight_scale.push_back(static_cast(weight_scale_data[i])); + } + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "%d is not supported.", weight_scale_tensor->dtype())); + } + + int quant_axis = PADDLE_GET_CONST( + int, weight_dequantize_linear_op->Op()->GetAttr("quant_axis")); + if (quant_axis == -1) { // per_layer quant_dequant: all OP + PADDLE_ENFORCE_EQ(weight_scale_nums, + 1, + platform::errors::InvalidArgument( + "When quant_axis == -1 means use per_layer " + "quant_dequant, weight_scale'number should be 1.")); + + // Add attr to anyop 2 + any_op2_desc->SetAttr("weight_scale", weight_scale[0]); + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Delete Weight Dequant Linear Op Encoder Pass is not supported for " + "per-channel quantization")); + } + + 
nodes2rm.insert(weight_dequantize_linear_op_scale); + nodes2rm.insert(weight_dequantize_linear_op); + nodes2rm.insert(weight_dequantize_linear_op_out); + + // relink weight to any_op2 + any_op2_desc->RenameInput(weight_dequantize_linear_op_out->Var()->Name(), + weight_dequantize_linear_op_x->Var()->Name()); + any_op2_desc->Flush(); + IR_NODE_LINK_TO(weight_dequantize_linear_op_x, any_op2); + GraphSafeRemoveNodes(graph, nodes2rm); + found_count++; + }; + gpd(graph, handler); + graph->Set("enable_int8", new bool(is_int8)); + AddStatis(found_count); +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(delete_weight_dequant_linear_op_encoder_pass, + paddle::framework::ir::DeleteWeightDequantLinearOpEncoderPass); diff --git a/paddle/fluid/framework/ir/delete_weight_dequant_linear_op_encoder_pass.h b/paddle/fluid/framework/ir/delete_weight_dequant_linear_op_encoder_pass.h new file mode 100644 index 00000000000000..8aead6bd5cc583 --- /dev/null +++ b/paddle/fluid/framework/ir/delete_weight_dequant_linear_op_encoder_pass.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/framework/ir/fuse_pass_base.h" + +namespace paddle { +namespace framework { +namespace ir { + +class DeleteWeightDequantLinearOpEncoderPass : public FusePassBase { + public: + DeleteWeightDequantLinearOpEncoderPass(); + virtual ~DeleteWeightDequantLinearOpEncoderPass() {} + + protected: + void ApplyImpl(ir::Graph* graph) const override; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/fuse_multi_transformer_layer_pass.cc b/paddle/fluid/framework/ir/fuse_multi_transformer_layer_pass.cc index b730d46ab7c5f9..7cbb601a68cbf9 100644 --- a/paddle/fluid/framework/ir/fuse_multi_transformer_layer_pass.cc +++ b/paddle/fluid/framework/ir/fuse_multi_transformer_layer_pass.cc @@ -118,9 +118,15 @@ int FuseMultiTransformerLayerPass::BuildFusion(Graph* graph, GraphPatternDetector gpd; auto* pattern = gpd.mutable_pattern(); - // TODO(wufeisheng): Get enable_int8 attr from graph after - // fused_multi_transformer pass with int8 merged bool enable_int8 = false; + if (graph->Has("enable_int8")) { + enable_int8 = graph->Get("enable_int8"); + } + if (!enable_int8) { + VLOG(4) + << "fuse_multi_layer_transformer_pass will match float transformer op " + "cause enable_int8 is not been set or set to false"; + } int num_fuse_op = 0; bool is_decoder = false; @@ -209,7 +215,13 @@ int FuseMultiTransformerLayerPass::BuildFusion(Graph* graph, "OutLinearW", "QKVBias", "QKVW"}; - + if (enable_int8) { + std::vector inputs_names_int8_supp = { + "FFN1OutScale", "FFN2OutScale", "OutLinearOutScale", "QKVOutScale"}; + inputs_names.insert(inputs_names.end(), + inputs_names_int8_supp.begin(), + inputs_names_int8_supp.end()); + } for (const auto& input_name : inputs_names) { MergeInput(fuse_op_descs[0], fuse_op_input_var_name_maps, input_name); } 
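Conceptually, MergeInput concatenates the per-layer variable lists of each input slot onto the first kept op, and with enable_int8 the four extra *OutScale slots are merged the same way. A small Python sketch with hypothetical data, not the pass's actual helper:

    def merge_input(per_layer_inputs, slot):
        # per_layer_inputs: one dict per transformer layer, slot -> [variable names]
        merged = []
        for layer in per_layer_inputs:
            merged.extend(layer.get(slot, []))
        return merged

    layers = [
        {"QKVW": ["qkv_w_0"], "QKVOutScale": ["qkv_w_0_out_scale"]},
        {"QKVW": ["qkv_w_1"], "QKVOutScale": ["qkv_w_1_out_scale"]},
    ]
    for slot in ("QKVW", "QKVOutScale"):
        print(slot, merge_input(layers, slot))  # one entry per layer, in layer order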
@@ -227,6 +239,17 @@ int FuseMultiTransformerLayerPass::BuildFusion(Graph* graph, } fuse_op_descs[0]->SetOutput("CacheKVOut", merged_cache_kv_out_names); + if (enable_int8) { + // Merge inputs scale + std::vector attr_names = {"qkv_in_scale", + "out_linear_in_scale", + "ffn1_in_scale", + "ffn2_in_scale"}; + for (const auto& name : attr_names) { + MergeAttrs(fuse_op_descs, name); + } + } + //////////////// //// ReLink //// //////////////// diff --git a/paddle/fluid/framework/ir/fuse_multi_transformer_layer_pass_tester.cc b/paddle/fluid/framework/ir/fuse_multi_transformer_layer_pass_tester.cc index 72635d1c958555..c96935a9ac649a 100644 --- a/paddle/fluid/framework/ir/fuse_multi_transformer_layer_pass_tester.cc +++ b/paddle/fluid/framework/ir/fuse_multi_transformer_layer_pass_tester.cc @@ -98,6 +98,7 @@ TEST(FuseMultiTransformerLayerPass, encoder_fp) { std::unique_ptr graph(new ir::Graph(layers.main_program())); graph->Set("__param_scope__", CreateParamScope()); graph->Set(kFusedMultiTransformerEncoderFusionCount, new int(num_layers)); + graph->Set("enable_int8", new bool(false)); auto pass = PassRegistry::Instance().Get("fuse_multi_transformer_layer_pass"); if (pass.get() == nullptr) diff --git a/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass.cc b/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass.cc index 2d93758f177d28..bc1a2dd0ed4de5 100644 --- a/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass.cc +++ b/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass.cc @@ -1075,12 +1075,27 @@ PDNode* MultiDevicesFusedMultiTransformerDecoderFuseQKVPattern::operator()() { } // namespace patterns +inline Node* CreatePersistableVarNode(Graph* graph, const std::string& name) { + auto var_desc = VarDesc(name); + var_desc.SetDataType(framework::proto::VarType::FP32); + var_desc.SetPersistable(true); + auto node = graph->CreateVarNode(&var_desc); + return node; +} + int FusedMultiTransformerDecoderPass::BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope) const { GraphPatternDetector gpd; auto* pattern = gpd.mutable_pattern(); + bool enable_int8 = graph->Get("enable_int8"); + if (enable_int8) { + VLOG(3) << "FusedMultiTransformerDecoderPass with int8"; + } else { + VLOG(3) << "FusedMultiTransformerDecoderPass with fp"; + } + // Create pattern. patterns::FusedMultiTransformerDecoderPattern fused_multi_transformer_pattern( pattern, name_scope); @@ -1093,6 +1108,7 @@ int FusedMultiTransformerDecoderPass::BuildFusion(Graph* graph, Node* layer_norm_bias, Node* layer_norm_mean, Node* layer_norm_variance, + Node* matmul0, Node* matmul0_w, Node* matmul1_w, Node* matmul2_w, @@ -1103,6 +1119,7 @@ int FusedMultiTransformerDecoderPass::BuildFusion(Graph* graph, Node* transpose2_2_out, Node* eltadd_qk_b, Node* reshape2_0, + Node* matmul_linear, Node* matmul_linear_w, Node* eltadd_linear_b, Node* ffn_layer_norm, @@ -1110,11 +1127,17 @@ int FusedMultiTransformerDecoderPass::BuildFusion(Graph* graph, Node* ffn_layer_norm_bias, Node* ffn_layer_norm_mean, Node* ffn_layer_norm_variance, + Node* ffn_matmul0, Node* ffn_matmul0_w, + Node* ffn_matmul1, Node* ffn_matmul1_w, Node* ffn_eltadd0_b, Node* ffn_eltadd1_b, Node* ffn_output) { + auto* matmul0_op = matmul0->Op(); + auto* matmul_linear_op = matmul_linear->Op(); + auto* ffn_matmul_0_op = ffn_matmul0->Op(); + auto* ffn_matmul_1_op = ffn_matmul1->Op(); // Calc index of transformer layer by LayerNorm Scale name // This calculation assumes: // 1. 
no LayerNorm before all transformer layer @@ -1126,7 +1149,9 @@ int FusedMultiTransformerDecoderPass::BuildFusion(Graph* graph, // create fused_multi_transformer OpDesc fused_multi_transformer_op_desc(layer_norm->Op()->Block()); - fused_multi_transformer_op_desc.SetType("fused_multi_transformer"); + fused_multi_transformer_op_desc.SetType(enable_int8 + ? "fused_multi_transformer_int8" + : "fused_multi_transformer"); // 1. Input setting fused_multi_transformer_op_desc.SetInput("X", {input0->Name()}); @@ -1181,8 +1206,66 @@ int FusedMultiTransformerDecoderPass::BuildFusion(Graph* graph, fused_multi_transformer_op_desc.SetAttr("is_test", true); fused_multi_transformer_op_desc.SetAttr("dropout_rate", 0.0f); + if (enable_int8) { + // Set input scale + std::string qkv_input_name = matmul0_op->Input("X")[0]; + auto qkv_in_scale = PADDLE_GET_CONST( + float, matmul0_op->GetAttr("Input_scale_" + qkv_input_name)); + std::string out_linear_input_name = matmul_linear_op->Input("X")[0]; + auto out_linear_in_scale = PADDLE_GET_CONST( + float, + matmul_linear_op->GetAttr("Input_scale_" + out_linear_input_name)); + std::string ffn0_input_name = ffn_matmul_0_op->Input("X")[0]; + auto ffn0_in_scale = PADDLE_GET_CONST( + float, ffn_matmul_0_op->GetAttr("Input_scale_" + ffn0_input_name)); + std::string ffn1_input_name = ffn_matmul_1_op->Input("X")[0]; + auto ffn1_in_scale = PADDLE_GET_CONST( + float, ffn_matmul_1_op->GetAttr("Input_scale_" + ffn1_input_name)); + + // Inverse input scale + qkv_in_scale = 1.0f / qkv_in_scale; + out_linear_in_scale = 1.0f / out_linear_in_scale; + ffn0_in_scale = 1.0f / ffn0_in_scale; + ffn1_in_scale = 1.0f / ffn1_in_scale; + + fused_multi_transformer_op_desc.SetAttr("qkv_in_scale", + std::vector{qkv_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "out_linear_in_scale", std::vector{out_linear_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "ffn1_in_scale", std::vector{ffn0_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "ffn2_in_scale", std::vector{ffn1_in_scale}); + + fused_multi_transformer_op_desc.SetInput( + "QKVOutScale", {matmul0_w->Name() + "_out_scale"}); + fused_multi_transformer_op_desc.SetInput( + "OutLinearOutScale", {matmul_linear_w->Name() + "_out_scale"}); + fused_multi_transformer_op_desc.SetInput( + "FFN1OutScale", {ffn_matmul0_w->Name() + "_out_scale"}); + fused_multi_transformer_op_desc.SetInput( + "FFN2OutScale", {ffn_matmul1_w->Name() + "_out_scale"}); + } + auto* fused_multi_transformer = graph->CreateOpNode(&fused_multi_transformer_op_desc); + + if (enable_int8) { + auto qkv_out_scale_node = + CreatePersistableVarNode(graph, matmul0_w->Name() + "_out_scale"); + auto out_out_scale_node = CreatePersistableVarNode( + graph, matmul_linear_w->Name() + "_out_scale"); + auto ffn0_out_scale_node = + CreatePersistableVarNode(graph, ffn_matmul0_w->Name() + "_out_scale"); + auto ffn1_out_scale_node = + CreatePersistableVarNode(graph, ffn_matmul1_w->Name() + "_out_scale"); + + IR_NODE_LINK_TO(qkv_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(out_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(ffn0_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(ffn1_out_scale_node, fused_multi_transformer); + } + IR_NODE_LINK_TO(input0, fused_multi_transformer); IR_NODE_LINK_TO(layer_norm_scale, fused_multi_transformer); IR_NODE_LINK_TO(layer_norm_bias, fused_multi_transformer); @@ -1456,6 +1539,7 @@ int FusedMultiTransformerDecoderPass::BuildFusion(Graph* graph, layer_norm_bias, layer_norm_mean, layer_norm_variance, + 
matmul0, matmul0_w, matmul1_w, matmul2_w, @@ -1466,6 +1550,7 @@ int FusedMultiTransformerDecoderPass::BuildFusion(Graph* graph, transpose2_2_out, eltadd_qk_b, reshape2_0, + matmul_linear, matmul_linear_w, eltadd_linear_b, ffn_layer_norm, @@ -1473,7 +1558,9 @@ int FusedMultiTransformerDecoderPass::BuildFusion(Graph* graph, ffn_layer_norm_bias, ffn_layer_norm_mean, ffn_layer_norm_variance, + ffn_matmul0, ffn_matmul0_w, + ffn_matmul1, ffn_matmul1_w, ffn_eltadd0_b, ffn_eltadd1_b, @@ -1732,6 +1819,13 @@ int FusedMultiTransformerDecoderFuseQKVPass::BuildFusion( GraphPatternDetector gpd; auto* pattern = gpd.mutable_pattern(); + bool enable_int8 = graph->Get("enable_int8"); + if (enable_int8) { + VLOG(3) << "FusedMultiTransformerDecoderFuseQKVPass with int8"; + } else { + VLOG(3) << "FusedMultiTransformerDecoderFuseQKVPass with fp"; + } + // Create pattern. patterns::FusedMultiTransformerDecoderFuseQKVPattern fused_multi_transformer_fuse_qkv_pattern(pattern, name_scope); @@ -1744,10 +1838,12 @@ int FusedMultiTransformerDecoderFuseQKVPass::BuildFusion( Node* layer_norm_bias, Node* layer_norm_mean, Node* layer_norm_variance, + Node* matmul0, Node* matmul0_w, Node* eltadd0_b, Node* eltadd_qk_b, Node* reshape2_0, + Node* matmul_linear, Node* matmul_linear_w, Node* eltadd_linear_b, Node* ffn_layer_norm, @@ -1755,11 +1851,17 @@ int FusedMultiTransformerDecoderFuseQKVPass::BuildFusion( Node* ffn_layer_norm_bias, Node* ffn_layer_norm_mean, Node* ffn_layer_norm_variance, + Node* ffn_matmul0, Node* ffn_matmul0_w, + Node* ffn_matmul1, Node* ffn_matmul1_w, Node* ffn_eltadd0_b, Node* ffn_eltadd1_b, Node* ffn_output) { + auto* matmul0_op = matmul0->Op(); + auto* matmul_linear_op = matmul_linear->Op(); + auto* ffn_matmul_0_op = ffn_matmul0->Op(); + auto* ffn_matmul_1_op = ffn_matmul1->Op(); // Calc index of transformer layer by LayerNorm Scale name // This calculation assumes: // 1. no LayerNorm before all transformer layer @@ -1771,7 +1873,9 @@ int FusedMultiTransformerDecoderFuseQKVPass::BuildFusion( // create fused_multi_transformer OpDesc fused_multi_transformer_op_desc(layer_norm->Op()->Block()); - fused_multi_transformer_op_desc.SetType("fused_multi_transformer"); + fused_multi_transformer_op_desc.SetType(enable_int8 + ? "fused_multi_transformer_int8" + : "fused_multi_transformer"); // 1. 
Input setting fused_multi_transformer_op_desc.SetInput("X", {input0->Name()}); @@ -1826,8 +1930,65 @@ int FusedMultiTransformerDecoderFuseQKVPass::BuildFusion( fused_multi_transformer_op_desc.SetAttr("dropout_rate", 0.0f); fused_multi_transformer_op_desc.SetAttr("is_test", true); + if (enable_int8) { + // Set input scale + std::string qkv_input_name = matmul0_op->Input("X")[0]; + auto qkv_in_scale = PADDLE_GET_CONST( + float, matmul0_op->GetAttr("Input_scale_" + qkv_input_name)); + std::string out_linear_input_name = matmul_linear_op->Input("X")[0]; + auto out_linear_in_scale = PADDLE_GET_CONST( + float, + matmul_linear_op->GetAttr("Input_scale_" + out_linear_input_name)); + std::string ffn0_input_name = ffn_matmul_0_op->Input("X")[0]; + auto ffn0_in_scale = PADDLE_GET_CONST( + float, ffn_matmul_0_op->GetAttr("Input_scale_" + ffn0_input_name)); + std::string ffn1_input_name = ffn_matmul_1_op->Input("X")[0]; + auto ffn1_in_scale = PADDLE_GET_CONST( + float, ffn_matmul_1_op->GetAttr("Input_scale_" + ffn1_input_name)); + + // Inverse input scale + qkv_in_scale = 1.0f / qkv_in_scale; + out_linear_in_scale = 1.0f / out_linear_in_scale; + ffn0_in_scale = 1.0f / ffn0_in_scale; + ffn1_in_scale = 1.0f / ffn1_in_scale; + + fused_multi_transformer_op_desc.SetAttr("qkv_in_scale", + std::vector{qkv_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "out_linear_in_scale", std::vector{out_linear_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "ffn1_in_scale", std::vector{ffn0_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "ffn2_in_scale", std::vector{ffn1_in_scale}); + + fused_multi_transformer_op_desc.SetInput( + "QKVOutScale", {matmul0_w->Name() + "_out_scale"}); + fused_multi_transformer_op_desc.SetInput( + "OutLinearOutScale", {matmul_linear_w->Name() + "_out_scale"}); + fused_multi_transformer_op_desc.SetInput( + "FFN1OutScale", {ffn_matmul0_w->Name() + "_out_scale"}); + fused_multi_transformer_op_desc.SetInput( + "FFN2OutScale", {ffn_matmul1_w->Name() + "_out_scale"}); + } + auto* fused_multi_transformer = graph->CreateOpNode(&fused_multi_transformer_op_desc); + + if (enable_int8) { + auto qkv_out_scale_node = + CreatePersistableVarNode(graph, matmul0_w->Name() + "_out_scale"); + auto out_out_scale_node = CreatePersistableVarNode( + graph, matmul_linear_w->Name() + "_out_scale"); + auto ffn0_out_scale_node = + CreatePersistableVarNode(graph, ffn_matmul0_w->Name() + "_out_scale"); + auto ffn1_out_scale_node = + CreatePersistableVarNode(graph, ffn_matmul1_w->Name() + "_out_scale"); + + IR_NODE_LINK_TO(qkv_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(out_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(ffn0_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(ffn1_out_scale_node, fused_multi_transformer); + } IR_NODE_LINK_TO(input0, fused_multi_transformer); IR_NODE_LINK_TO(layer_norm_scale, fused_multi_transformer); IR_NODE_LINK_TO(layer_norm_bias, fused_multi_transformer); @@ -2088,10 +2249,12 @@ int FusedMultiTransformerDecoderFuseQKVPass::BuildFusion( layer_norm_bias, layer_norm_mean, layer_norm_variance, + matmul0, matmul0_w, eltadd0_b, eltadd_qk_b, reshape2_0, + matmul_linear, matmul_linear_w, eltadd_linear_b, ffn_layer_norm, @@ -2099,7 +2262,9 @@ int FusedMultiTransformerDecoderFuseQKVPass::BuildFusion( ffn_layer_norm_bias, ffn_layer_norm_mean, ffn_layer_norm_variance, + ffn_matmul0, ffn_matmul0_w, + ffn_matmul1, ffn_matmul1_w, ffn_eltadd0_b, ffn_eltadd1_b, @@ -2349,6 +2514,13 @@ int 
MultiDevicesFusedMultiTransformerDecoderFuseQKVPass::BuildFusion( GraphPatternDetector gpd; auto* pattern = gpd.mutable_pattern(); + bool enable_int8 = graph->Get("enable_int8"); + if (enable_int8) { + VLOG(3) << "MultiDevicesFusedMultiTransformerDecoderFuseQKVPass with int8"; + } else { + VLOG(3) << "MultiDevicesFusedMultiTransformerDecoderFuseQKVPass with fp"; + } + // Create pattern. patterns::MultiDevicesFusedMultiTransformerDecoderFuseQKVPattern fused_multi_transformer_fuse_qkv_pattern(pattern, name_scope); @@ -2362,10 +2534,12 @@ int MultiDevicesFusedMultiTransformerDecoderFuseQKVPass::BuildFusion( Node* layer_norm_mean, Node* layer_norm_variance, Node* c_identity, + Node* matmul0, Node* matmul0_w, Node* eltadd0_b, Node* eltadd_qk_b, Node* reshape2_0, + Node* matmul_linear, Node* matmul_linear_w, Node* eltadd_linear_b, Node* ffn_layer_norm, @@ -2373,11 +2547,16 @@ int MultiDevicesFusedMultiTransformerDecoderFuseQKVPass::BuildFusion( Node* ffn_layer_norm_bias, Node* ffn_layer_norm_mean, Node* ffn_layer_norm_variance, + Node* ffn_c_identity, + Node* ffn_matmul0, Node* ffn_matmul0_w, + Node* ffn_matmul1, Node* ffn_matmul1_w, Node* ffn_eltadd0_b, Node* ffn_eltadd1_b, Node* ffn_output) { + auto* matmul_linear_op = matmul_linear->Op(); + auto* ffn_matmul_1_op = ffn_matmul1->Op(); // Calc index of transformer layer by LayerNorm Scale name // This calculation assumes: // 1. no LayerNorm before all transformer layer @@ -2389,7 +2568,9 @@ int MultiDevicesFusedMultiTransformerDecoderFuseQKVPass::BuildFusion( // create fused_multi_transformer OpDesc fused_multi_transformer_op_desc(layer_norm->Op()->Block()); - fused_multi_transformer_op_desc.SetType("fused_multi_transformer"); + fused_multi_transformer_op_desc.SetType(enable_int8 + ? "fused_multi_transformer_int8" + : "fused_multi_transformer"); // 1. 
Input setting fused_multi_transformer_op_desc.SetInput("X", {input0->Name()}); @@ -2449,8 +2630,71 @@ int MultiDevicesFusedMultiTransformerDecoderFuseQKVPass::BuildFusion( fused_multi_transformer_op_desc.SetAttr("ring_id", c_identity_op->GetAttr("ring_id")); + if (enable_int8) { + std::string matmul_input_scale_suffix = c_identity_op->Input("X")[0]; + auto qkv_in_scale = PADDLE_GET_CONST( + float, + c_identity_op->GetAttr("Input_scale_" + matmul_input_scale_suffix)); + + std::string out_linear_input_name = matmul_linear_op->Input("X")[0]; + auto out_linear_in_scale = PADDLE_GET_CONST( + float, + matmul_linear_op->GetAttr("Input_scale_" + out_linear_input_name)); + + auto* ffn_c_identity_op = ffn_c_identity->Op(); + std::string ffn_input_scale_suffix = ffn_c_identity_op->Input("X")[0]; + auto ffn0_in_scale = PADDLE_GET_CONST( + float, + ffn_c_identity_op->GetAttr("Input_scale_" + ffn_input_scale_suffix)); + + std::string ffn1_input_name = ffn_matmul_1_op->Input("X")[0]; + auto ffn1_in_scale = PADDLE_GET_CONST( + float, ffn_matmul_1_op->GetAttr("Input_scale_" + ffn1_input_name)); + + // Inverse input scale + qkv_in_scale = 1.0f / qkv_in_scale; + out_linear_in_scale = 1.0f / out_linear_in_scale; + ffn0_in_scale = 1.0f / ffn0_in_scale; + ffn1_in_scale = 1.0f / ffn1_in_scale; + + fused_multi_transformer_op_desc.SetAttr("qkv_in_scale", + std::vector{qkv_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "out_linear_in_scale", std::vector{out_linear_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "ffn1_in_scale", std::vector{ffn0_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "ffn2_in_scale", std::vector{ffn1_in_scale}); + + fused_multi_transformer_op_desc.SetInput( + "QKVOutScale", {matmul0_w->Name() + "_out_scale"}); + fused_multi_transformer_op_desc.SetInput( + "OutLinearOutScale", {matmul_linear_w->Name() + "_out_scale"}); + fused_multi_transformer_op_desc.SetInput( + "FFN1OutScale", {ffn_matmul0_w->Name() + "_out_scale"}); + fused_multi_transformer_op_desc.SetInput( + "FFN2OutScale", {ffn_matmul1_w->Name() + "_out_scale"}); + } + auto* fused_multi_transformer = graph->CreateOpNode(&fused_multi_transformer_op_desc); + + if (enable_int8) { + auto qkv_out_scale_node = + CreatePersistableVarNode(graph, matmul0_w->Name() + "_out_scale"); + auto out_out_scale_node = CreatePersistableVarNode( + graph, matmul_linear_w->Name() + "_out_scale"); + auto ffn0_out_scale_node = + CreatePersistableVarNode(graph, ffn_matmul0_w->Name() + "_out_scale"); + auto ffn1_out_scale_node = + CreatePersistableVarNode(graph, ffn_matmul1_w->Name() + "_out_scale"); + + IR_NODE_LINK_TO(qkv_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(out_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(ffn0_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(ffn1_out_scale_node, fused_multi_transformer); + } + IR_NODE_LINK_TO(input0, fused_multi_transformer); IR_NODE_LINK_TO(layer_norm_scale, fused_multi_transformer); IR_NODE_LINK_TO(layer_norm_bias, fused_multi_transformer); @@ -2737,10 +2981,12 @@ int MultiDevicesFusedMultiTransformerDecoderFuseQKVPass::BuildFusion( layer_norm_mean, layer_norm_variance, c_identity, + matmul0, matmul0_w, eltadd0_b, eltadd_qk_b, reshape2_0, + matmul_linear, matmul_linear_w, eltadd_linear_b, ffn_layer_norm, @@ -2748,7 +2994,10 @@ int MultiDevicesFusedMultiTransformerDecoderFuseQKVPass::BuildFusion( ffn_layer_norm_bias, ffn_layer_norm_mean, ffn_layer_norm_variance, + ffn_c_identity, + ffn_matmul0, ffn_matmul0_w, + ffn_matmul1, ffn_matmul1_w, 
ffn_eltadd0_b, ffn_eltadd1_b, diff --git a/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass_tester.cc b/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass_tester.cc index dbb6781442492c..2e54196e599a8e 100644 --- a/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass_tester.cc +++ b/paddle/fluid/framework/ir/fused_multi_transformer_decoder_pass_tester.cc @@ -193,6 +193,7 @@ TEST(FusedMultiTransformerDecoderPass, basic) { std::unique_ptr graph(new ir::Graph(layers.main_program())); graph->Set("__param_scope__", CreateParamScope()); + graph->Set("enable_int8", new bool(false)); auto pass = PassRegistry::Instance().Get("fused_multi_transformer_decoder_pass"); @@ -344,6 +345,7 @@ TEST(FusedMultiTransformerDecoderFuseQKVPass, basic) { std::unique_ptr graph(new ir::Graph(layers.main_program())); graph->Set("__param_scope__", CreateParamScope()); + graph->Set("enable_int8", new bool(false)); auto pass = PassRegistry::Instance().Get( "fused_multi_transformer_decoder_fuse_qkv_pass"); @@ -503,6 +505,7 @@ TEST(MultiDevicesFusedMultiTransformerDecoderFuseQKVPass, basic) { std::unique_ptr graph(new ir::Graph(layers.main_program())); graph->Set("__param_scope__", CreateParamScope()); + graph->Set("enable_int8", new bool(false)); auto pass = PassRegistry::Instance().Get( "multi_devices_fused_multi_transformer_decoder_fuse_qkv_pass"); diff --git a/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc b/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc index 0503b3a0a3d592..3635613f8c54b3 100644 --- a/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc +++ b/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc @@ -1025,21 +1025,14 @@ template inline void QKVWeightsProcess(phi::DenseTensor* wq_tensor, phi::DenseTensor* wk_tensor, phi::DenseTensor* wv_tensor, - phi::DenseTensor* bq_tensor, - phi::DenseTensor* bk_tensor, - phi::DenseTensor* bv_tensor, const int num_head, const int dim_head, const int dim_embed) { auto* wq_data = wq_tensor->mutable_data(platform::CPUPlace()); auto* wk_data = wk_tensor->mutable_data(platform::CPUPlace()); auto* wv_data = wv_tensor->mutable_data(platform::CPUPlace()); - auto* bq_data = bq_tensor->mutable_data(platform::CPUPlace()); - auto* bk_data = bk_tensor->mutable_data(platform::CPUPlace()); - auto* bv_data = bv_tensor->mutable_data(platform::CPUPlace()); auto combined_w_dims = phi::make_ddim({3, num_head, dim_head, dim_embed}); - auto combined_bias_dims = phi::make_ddim({3, num_head, dim_head}); phi::DenseTensor tmp_combined_w_tensor; tmp_combined_w_tensor.Resize(combined_w_dims); @@ -1065,6 +1058,20 @@ inline void QKVWeightsProcess(phi::DenseTensor* wq_tensor, auto* new_combined_w_data = wq_tensor->mutable_data(platform::CPUPlace()); memcpy( new_combined_w_data, tmp_combined_w_data, sizeof(T) * wq_tensor->numel()); +} + +template +inline void QKVBiasProcess(phi::DenseTensor* bq_tensor, + phi::DenseTensor* bk_tensor, + phi::DenseTensor* bv_tensor, + const int num_head, + const int dim_head, + const int dim_embed) { + auto* bq_data = bq_tensor->mutable_data(platform::CPUPlace()); + auto* bk_data = bk_tensor->mutable_data(platform::CPUPlace()); + auto* bv_data = bv_tensor->mutable_data(platform::CPUPlace()); + + auto combined_bias_dims = phi::make_ddim({3, num_head, dim_head}); phi::DenseTensor tmp_combined_bias_tensor; tmp_combined_bias_tensor.Resize(combined_bias_dims); @@ -1085,13 +1092,57 @@ inline void QKVWeightsProcess(phi::DenseTensor* wq_tensor, sizeof(T) * bq_tensor->numel()); 
} +inline void QKVWeightsBiasProcess(phi::DenseTensor* wq_tensor, + phi::DenseTensor* wk_tensor, + phi::DenseTensor* wv_tensor, + phi::DenseTensor* bq_tensor, + phi::DenseTensor* bk_tensor, + phi::DenseTensor* bv_tensor, + const int num_head, + const int dim_head, + const int dim_embed) { + switch (wq_tensor->dtype()) { + case paddle::experimental::DataType::FLOAT16: + QKVWeightsProcess( + wq_tensor, wk_tensor, wv_tensor, num_head, dim_head, dim_embed); + break; + case paddle::experimental::DataType::FLOAT32: + QKVWeightsProcess( + wq_tensor, wk_tensor, wv_tensor, num_head, dim_head, dim_embed); + break; + case paddle::experimental::DataType::INT8: + QKVWeightsProcess( + wq_tensor, wk_tensor, wv_tensor, num_head, dim_head, dim_embed); + break; + default: + PADDLE_THROW(platform::errors::Unavailable( + "fused_multi_transformer not supported weight dtype. " + "we now only support fp32/fp16/int8.")); + break; + } + switch (bq_tensor->dtype()) { + case paddle::experimental::DataType::FLOAT16: + QKVBiasProcess( + bq_tensor, bk_tensor, bv_tensor, num_head, dim_head, dim_embed); + break; + case paddle::experimental::DataType::FLOAT32: + QKVBiasProcess( + bq_tensor, bk_tensor, bv_tensor, num_head, dim_head, dim_embed); + break; + default: + PADDLE_THROW(platform::errors::Unavailable( + "fused_multi_transformer not supported bias dtype. " + "we now only support fp32/fp16.")); + break; + } +} + template inline void QKVWeightsProcessFuseQKV(phi::DenseTensor* qkv_w_tensor, - phi::DenseTensor* qkv_b_tensor, const int num_head, const int dim_head, const int dim_embed) { - auto* qkv_w_data = qkv_w_tensor->mutable_data(platform::CPUPlace()); + auto* qkv_w_data = qkv_w_tensor->data(); auto transpose_w_dims = phi::make_ddim({3, num_head, dim_head, dim_embed}); phi::DenseTensor tmp_transpose_w_tensor; @@ -1120,8 +1171,14 @@ inline void QKVWeightsProcessFuseQKV(phi::DenseTensor* qkv_w_tensor, memcpy(new_transpose_w_data, tmp_transpose_w_data, sizeof(T) * qkv_w_tensor->numel()); +} - auto* qkv_b_data = qkv_b_tensor->mutable_data(platform::CPUPlace()); +template +inline void QKVBiasProcessFuseQKV(phi::DenseTensor* qkv_b_tensor, + const int num_head, + const int dim_head, + const int dim_embed) { + auto* qkv_b_data = qkv_b_tensor->data(); auto transpose_b_dims = phi::make_ddim({3, num_head, dim_head}); phi::DenseTensor tmp_transpose_b_tensor; @@ -1148,11 +1205,86 @@ inline void QKVWeightsProcessFuseQKV(phi::DenseTensor* qkv_w_tensor, sizeof(T) * qkv_b_tensor->numel()); } +inline void QKVWeightsBiasProcessFuseQKV(phi::DenseTensor* qkv_w_tensor, + phi::DenseTensor* qkv_b_tensor, + const int num_head, + const int dim_head, + const int dim_embed) { + switch (qkv_w_tensor->dtype()) { + case paddle::experimental::DataType::FLOAT16: + QKVWeightsProcessFuseQKV( + qkv_w_tensor, num_head, dim_head, dim_embed); + break; + case paddle::experimental::DataType::FLOAT32: + QKVWeightsProcessFuseQKV( + qkv_w_tensor, num_head, dim_head, dim_embed); + break; + case paddle::experimental::DataType::INT8: + QKVWeightsProcessFuseQKV( + qkv_w_tensor, num_head, dim_head, dim_embed); + break; + default: + PADDLE_THROW(platform::errors::Unavailable( + "fused_multi_transformer not supported weight dtype. 
" + "we now only support fp32/fp16/int8.")); + break; + } + switch (qkv_b_tensor->dtype()) { + case paddle::experimental::DataType::FLOAT16: + QKVBiasProcessFuseQKV( + qkv_b_tensor, num_head, dim_head, dim_embed); + break; + case paddle::experimental::DataType::FLOAT32: + QKVBiasProcessFuseQKV(qkv_b_tensor, num_head, dim_head, dim_embed); + break; + default: + PADDLE_THROW(platform::errors::Unavailable( + "fused_multi_transformer not supported bias dtype. " + "we now only support fp32/fp16.")); + break; + } +} + +// Just use for fused_multi_transformer_int8 +inline void TransposeWeights(phi::DenseTensor* weight_tensor) { + int m = weight_tensor->dims()[0]; + int n = weight_tensor->dims()[1]; + phi::DenseTensor tmp_weight_tensor; + auto tmp_weight_data = + tmp_weight_tensor.mutable_data({n, m}, platform::CPUPlace()); + auto weight_data = weight_tensor->data(); + for (int i = 0; i < m; ++i) { + for (int j = 0; j < n; ++j) { + int in_idx = i * n + j; + int out_idx = j * m + i; + tmp_weight_data[out_idx] = weight_data[in_idx]; + } + } + weight_tensor->Resize({n, m}); + auto new_weight_data = + weight_tensor->mutable_data(platform::CPUPlace()); + memcpy(new_weight_data, tmp_weight_data, sizeof(int8_t) * m * n); +} + +inline Node* CreatePersistableVarNode(Graph* graph, const std::string& name) { + auto var_desc = VarDesc(name); + var_desc.SetDataType(framework::proto::VarType::FP32); + var_desc.SetPersistable(true); + auto node = graph->CreateVarNode(&var_desc); + return node; +} + int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope) const { GraphPatternDetector gpd; auto* pattern = gpd.mutable_pattern(); + bool enable_int8 = graph->Get("enable_int8"); + if (enable_int8) { + VLOG(3) << "FusedMultiTransformerEncoderPass with int8"; + } else { + VLOG(3) << "FusedMultiTransformerEncoderPass with fp"; + } // Create pattern. 
patterns::FusedMultiTransformerEncoderPattern fused_multi_transformer_pattern( @@ -1166,6 +1298,7 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, Node* layer_norm_bias, Node* layer_norm_mean, Node* layer_norm_variance, + Node* matmul0, Node* matmul0_w, Node* matmul1_w, Node* matmul2_w, @@ -1176,6 +1309,7 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, Node* transpose2_2_out, Node* eltadd_qk_b, Node* reshape2_0, + Node* matmul_linear, Node* matmul_linear_w, Node* eltadd_linear_b, Node* while0, @@ -1184,7 +1318,9 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, Node* ffn_layer_norm_bias, Node* ffn_layer_norm_mean, Node* ffn_layer_norm_variance, + Node* ffn_matmul0, Node* ffn_matmul0_w, + Node* ffn_matmul1, Node* ffn_matmul1_w, Node* ffn_eltadd0_b, Node* ffn_eltadd1_b, @@ -1196,7 +1332,14 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, int dim_head = PADDLE_GET_CONST(std::vector, reshape_desc->GetAttr("shape")) .at(3); - int dim_embed = num_head * dim_head; + auto* layer_norm_bias_tensor = + scope->FindVar(layer_norm_bias->Name())->GetMutable(); + int dim_embed = layer_norm_bias_tensor->dims()[0]; + + auto* matmul0_op = matmul0->Op(); + auto* matmul_linear_op = matmul_linear->Op(); + auto* ffn_matmul_0_op = ffn_matmul0->Op(); + auto* ffn_matmul_1_op = ffn_matmul1->Op(); // Calc index of transformer layer by LayerNorm Scale name // This calculation assumes: @@ -1221,30 +1364,27 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, auto* bv_tensor = scope->FindVar(eltadd2_b->Name())->GetMutable(); - if (wq_tensor->dtype() == phi::DataType::FLOAT32) { - QKVWeightsProcess(wq_tensor, - wk_tensor, - wv_tensor, - bq_tensor, - bk_tensor, - bv_tensor, - num_head, - dim_head, - dim_embed); - } else if (wq_tensor->dtype() == phi::DataType::FLOAT16) { - QKVWeightsProcess(wq_tensor, - wk_tensor, - wv_tensor, - bq_tensor, - bk_tensor, - bv_tensor, - num_head, - dim_head, - dim_embed); - } else { - PADDLE_THROW(platform::errors::Unavailable( - "fused_multi_transformer not supported weight dtype. " - "we now only support fp32 and fp16.")); + QKVWeightsBiasProcess(wq_tensor, + wk_tensor, + wv_tensor, + bq_tensor, + bk_tensor, + bv_tensor, + num_head, + dim_head, + dim_embed); + + if (enable_int8) { + auto* out_linear_w_tensor = scope->FindVar(matmul_linear_w->Name()) + ->GetMutable(); + auto* ffn0_w_tensor = + scope->FindVar(ffn_matmul0_w->Name())->GetMutable(); + auto* ffn1_w_tensor = + scope->FindVar(ffn_matmul1_w->Name())->GetMutable(); + + TransposeWeights(out_linear_w_tensor); + TransposeWeights(ffn0_w_tensor); + TransposeWeights(ffn1_w_tensor); } // reuse the mul0_w and eltadd_0_b nodes for the combined nodes. @@ -1261,7 +1401,9 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, // create fused_multi_transformer OpDesc fused_multi_transformer_op_desc(layer_norm->Op()->Block()); - fused_multi_transformer_op_desc.SetType("fused_multi_transformer"); + fused_multi_transformer_op_desc.SetType(enable_int8 + ? "fused_multi_transformer_int8" + : "fused_multi_transformer"); // 1. 
Input setting fused_multi_transformer_op_desc.SetInput("X", {input0->Name()}); @@ -1281,7 +1423,7 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, VarDesc cache_kv_desc("cache_kv" + std::to_string(layer_idx)); // FIXME: only support max_seq_len <= 1024 cache_kv_desc.SetDataType( - framework::TransToProtoVarType(wq_tensor->dtype())); + framework::TransToProtoVarType(bq_tensor->dtype())); cache_kv_desc.SetPersistable(false); auto* cache_kv = graph->CreateVarNode(&cache_kv_desc); @@ -1296,7 +1438,7 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, fill_const_op_desc.SetAttr("value", 0); fill_const_op_desc.SetAttr( "dtype", - static_cast(framework::TransToProtoVarType(wq_tensor->dtype()))); + static_cast(framework::TransToProtoVarType(bq_tensor->dtype()))); auto* fill_const_op = graph->CreateOpNode(&fill_const_op_desc); fused_multi_transformer_op_desc.SetInput("CacheKV", {cache_kv->Name()}); @@ -1333,8 +1475,123 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, fused_multi_transformer_op_desc.SetAttr("is_test", true); fused_multi_transformer_op_desc.SetAttr("dropout_rate", 0.0f); + // Quantization attribute/Input + if (enable_int8) { + // Set input scale + std::string qkv_input_name = matmul0_op->Input("X")[0]; + auto qkv_in_scale = PADDLE_GET_CONST( + float, matmul0_op->GetAttr("Input_scale_" + qkv_input_name)); + std::string out_linear_input_name = matmul_linear_op->Input("X")[0]; + auto out_linear_in_scale = PADDLE_GET_CONST( + float, + matmul_linear_op->GetAttr("Input_scale_" + out_linear_input_name)); + std::string ffn0_input_name = ffn_matmul_0_op->Input("X")[0]; + auto ffn0_in_scale = PADDLE_GET_CONST( + float, ffn_matmul_0_op->GetAttr("Input_scale_" + ffn0_input_name)); + std::string ffn1_input_name = ffn_matmul_1_op->Input("X")[0]; + auto ffn1_in_scale = PADDLE_GET_CONST( + float, ffn_matmul_1_op->GetAttr("Input_scale_" + ffn1_input_name)); + + // Calc outscale and Set them + auto qkv_weight_scale = + PADDLE_GET_CONST(float, matmul0_op->GetAttr("weight_scale")); + auto out_weight_scale = + PADDLE_GET_CONST(float, matmul_linear_op->GetAttr("weight_scale")); + auto ffn0_weight_scale = + PADDLE_GET_CONST(float, ffn_matmul_0_op->GetAttr("weight_scale")); + auto ffn1_weight_scale = + PADDLE_GET_CONST(float, ffn_matmul_1_op->GetAttr("weight_scale")); + + auto qkv_out_scales = std::vector( + 3 * dim_embed, (qkv_weight_scale / 127.0f) * (qkv_in_scale / 127.0f)); + auto out_out_scales = std::vector( + dim_embed, + (out_weight_scale / 127.0f) * (out_linear_in_scale / 127.0f)); + auto ffn0_out_scales = std::vector( + 4 * dim_embed, + (ffn0_weight_scale / 127.0f) * (ffn0_in_scale / 127.0f)); + auto ffn1_out_scales = std::vector( + dim_embed, (ffn1_weight_scale / 127.0f) * (ffn1_in_scale / 127.0f)); + + // Inverse input scale + qkv_in_scale = 1.0f / qkv_in_scale; + out_linear_in_scale = 1.0f / out_linear_in_scale; + ffn0_in_scale = 1.0f / ffn0_in_scale; + ffn1_in_scale = 1.0f / ffn1_in_scale; + + fused_multi_transformer_op_desc.SetAttr("qkv_in_scale", + std::vector{qkv_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "out_linear_in_scale", std::vector{out_linear_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "ffn1_in_scale", std::vector{ffn0_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "ffn2_in_scale", std::vector{ffn1_in_scale}); + + auto qkv_out_scale_var = scope->Var(matmul0_w->Name() + "_out_scale"); + auto out_out_scale_var = + scope->Var(matmul_linear_w->Name() + "_out_scale"); + auto ffn0_out_scale_var = + 
scope->Var(ffn_matmul0_w->Name() + "_out_scale"); + auto ffn1_out_scale_var = + scope->Var(ffn_matmul1_w->Name() + "_out_scale"); + + auto qkv_out_scale_data = + qkv_out_scale_var->GetMutable() + ->mutable_data({3 * dim_embed}, platform::CPUPlace()); + memcpy(qkv_out_scale_data, + qkv_out_scales.data(), + qkv_out_scales.size() * sizeof(float)); + fused_multi_transformer_op_desc.SetInput( + "QKVOutScale", {matmul0_w->Name() + "_out_scale"}); + + auto out_out_scale_data = + out_out_scale_var->GetMutable() + ->mutable_data({dim_embed}, platform::CPUPlace()); + memcpy(out_out_scale_data, + out_out_scales.data(), + out_out_scales.size() * sizeof(float)); + fused_multi_transformer_op_desc.SetInput( + "OutLinearOutScale", {matmul_linear_w->Name() + "_out_scale"}); + + auto ffn0_out_scale_data = + ffn0_out_scale_var->GetMutable() + ->mutable_data({4 * dim_embed}, platform::CPUPlace()); + memcpy(ffn0_out_scale_data, + ffn0_out_scales.data(), + ffn0_out_scales.size() * sizeof(float)); + fused_multi_transformer_op_desc.SetInput( + "FFN1OutScale", {ffn_matmul0_w->Name() + "_out_scale"}); + + auto ffn1_out_scale_data = + ffn1_out_scale_var->GetMutable() + ->mutable_data({dim_embed}, platform::CPUPlace()); + memcpy(ffn1_out_scale_data, + ffn1_out_scales.data(), + ffn1_out_scales.size() * sizeof(float)); + fused_multi_transformer_op_desc.SetInput( + "FFN2OutScale", {ffn_matmul1_w->Name() + "_out_scale"}); + } + auto* fused_multi_transformer = graph->CreateOpNode(&fused_multi_transformer_op_desc); + + if (enable_int8) { + auto qkv_out_scale_node = + CreatePersistableVarNode(graph, matmul0_w->Name() + "_out_scale"); + auto out_out_scale_node = CreatePersistableVarNode( + graph, matmul_linear_w->Name() + "_out_scale"); + auto ffn0_out_scale_node = + CreatePersistableVarNode(graph, ffn_matmul0_w->Name() + "_out_scale"); + auto ffn1_out_scale_node = + CreatePersistableVarNode(graph, ffn_matmul1_w->Name() + "_out_scale"); + + IR_NODE_LINK_TO(qkv_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(out_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(ffn0_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(ffn1_out_scale_node, fused_multi_transformer); + } + IR_NODE_LINK_TO(input0, fused_multi_transformer); IR_NODE_LINK_TO(layer_norm_scale, fused_multi_transformer); IR_NODE_LINK_TO(layer_norm_bias, fused_multi_transformer); @@ -1622,6 +1879,7 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, layer_norm_bias, layer_norm_mean, layer_norm_variance, + matmul0, matmul0_w, matmul1_w, matmul2_w, @@ -1632,6 +1890,7 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, transpose2_2_out, eltadd_qk_b, reshape2_0, + matmul_linear, matmul_linear_w, eltadd_linear_b, while0, @@ -1640,7 +1899,9 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, ffn_layer_norm_bias, ffn_layer_norm_mean, ffn_layer_norm_variance, + ffn_matmul0, ffn_matmul0_w, + ffn_matmul1, ffn_matmul1_w, ffn_eltadd0_b, ffn_eltadd1_b, @@ -1892,6 +2153,12 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion( Graph* graph, const std::string& name_scope, Scope* scope) const { GraphPatternDetector gpd; auto* pattern = gpd.mutable_pattern(); + bool enable_int8 = graph->Get("enable_int8"); + if (enable_int8) { + VLOG(3) << "FusedMultiTransformerEncoderFuseQKVPass with int8"; + } else { + VLOG(3) << "FusedMultiTransformerEncoderFuseQKVPass with fp"; + } // Create pattern. 
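Note on the scale arithmetic used in the int8 branch above, which the fuse-QKV passes below repeat: each per-channel dequantization ("out") scale is the product of the layer-wise weight scale and the activation input scale read from the Input_scale_* attribute, each divided by 127, while the attribute handed to the fused op is the reciprocal of the input scale so the kernel can quantize by a multiply. A minimal standalone sketch of that arithmetic follows; the function and variable names here are illustrative, not taken from the pass.

// Sketch only: mirrors the scale bookkeeping done while rewiring the graph
// for fused_multi_transformer_int8. All names are hypothetical.
#include <vector>

std::vector<float> MakeDequantOutScales(float weight_scale,  // layer-wise weight scale
                                        float in_scale,      // input scale before inversion
                                        int out_channels) {
  // int8 GEMM yields x_q * w_q; multiplying the int32 result by this factor
  // per output channel recovers the float value.
  const float dequant = (weight_scale / 127.0f) * (in_scale / 127.0f);
  return std::vector<float>(out_channels, dequant);
}

float MakeInScaleAttr(float in_scale) {
  // The op attribute stores the inverse so activations can be quantized
  // with a multiply rather than a divide.
  return 1.0f / in_scale;
}
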
patterns::FusedMultiTransformerEncoderFuseQKVPattern @@ -1905,12 +2172,14 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion( Node* layer_norm_bias, Node* layer_norm_mean, Node* layer_norm_variance, + Node* matmul0, Node* matmul0_w, Node* eltadd0_b, Node* split0_k_out, Node* split0_v_out, Node* eltadd_qk_b, Node* reshape2_0, + Node* matmul_linear, Node* matmul_linear_w, Node* eltadd_linear_b, Node* while0, @@ -1919,7 +2188,9 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion( Node* ffn_layer_norm_bias, Node* ffn_layer_norm_mean, Node* ffn_layer_norm_variance, + Node* ffn_matmul0, Node* ffn_matmul0_w, + Node* ffn_matmul1, Node* ffn_matmul1_w, Node* ffn_eltadd0_b, Node* ffn_eltadd1_b, @@ -1932,7 +2203,14 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion( PADDLE_GET_CONST(std::vector, reshape_desc->GetAttr("shape")) .at(3) / 3; // 3 for qkv - int dim_embed = num_head * dim_head; + auto* layer_norm_bias_tensor = + scope->FindVar(layer_norm_bias->Name())->GetMutable(); + int dim_embed = layer_norm_bias_tensor->dims()[0]; + + auto* matmul0_op = matmul0->Op(); + auto* matmul_linear_op = matmul_linear->Op(); + auto* ffn_matmul_0_op = ffn_matmul0->Op(); + auto* ffn_matmul_1_op = ffn_matmul1->Op(); // Calc index of transformer layer by LayerNorm Scale name // This calculation assumes: @@ -1948,21 +2226,27 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion( auto* qkv_b_tensor = scope->FindVar(eltadd0_b->Name())->GetMutable(); - if (qkv_w_tensor->dtype() == phi::DataType::FLOAT32) { - QKVWeightsProcessFuseQKV( - qkv_w_tensor, qkv_b_tensor, num_head, dim_head, dim_embed); - } else if (qkv_w_tensor->dtype() == phi::DataType::FLOAT16) { - QKVWeightsProcessFuseQKV( - qkv_w_tensor, qkv_b_tensor, num_head, dim_head, dim_embed); - } else { - PADDLE_THROW(platform::errors::Unavailable( - "fused_multi_transformer not supported weight dtype. " - "we now only support fp32 and fp16.")); + QKVWeightsBiasProcessFuseQKV( + qkv_w_tensor, qkv_b_tensor, num_head, dim_head, dim_embed); + + if (enable_int8) { + auto* out_linear_w_tensor = scope->FindVar(matmul_linear_w->Name()) + ->GetMutable(); + auto* ffn0_w_tensor = + scope->FindVar(ffn_matmul0_w->Name())->GetMutable(); + auto* ffn1_w_tensor = + scope->FindVar(ffn_matmul1_w->Name())->GetMutable(); + + TransposeWeights(out_linear_w_tensor); + TransposeWeights(ffn0_w_tensor); + TransposeWeights(ffn1_w_tensor); } // create fused_multi_transformer OpDesc fused_multi_transformer_op_desc(layer_norm->Op()->Block()); - fused_multi_transformer_op_desc.SetType("fused_multi_transformer"); + fused_multi_transformer_op_desc.SetType(enable_int8 + ? "fused_multi_transformer_int8" + : "fused_multi_transformer"); // 1. 
Input setting fused_multi_transformer_op_desc.SetInput("X", {input0->Name()}); @@ -1982,7 +2266,7 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion( VarDesc cache_kv_desc("cache_kv" + std::to_string(layer_idx)); // FIXME: only support max_seq_len <= 1024 cache_kv_desc.SetDataType( - framework::TransToProtoVarType(qkv_w_tensor->dtype())); + framework::TransToProtoVarType(qkv_b_tensor->dtype())); cache_kv_desc.SetPersistable(false); auto* cache_kv = graph->CreateVarNode(&cache_kv_desc); @@ -1997,7 +2281,7 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion( fill_const_op_desc.SetAttr("value", 0); fill_const_op_desc.SetAttr("dtype", static_cast(framework::TransToProtoVarType( - qkv_w_tensor->dtype()))); + qkv_b_tensor->dtype()))); auto* fill_const_op = graph->CreateOpNode(&fill_const_op_desc); fused_multi_transformer_op_desc.SetInput("CacheKV", {cache_kv->Name()}); @@ -2035,8 +2319,125 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion( fused_multi_transformer_op_desc.SetAttr("is_test", true); fused_multi_transformer_op_desc.SetAttr("dropout_rate", 0.0f); + // Quantization attribute/Input + if (enable_int8) { + // Set input scale + std::string qkv_input_name = matmul0_op->Input("X")[0]; + auto qkv_in_scale = PADDLE_GET_CONST( + float, matmul0_op->GetAttr("Input_scale_" + qkv_input_name)); + std::string out_linear_input_name = matmul_linear_op->Input("X")[0]; + auto out_linear_in_scale = PADDLE_GET_CONST( + float, + matmul_linear_op->GetAttr("Input_scale_" + out_linear_input_name)); + std::string ffn0_input_name = ffn_matmul_0_op->Input("X")[0]; + auto ffn0_in_scale = PADDLE_GET_CONST( + float, ffn_matmul_0_op->GetAttr("Input_scale_" + ffn0_input_name)); + std::string ffn1_input_name = ffn_matmul_1_op->Input("X")[0]; + auto ffn1_in_scale = PADDLE_GET_CONST( + float, ffn_matmul_1_op->GetAttr("Input_scale_" + ffn1_input_name)); + + // Calc outscale and Set them + // TODO(wufeisheng): Currently just match layer-wise weight scale, where + // channel-wise weight scale should also be surpported. 
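The TODO above marks the current limitation: a single layer-wise weight scale is broadcast to every output channel of the out-scale vector. With channel-wise quantization each output channel would carry its own weight scale, so the vector would be built element-wise instead of from one constant. A hedged sketch of the distinction, using a hypothetical helper rather than the pass's code:

// Sketch: dequant out scales from a layer-wise scalar vs. a channel-wise
// vector of weight scales. Illustrative only.
#include <vector>

std::vector<float> BuildOutScales(const std::vector<float>& weight_scales,  // size 1 or out_channels
                                  float in_scale,
                                  int out_channels) {
  std::vector<float> out(static_cast<size_t>(out_channels));
  for (int c = 0; c < out_channels; ++c) {
    // Layer-wise: every channel reuses weight_scales[0].
    // Channel-wise: each channel uses its own entry.
    const float w = (weight_scales.size() == 1) ? weight_scales[0] : weight_scales[c];
    out[c] = (w / 127.0f) * (in_scale / 127.0f);
  }
  return out;
}
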
+ auto qkv_weight_scale = + PADDLE_GET_CONST(float, matmul0_op->GetAttr("weight_scale")); + auto out_weight_scale = + PADDLE_GET_CONST(float, matmul_linear_op->GetAttr("weight_scale")); + auto ffn0_weight_scale = + PADDLE_GET_CONST(float, ffn_matmul_0_op->GetAttr("weight_scale")); + auto ffn1_weight_scale = + PADDLE_GET_CONST(float, ffn_matmul_1_op->GetAttr("weight_scale")); + + auto qkv_out_scales = std::vector( + 3 * dim_embed, (qkv_weight_scale / 127.0f) * (qkv_in_scale / 127.0f)); + auto out_out_scales = std::vector( + dim_embed, + (out_weight_scale / 127.0f) * (out_linear_in_scale / 127.0f)); + auto ffn0_out_scales = std::vector( + 4 * dim_embed, + (ffn0_weight_scale / 127.0f) * (ffn0_in_scale / 127.0f)); + auto ffn1_out_scales = std::vector( + dim_embed, (ffn1_weight_scale / 127.0f) * (ffn1_in_scale / 127.0f)); + + // Inverse input scale + qkv_in_scale = 1.0f / qkv_in_scale; + out_linear_in_scale = 1.0f / out_linear_in_scale; + ffn0_in_scale = 1.0f / ffn0_in_scale; + ffn1_in_scale = 1.0f / ffn1_in_scale; + + fused_multi_transformer_op_desc.SetAttr("qkv_in_scale", + std::vector{qkv_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "out_linear_in_scale", std::vector{out_linear_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "ffn1_in_scale", std::vector{ffn0_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "ffn2_in_scale", std::vector{ffn1_in_scale}); + + auto qkv_out_scale_var = scope->Var(matmul0_w->Name() + "_out_scale"); + auto out_out_scale_var = + scope->Var(matmul_linear_w->Name() + "_out_scale"); + auto ffn0_out_scale_var = + scope->Var(ffn_matmul0_w->Name() + "_out_scale"); + auto ffn1_out_scale_var = + scope->Var(ffn_matmul1_w->Name() + "_out_scale"); + + auto qkv_out_scale_data = + qkv_out_scale_var->GetMutable() + ->mutable_data({3 * dim_embed}, platform::CPUPlace()); + memcpy(qkv_out_scale_data, + qkv_out_scales.data(), + qkv_out_scales.size() * sizeof(float)); + fused_multi_transformer_op_desc.SetInput( + "QKVOutScale", {matmul0_w->Name() + "_out_scale"}); + + auto out_out_scale_data = + out_out_scale_var->GetMutable() + ->mutable_data({dim_embed}, platform::CPUPlace()); + memcpy(out_out_scale_data, + out_out_scales.data(), + out_out_scales.size() * sizeof(float)); + fused_multi_transformer_op_desc.SetInput( + "OutLinearOutScale", {matmul_linear_w->Name() + "_out_scale"}); + + auto ffn0_out_scale_data = + ffn0_out_scale_var->GetMutable() + ->mutable_data({4 * dim_embed}, platform::CPUPlace()); + memcpy(ffn0_out_scale_data, + ffn0_out_scales.data(), + ffn0_out_scales.size() * sizeof(float)); + fused_multi_transformer_op_desc.SetInput( + "FFN1OutScale", {ffn_matmul0_w->Name() + "_out_scale"}); + + auto ffn1_out_scale_data = + ffn1_out_scale_var->GetMutable() + ->mutable_data({dim_embed}, platform::CPUPlace()); + memcpy(ffn1_out_scale_data, + ffn1_out_scales.data(), + ffn1_out_scales.size() * sizeof(float)); + fused_multi_transformer_op_desc.SetInput( + "FFN2OutScale", {ffn_matmul1_w->Name() + "_out_scale"}); + } + auto* fused_multi_transformer = graph->CreateOpNode(&fused_multi_transformer_op_desc); + + if (enable_int8) { + auto qkv_out_scale_node = + CreatePersistableVarNode(graph, matmul0_w->Name() + "_out_scale"); + auto out_out_scale_node = CreatePersistableVarNode( + graph, matmul_linear_w->Name() + "_out_scale"); + auto ffn0_out_scale_node = + CreatePersistableVarNode(graph, ffn_matmul0_w->Name() + "_out_scale"); + auto ffn1_out_scale_node = + CreatePersistableVarNode(graph, ffn_matmul1_w->Name() + "_out_scale"); + + 
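For context on how the kernels consume these values at runtime (the relevant kernel changes appear further down in quant_dequant_kernel.h and the fused dropout headers): activations are quantized by multiplying with the stored inverse input scale, the GEMM runs in int8/int32, and the int32 output is dequantized by multiplying with the per-channel out scale prepared here. The following is only a scalar illustration of that convention under those assumptions, not the vectorized CUDA code.

// Scalar sketch of the quantize/dequantize round trip; hypothetical helpers.
#include <algorithm>
#include <cmath>
#include <cstdint>

int8_t QuantActivation(float x, float inv_in_scale) {
  // q = round(127 * inv_scale * x), clipped to the int8 range
  float q = 127.0f * inv_in_scale * x;
  q = std::min(127.0f, std::max(-127.0f, std::round(q)));
  return static_cast<int8_t>(q);
}

float DequantGemmOutput(int32_t acc, float out_scale) {
  // out_scale = (weight_scale / 127) * (in_scale / 127) for this channel
  return static_cast<float>(acc) * out_scale;
}
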
IR_NODE_LINK_TO(qkv_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(out_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(ffn0_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(ffn1_out_scale_node, fused_multi_transformer); + } + IR_NODE_LINK_TO(input0, fused_multi_transformer); IR_NODE_LINK_TO(layer_norm_scale, fused_multi_transformer); IR_NODE_LINK_TO(layer_norm_bias, fused_multi_transformer); @@ -2290,12 +2691,14 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion( layer_norm_bias, layer_norm_mean, layer_norm_variance, + matmul0, matmul0_w, eltadd0_b, split0_k_out, split0_v_out, eltadd_qk_b, reshape2_0, + matmul_linear, matmul_linear_w, eltadd_linear_b, while0, @@ -2304,7 +2707,9 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion( ffn_layer_norm_bias, ffn_layer_norm_mean, ffn_layer_norm_variance, + ffn_matmul0, ffn_matmul0_w, + ffn_matmul1, ffn_matmul1_w, ffn_eltadd0_b, ffn_eltadd1_b, @@ -2546,6 +2951,12 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion( Graph* graph, const std::string& name_scope, Scope* scope) const { GraphPatternDetector gpd; auto* pattern = gpd.mutable_pattern(); + bool enable_int8 = graph->Get("enable_int8"); + if (enable_int8) { + VLOG(3) << "MultiDevicesFusedMultiTransformerEncoderFuseQKVPass with int8"; + } else { + VLOG(3) << "MultiDevicesFusedMultiTransformerEncoderFuseQKVPass with fp"; + } // Create pattern. patterns::MultiDevicesFusedMultiTransformerEncoderFuseQKVPattern @@ -2560,12 +2971,14 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion( Node* layer_norm_mean, Node* layer_norm_variance, Node* c_identity, + Node* matmul0, Node* matmul0_w, Node* eltadd0_b, Node* split0_k_out, Node* split0_v_out, Node* eltadd_qk_b, Node* reshape2_0, + Node* matmul_linear, Node* matmul_linear_w, Node* eltadd_linear_b, Node* while0, @@ -2574,7 +2987,10 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion( Node* ffn_layer_norm_bias, Node* ffn_layer_norm_mean, Node* ffn_layer_norm_variance, + Node* ffn_c_identity, + Node* ffn_matmul0, Node* ffn_matmul0_w, + Node* ffn_matmul1, Node* ffn_matmul1_w, Node* ffn_eltadd0_b, Node* ffn_eltadd1_b, @@ -2588,6 +3004,11 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion( .at(3) / 3; // 3 for qkv + auto* matmul0_op = matmul0->Op(); + auto* matmul_linear_op = matmul_linear->Op(); + auto* ffn_matmul_0_op = ffn_matmul0->Op(); + auto* ffn_matmul_1_op = ffn_matmul1->Op(); + // Calc index of transformer layer by LayerNorm Scale name // This calculation assumes: // 1. no LayerNorm before all transformer layer @@ -2602,23 +3023,31 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion( auto* qkv_b_tensor = scope->FindVar(eltadd0_b->Name())->GetMutable(); - int dim_embed = qkv_w_tensor->dims()[0]; + auto* layer_norm_bias_tensor = + scope->FindVar(layer_norm_bias->Name())->GetMutable(); + int dim_embed = layer_norm_bias_tensor->dims()[0]; - if (qkv_w_tensor->dtype() == phi::DataType::FLOAT32) { - QKVWeightsProcessFuseQKV( - qkv_w_tensor, qkv_b_tensor, num_head, dim_head, dim_embed); - } else if (qkv_w_tensor->dtype() == phi::DataType::FLOAT16) { - QKVWeightsProcessFuseQKV( - qkv_w_tensor, qkv_b_tensor, num_head, dim_head, dim_embed); - } else { - PADDLE_THROW(platform::errors::Unavailable( - "fused_multi_transformer not supported weight dtype. 
" - "we now only support fp32 and fp16.")); + QKVWeightsBiasProcessFuseQKV( + qkv_w_tensor, qkv_b_tensor, num_head, dim_head, dim_embed); + + if (enable_int8) { + auto* out_linear_w_tensor = scope->FindVar(matmul_linear_w->Name()) + ->GetMutable(); + auto* ffn0_w_tensor = + scope->FindVar(ffn_matmul0_w->Name())->GetMutable(); + auto* ffn1_w_tensor = + scope->FindVar(ffn_matmul1_w->Name())->GetMutable(); + + TransposeWeights(out_linear_w_tensor); + TransposeWeights(ffn0_w_tensor); + TransposeWeights(ffn1_w_tensor); } // create fused_multi_transformer OpDesc fused_multi_transformer_op_desc(layer_norm->Op()->Block()); - fused_multi_transformer_op_desc.SetType("fused_multi_transformer"); + fused_multi_transformer_op_desc.SetType(enable_int8 + ? "fused_multi_transformer_int8" + : "fused_multi_transformer"); // 1. Input setting fused_multi_transformer_op_desc.SetInput("X", {input0->Name()}); @@ -2638,7 +3067,7 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion( VarDesc cache_kv_desc("cache_kv" + std::to_string(layer_idx)); // FIXME: only support max_seq_len <= 1024 cache_kv_desc.SetDataType( - framework::TransToProtoVarType(qkv_w_tensor->dtype())); + framework::TransToProtoVarType(qkv_b_tensor->dtype())); cache_kv_desc.SetPersistable(false); auto* cache_kv = graph->CreateVarNode(&cache_kv_desc); @@ -2653,7 +3082,7 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion( fill_const_op_desc.SetAttr("value", 0); fill_const_op_desc.SetAttr("dtype", static_cast(framework::TransToProtoVarType( - qkv_w_tensor->dtype()))); + qkv_b_tensor->dtype()))); auto* fill_const_op = graph->CreateOpNode(&fill_const_op_desc); fused_multi_transformer_op_desc.SetInput("CacheKV", {cache_kv->Name()}); @@ -2696,8 +3125,129 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion( fused_multi_transformer_op_desc.SetAttr("ring_id", c_identity_op->GetAttr("ring_id")); + // Quantization attribute/Input + if (enable_int8) { + // Set input scale + std::string matmul_input_scale_suffix = c_identity_op->Input("X")[0]; + auto qkv_in_scale = PADDLE_GET_CONST( + float, + c_identity_op->GetAttr("Input_scale_" + matmul_input_scale_suffix)); + + std::string out_linear_input_name = matmul_linear_op->Input("X")[0]; + auto out_linear_in_scale = PADDLE_GET_CONST( + float, + matmul_linear_op->GetAttr("Input_scale_" + out_linear_input_name)); + + auto* ffn_c_identity_op = ffn_c_identity->Op(); + std::string ffn_input_scale_suffix = ffn_c_identity_op->Input("X")[0]; + auto ffn0_in_scale = PADDLE_GET_CONST( + float, + ffn_c_identity_op->GetAttr("Input_scale_" + ffn_input_scale_suffix)); + + std::string ffn1_input_name = ffn_matmul_1_op->Input("X")[0]; + auto ffn1_in_scale = PADDLE_GET_CONST( + float, ffn_matmul_1_op->GetAttr("Input_scale_" + ffn1_input_name)); + + // Calc outscale and Set them + auto qkv_weight_scale = + PADDLE_GET_CONST(float, matmul0_op->GetAttr("weight_scale")); + auto out_weight_scale = + PADDLE_GET_CONST(float, matmul_linear_op->GetAttr("weight_scale")); + auto ffn0_weight_scale = + PADDLE_GET_CONST(float, ffn_matmul_0_op->GetAttr("weight_scale")); + auto ffn1_weight_scale = + PADDLE_GET_CONST(float, ffn_matmul_1_op->GetAttr("weight_scale")); + + auto qkv_out_scales = std::vector( + 3 * dim_embed, (qkv_weight_scale / 127.0f) * (qkv_in_scale / 127.0f)); + auto out_out_scales = std::vector( + dim_embed, + (out_weight_scale / 127.0f) * (out_linear_in_scale / 127.0f)); + auto ffn0_out_scales = std::vector( + 4 * dim_embed, + (ffn0_weight_scale / 127.0f) * (ffn0_in_scale / 
127.0f)); + auto ffn1_out_scales = std::vector( + dim_embed, (ffn1_weight_scale / 127.0f) * (ffn1_in_scale / 127.0f)); + + // Inverse input scale + qkv_in_scale = 1.0f / qkv_in_scale; + out_linear_in_scale = 1.0f / out_linear_in_scale; + ffn0_in_scale = 1.0f / ffn0_in_scale; + ffn1_in_scale = 1.0f / ffn1_in_scale; + + fused_multi_transformer_op_desc.SetAttr("qkv_in_scale", + std::vector{qkv_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "out_linear_in_scale", std::vector{out_linear_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "ffn1_in_scale", std::vector{ffn0_in_scale}); + fused_multi_transformer_op_desc.SetAttr( + "ffn2_in_scale", std::vector{ffn1_in_scale}); + + auto qkv_out_scale_var = scope->Var(matmul0_w->Name() + "_out_scale"); + auto out_out_scale_var = + scope->Var(matmul_linear_w->Name() + "_out_scale"); + auto ffn0_out_scale_var = + scope->Var(ffn_matmul0_w->Name() + "_out_scale"); + auto ffn1_out_scale_var = + scope->Var(ffn_matmul1_w->Name() + "_out_scale"); + + auto qkv_out_scale_data = + qkv_out_scale_var->GetMutable() + ->mutable_data({3 * dim_embed}, platform::CPUPlace()); + memcpy(qkv_out_scale_data, + qkv_out_scales.data(), + qkv_out_scales.size() * sizeof(float)); + fused_multi_transformer_op_desc.SetInput( + "QKVOutScale", {matmul0_w->Name() + "_out_scale"}); + + auto out_out_scale_data = + out_out_scale_var->GetMutable() + ->mutable_data({dim_embed}, platform::CPUPlace()); + memcpy(out_out_scale_data, + out_out_scales.data(), + out_out_scales.size() * sizeof(float)); + fused_multi_transformer_op_desc.SetInput( + "OutLinearOutScale", {matmul_linear_w->Name() + "_out_scale"}); + + auto ffn0_out_scale_data = + ffn0_out_scale_var->GetMutable() + ->mutable_data({4 * dim_embed}, platform::CPUPlace()); + memcpy(ffn0_out_scale_data, + ffn0_out_scales.data(), + ffn0_out_scales.size() * sizeof(float)); + fused_multi_transformer_op_desc.SetInput( + "FFN1OutScale", {ffn_matmul0_w->Name() + "_out_scale"}); + + auto ffn1_out_scale_data = + ffn1_out_scale_var->GetMutable() + ->mutable_data({dim_embed}, platform::CPUPlace()); + memcpy(ffn1_out_scale_data, + ffn1_out_scales.data(), + ffn1_out_scales.size() * sizeof(float)); + fused_multi_transformer_op_desc.SetInput( + "FFN2OutScale", {ffn_matmul1_w->Name() + "_out_scale"}); + } + auto* fused_multi_transformer = graph->CreateOpNode(&fused_multi_transformer_op_desc); + + if (enable_int8) { + auto qkv_out_scale_node = + CreatePersistableVarNode(graph, matmul0_w->Name() + "_out_scale"); + auto out_out_scale_node = CreatePersistableVarNode( + graph, matmul_linear_w->Name() + "_out_scale"); + auto ffn0_out_scale_node = + CreatePersistableVarNode(graph, ffn_matmul0_w->Name() + "_out_scale"); + auto ffn1_out_scale_node = + CreatePersistableVarNode(graph, ffn_matmul1_w->Name() + "_out_scale"); + + IR_NODE_LINK_TO(qkv_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(out_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(ffn0_out_scale_node, fused_multi_transformer); + IR_NODE_LINK_TO(ffn1_out_scale_node, fused_multi_transformer); + } + IR_NODE_LINK_TO(input0, fused_multi_transformer); IR_NODE_LINK_TO(layer_norm_scale, fused_multi_transformer); IR_NODE_LINK_TO(layer_norm_bias, fused_multi_transformer); @@ -2977,12 +3527,14 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion( layer_norm_mean, layer_norm_variance, c_identity, + matmul0, matmul0_w, eltadd0_b, split0_k_out, split0_v_out, eltadd_qk_b, reshape2_0, + matmul_linear, matmul_linear_w, eltadd_linear_b, while0, @@ -2991,7 
+3543,10 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion( ffn_layer_norm_bias, ffn_layer_norm_mean, ffn_layer_norm_variance, + ffn_c_identity, + ffn_matmul0, ffn_matmul0_w, + ffn_matmul1, ffn_matmul1_w, ffn_eltadd0_b, ffn_eltadd1_b, diff --git a/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass_tester.cc b/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass_tester.cc index 2e356d0dc1997d..08f4dc06f58aa0 100644 --- a/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass_tester.cc +++ b/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass_tester.cc @@ -188,6 +188,7 @@ TEST(FusedMultiTransformerEncoderPass, basic) { std::unique_ptr graph(new ir::Graph(layers.main_program())); graph->Set("__param_scope__", CreateParamScope()); + graph->Set("enable_int8", new bool(false)); auto pass = PassRegistry::Instance().Get("fused_multi_transformer_encoder_pass"); @@ -334,6 +335,7 @@ TEST(FusedMultiTransformerEncoderFuseQKVPass, basic) { layers.elementwise_add(attention_out, ffn_eltadd1_out); std::unique_ptr graph(new ir::Graph(layers.main_program())); + graph->Set("enable_int8", new bool(false)); graph->Set("__param_scope__", CreateParamScope()); auto pass = PassRegistry::Instance().Get( @@ -489,6 +491,7 @@ TEST(MultiDevicesFusedMultiTransformerEncoderFuseQKVPass, basic) { layers.elementwise_add(attention_out, ffn_eltadd1_out); std::unique_ptr graph(new ir::Graph(layers.main_program())); + graph->Set("enable_int8", new bool(false)); graph->Set("__param_scope__", CreateParamScope()); auto pass = PassRegistry::Instance().Get( diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 7f509d64b5c23e..753c169f8f6d68 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -3175,6 +3175,73 @@ void patterns::DeleteWeightQuantDequantLinearOpPattern::operator()() { any_op2->LinksFrom({weight_dequantize_linear_op_out}); } +void patterns::DeleteWeightDequantLinearOpEncoderPattern::operator()() { + auto weight_dequantize_linear_op_x = + pattern->NewNode(weight_dequantize_linear_op_x_repr()) + ->AsInput() + ->assert_is_op_input("dequantize_linear", "X") + ->assert_is_persistable_var(); + + auto weight_dequantize_linear_op_scale = + pattern->NewNode(weight_dequantize_linear_op_scale_repr()) + ->AsInput() + ->assert_is_op_input("dequantize_linear", "Scale") + ->assert_is_persistable_var(); + + auto weight_dequantize_linear_op = + pattern->NewNode(weight_dequantize_linear_op_repr()) + ->assert_is_op("dequantize_linear"); + + auto weight_dequantize_linear_op_out = + pattern->NewNode(weight_dequantize_linear_op_out_repr()) + ->AsIntermediate() + ->assert_is_op_output("dequantize_linear", "Y"); + + auto any_op2 = pattern->NewNode(any_op2_repr())->assert_is_op()->AsOutput(); + + // while loop + auto *while0 = + pattern->NewNode(while0_repr())->assert_is_op("while")->AsOutput(); + while0->LinksFrom({weight_dequantize_linear_op_out}); + + weight_dequantize_linear_op + ->LinksFrom( + {weight_dequantize_linear_op_x, weight_dequantize_linear_op_scale}) + .LinksTo({weight_dequantize_linear_op_out}); + any_op2->LinksFrom({weight_dequantize_linear_op_out}); +} + +void patterns::DeleteWeightDequantLinearOpDecoderPattern::operator()() { + auto weight_dequantize_linear_op_x = + pattern->NewNode(weight_dequantize_linear_op_x_repr()) + ->AsInput() + ->assert_is_op_input("dequantize_linear", "X") + ->assert_is_persistable_var(); + + auto 
weight_dequantize_linear_op_scale = + pattern->NewNode(weight_dequantize_linear_op_scale_repr()) + ->AsInput() + ->assert_is_op_input("dequantize_linear", "Scale") + ->assert_is_persistable_var(); + + auto weight_dequantize_linear_op = + pattern->NewNode(weight_dequantize_linear_op_repr()) + ->assert_is_op("dequantize_linear"); + + auto weight_dequantize_linear_op_out = + pattern->NewNode(weight_dequantize_linear_op_out_repr()) + ->AsIntermediate() + ->assert_is_op_output("dequantize_linear", "Y"); + + auto any_op2 = pattern->NewNode(any_op2_repr())->assert_is_op()->AsOutput(); + + weight_dequantize_linear_op + ->LinksFrom( + {weight_dequantize_linear_op_x, weight_dequantize_linear_op_scale}) + .LinksTo({weight_dequantize_linear_op_out}); + any_op2->LinksFrom({weight_dequantize_linear_op_out}); +} + void patterns::DeleteQuantDequantLinearOpPattern::operator()() { auto quantize_linear_op_x = pattern->NewNode(quantize_linear_op_x_repr()) ->AsInput() diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index fdff82d30caaa5..cb1b9266b15308 100755 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -1765,6 +1765,39 @@ struct DeleteWeightQuantDequantLinearOpPattern : public PatternBase { PATTERN_DECL_NODE(any_op2); }; +struct DeleteWeightDequantLinearOpEncoderPattern : public PatternBase { + DeleteWeightDequantLinearOpEncoderPattern(PDPattern* pattern, + const std::string& name_scope) + : PatternBase(pattern, + name_scope, + "delete_weight_quant_dequant_linear_op_pattern") {} + + void operator()(); + + PATTERN_DECL_NODE(weight_dequantize_linear_op_x); + PATTERN_DECL_NODE(weight_dequantize_linear_op_scale); + PATTERN_DECL_NODE(while0); + PATTERN_DECL_NODE(weight_dequantize_linear_op); + PATTERN_DECL_NODE(weight_dequantize_linear_op_out); + PATTERN_DECL_NODE(any_op2); +}; + +struct DeleteWeightDequantLinearOpDecoderPattern : public PatternBase { + DeleteWeightDequantLinearOpDecoderPattern(PDPattern* pattern, + const std::string& name_scope) + : PatternBase(pattern, + name_scope, + "delete_weight_quant_dequant_linear_op_pattern") {} + + void operator()(); + + PATTERN_DECL_NODE(weight_dequantize_linear_op_x); + PATTERN_DECL_NODE(weight_dequantize_linear_op_scale); + PATTERN_DECL_NODE(weight_dequantize_linear_op); + PATTERN_DECL_NODE(weight_dequantize_linear_op_out); + PATTERN_DECL_NODE(any_op2); +}; + struct DeleteQuantDequantLinearOpPattern : public PatternBase { DeleteQuantDequantLinearOpPattern(PDPattern* pattern, const std::string& name_scope) diff --git a/paddle/fluid/framework/ir/pass.cc b/paddle/fluid/framework/ir/pass.cc index 4ad93183996fa5..5c05a9f27c1fc1 100644 --- a/paddle/fluid/framework/ir/pass.cc +++ b/paddle/fluid/framework/ir/pass.cc @@ -46,7 +46,10 @@ static const std::vector support_subgraph_passes = { "fused_multi_transformer_decoder_fuse_qkv_pass", "multi_devices_fused_multi_transformer_encoder_fuse_qkv_pass", "multi_devices_fused_multi_transformer_decoder_fuse_qkv_pass", - "fuse_multi_transformer_layer_pass"}; + "fuse_multi_transformer_layer_pass", + "delete_quant_dequant_linear_op_pass", + "delete_weight_dequant_linear_op_encoder_pass", + "delete_weight_dequant_linear_op_decoder_pass"}; Graph *Pass::Apply(Graph *graph) const { VLOG(10) << "start to apply pass " << Type() << " to graph"; diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index c964ce7e4d0d22..f02776d00f8c77 100644 --- 
a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -165,6 +165,9 @@ const std::vector kLiteSubgraphPasses({ // running errors. After fusion operator supports low precision, delete this. const std::vector kGpuLowerPrecisionPasses{ "simplify_with_basic_ops_pass", + "delete_quant_dequant_linear_op_pass", + "delete_weight_dequant_linear_op_encoder_pass", + "delete_weight_dequant_linear_op_decoder_pass", "map_depthwise_conv_to_conv_pass", "conv_bn_fuse_pass", "conv_eltwiseadd_bn_fuse_pass", @@ -203,9 +206,12 @@ const std::vector kTrtLowerPrecisionPasses{ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { passes_.assign({ // "identity_scale_op_clean_pass", // - "is_test_pass", // - "simplify_with_basic_ops_pass", // - "map_depthwise_conv_to_conv_pass", + "is_test_pass", // + "simplify_with_basic_ops_pass", // + "delete_quant_dequant_linear_op_pass", // + "delete_weight_dequant_linear_op_encoder_pass", // + "delete_weight_dequant_linear_op_decoder_pass", // + "map_depthwise_conv_to_conv_pass", // "conv_bn_fuse_pass", // "conv_eltwiseadd_bn_fuse_pass", // "embedding_eltwise_layernorm_fuse_pass", // diff --git a/paddle/fluid/operators/fused/attn_gemm_int8.h b/paddle/fluid/operators/fused/attn_gemm_int8.h index 98a45deac3c8da..cdbd5b2e0b8217 100644 --- a/paddle/fluid/operators/fused/attn_gemm_int8.h +++ b/paddle/fluid/operators/fused/attn_gemm_int8.h @@ -20,6 +20,7 @@ limitations under the License. */ #include "paddle/fluid/operators/fused/quant_dequant_kernel.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" @@ -27,6 +28,7 @@ namespace paddle { namespace operators { using Tensor = phi::DenseTensor; +using phi::backends::gpu::GpuLaunchConfig; template class AttnMatmulINT8 { @@ -36,6 +38,9 @@ class AttnMatmulINT8 { : dev_ctx_(dev_ctx), m_(m), n_(n), k_(k), compute_bias_(compute_bias) { auto helper = std::make_shared(m, k, n); helpers_.emplace_back(helper); + gpu_config_ = std::make_unique( + phi::backends::gpu::GetGpuLaunchConfig1D( + dev_ctx, m * n, DequantKernelVecSize)); } ~AttnMatmulINT8() {} @@ -50,7 +55,6 @@ class AttnMatmulINT8 { phi::DenseTensor* bias_out, const float quant_in_scale, const phi::DenseTensor* dequant_out_scale, - const int quant_out_scale_offset, const int quant_round_type = 1, const float quant_max_bound = 127.0, const float quant_min_bound = -127.0) { @@ -74,9 +78,9 @@ class AttnMatmulINT8 { m_, n_, dev_ctx_.stream(), + gpu_config_.get(), quant_in_scale, - dequant_out_scale->data(), - quant_out_scale_offset); + dequant_out_scale->data()); if (compute_bias_) { // bias_out = output + bias @@ -99,11 +103,13 @@ class AttnMatmulINT8 { phi::DenseTensor* input, const phi::DenseTensor* bias, phi::DenseTensor* output, - phi::DenseTensor* bias_out) { + phi::DenseTensor* bias_out, + void* workspace = nullptr) { helpers_[0]->GEMM(input->data(), weight->data(), output->data(), - dev_ctx_.stream()); + dev_ctx_.stream(), + workspace); } // This function is used to execute GEMM, with input and output's types are @@ -115,8 +121,7 @@ class AttnMatmulINT8 { phi::DenseTensor* output, phi::DenseTensor* output_tmp, phi::DenseTensor* bias_out, - const phi::DenseTensor* dequant_out_scale, - const int quant_out_scale_offset) { + const phi::DenseTensor* dequant_out_scale) { helpers_[0]->GEMM(input->data(), 
weight->data(), output_tmp->data(), @@ -127,9 +132,9 @@ class AttnMatmulINT8 { m_, n_, dev_ctx_.stream(), + gpu_config_.get(), quant_in_scale, - dequant_out_scale->data(), - quant_out_scale_offset); + dequant_out_scale->data()); if (compute_bias_) { // bias_out = output + bias @@ -183,6 +188,7 @@ class AttnMatmulINT8 { int compute_bias_; std::vector> helpers_; + std::unique_ptr gpu_config_; }; } // namespace operators diff --git a/paddle/fluid/operators/fused/cublaslt.h b/paddle/fluid/operators/fused/cublaslt.h index b9cc6b56f13eea..e9728c58b55dc8 100644 --- a/paddle/fluid/operators/fused/cublaslt.h +++ b/paddle/fluid/operators/fused/cublaslt.h @@ -24,6 +24,20 @@ namespace dyl = paddle::platform::dynload; namespace paddle { namespace operators { + +struct CublasLtAlgoParam { + int algoId; + int swizzle; + int customOption; + int tile; + int splitK_val; + int reductionScheme; + int stages; + size_t workspace_size; +}; + +const std::map, CublasLtAlgoParam> AlgoParamCache{}; + class CublasLtHelper { public: CublasLtHelper(int m, int k, int n) @@ -99,38 +113,34 @@ class CublasLtHelper { "cublasLtMatrixLayoutCreate execution error" "refer https://docs.nvidia.com/cuda/cublas/index.html to get more " "information")); - } - ~CublasLtHelper() { - if (handle_) dyl::cublasLtDestroy(handle_); - if (matmul_desc_) dyl::cublasLtMatmulDescDestroy(matmul_desc_); - if (A_desc_) dyl::cublasLtMatrixLayoutDestroy(A_desc_); - if (B_desc_) dyl::cublasLtMatrixLayoutDestroy(B_desc_); - if (C_desc_) dyl::cublasLtMatrixLayoutDestroy(C_desc_); - } - void GEMM(int8_t* A_dev, - const int8_t* B_dev, - int32_t* C_dev, - cudaStream_t stream) { - cublasStatus_t status; +#if CUDA_VERSION >= 11020 -#if __CUDA_ARCH__ >= 800 && CUDA_VERSION >= 11020 - cublasLtMatmulAlgo_t algo; int algoId = 21; int swizzle = 0; int customOption = 0; int tile = 15; int splitK_val = 0; int reductionScheme = 0; -#if CUDA_VERSION >= 11000 int stages = 23; -#endif - -#if CUBLAS_VER_MAJOR < 11 - cudaDataType_t cudaComputeType = CUDA_R_32I; -#else - cublasComputeType_t cudaComputeType = CUBLAS_COMPUTE_32I; -#endif + workspace_size_ = 0; + if (m >= 128) { + tile = 20; + stages = 17; + } + + std::tuple key(m_, k_, n_); + if (AlgoParamCache.count(key) != 0) { + auto value = AlgoParamCache.at(key); + algoId = value.algoId; + swizzle = value.swizzle; + customOption = value.customOption; + tile = value.tile; + splitK_val = value.splitK_val; + reductionScheme = value.reductionScheme; + stages = value.stages; + workspace_size_ = value.workspace_size; + } dyl::cublasLtMatmulAlgoInit(handle_, cudaComputeType, @@ -140,30 +150,43 @@ class CublasLtHelper { CUDA_R_32I, CUDA_R_32I, algoId, - &algo); + &algo_); dyl::cublasLtMatmulAlgoConfigSetAttribute( - &algo, + &algo_, CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION, &(customOption), sizeof(customOption)); dyl::cublasLtMatmulAlgoConfigSetAttribute( - &algo, CUBLASLT_ALGO_CONFIG_TILE_ID, &(tile), sizeof(tile)); - dyl::cublasLtMatmulAlgoConfigSetAttribute(&algo, + &algo_, CUBLASLT_ALGO_CONFIG_TILE_ID, &(tile), sizeof(tile)); + dyl::cublasLtMatmulAlgoConfigSetAttribute(&algo_, CUBLASLT_ALGO_CONFIG_SPLITK_NUM, &(splitK_val), sizeof(splitK_val)); dyl::cublasLtMatmulAlgoConfigSetAttribute( - &algo, CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING, &(swizzle), sizeof(swizzle)); + &algo_, + CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING, + &(swizzle), + sizeof(swizzle)); dyl::cublasLtMatmulAlgoConfigSetAttribute( - &algo, + &algo_, CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME, &(reductionScheme), sizeof(int)); #if CUDA_VERSION >= 11000 
dyl::cublasLtMatmulAlgoConfigSetAttribute( - &algo, CUBLASLT_ALGO_CONFIG_STAGES_ID, &(stages), sizeof(stages)); + &algo_, CUBLASLT_ALGO_CONFIG_STAGES_ID, &(stages), sizeof(stages)); #endif #endif + } + ~CublasLtHelper() {} + + void GEMM(int8_t* A_dev, + const int8_t* B_dev, + int32_t* C_dev, + cudaStream_t stream, + void* workspace = nullptr) { + cublasStatus_t status; + status = dyl::cublasLtMatmul(handle_, matmul_desc_, &alpha_, @@ -176,13 +199,15 @@ class CublasLtHelper { C_desc_, C_dev, C_desc_, -#if __CUDA_ARCH__ >= 800 && CUDA_VERSION >= 11020 - &algo, +#if CUDA_VERSION >= 11020 + &algo_, + workspace, + workspace_size_, #else nullptr, -#endif nullptr, 0, +#endif stream); PADDLE_ENFORCE_EQ( status, @@ -199,12 +224,17 @@ class CublasLtHelper { cublasLtMatrixLayout_t A_desc_; cublasLtMatrixLayout_t B_desc_; cublasLtMatrixLayout_t C_desc_; + + cublasLtMatmulAlgo_t algo_; + int32_t alpha_; int32_t beta_; int m_; int k_; int n_; + + size_t workspace_size_; }; } // namespace operators diff --git a/paddle/fluid/operators/fused/fused_dropout_act_bias.h b/paddle/fluid/operators/fused/fused_dropout_act_bias.h index 553fb8d7be6042..1156d04b8f557d 100644 --- a/paddle/fluid/operators/fused/fused_dropout_act_bias.h +++ b/paddle/fluid/operators/fused/fused_dropout_act_bias.h @@ -86,7 +86,6 @@ __global__ void FusedDropoutActBias( MaskType *mask, const float quant_last_in_scale = 1.0, const float *dequant_out_scale_data = nullptr, - const int quant_out_scale_offset = 0, const float quant_next_in_scale = 1.0, const int quant_round_type = 1, const float quant_max_bound = 127.0, @@ -127,7 +126,6 @@ __global__ void FusedDropoutActBias( act, quant_last_in_scale, dequant_out_scale_data, - quant_out_scale_offset, quant_next_in_scale, quant_round_type, quant_max_bound, @@ -146,7 +144,13 @@ __global__ void FusedActBias(Functor act, const uint64_t cols, const InType *__restrict__ src, const T *__restrict__ bias, - OutType *dst) { + OutType *dst, + const float quant_last_in_scale = 1.0, + const float *dequant_out_scale_data = nullptr, + const float quant_next_in_scale = 1.0, + const int quant_round_type = 1, + const float quant_max_bound = 127.0, + const float quant_min_bound = -127.0) { const int32_t global_thread_idx = blockDim.x * blockIdx.x + threadIdx.x; using LoadT = phi::AlignedVector; using LoadInType = phi::AlignedVector; @@ -156,23 +160,42 @@ __global__ void FusedActBias(Functor act, LoadInType src_vec; LoadT bias_vec; StoreOutType out_vec; + LoadFloat dequant_out_scale_vec; for (int32_t idx = global_thread_idx * VecSize, step = blockDim.x * gridDim.x * VecSize; idx < elem_cnt; idx += step) { const int32_t col_idx = idx % cols; phi::Load(&src[idx], &src_vec); + phi::Load(&dequant_out_scale_data[col_idx], + &dequant_out_scale_vec); if (bias) { phi::Load(&bias[col_idx], &bias_vec); } #pragma unroll for (int32_t unroll_idx = 0; unroll_idx < VecSize; unroll_idx++) { - if (bias) { - out_vec[unroll_idx] = static_cast( - act(static_cast(src_vec[unroll_idx]) + bias_vec[unroll_idx])); + T tmp; + if (std::is_same::value) { + tmp = static_cast(static_cast(src_vec[unroll_idx]) * + dequant_out_scale_vec[unroll_idx]); + if (bias) { + tmp = static_cast(act(tmp + bias_vec[unroll_idx])); + } else { + tmp = static_cast(act(tmp)); + } + out_vec[unroll_idx] = quant_helper(tmp, + quant_next_in_scale, + quant_round_type, + quant_max_bound, + quant_min_bound); } else { - out_vec[unroll_idx] = - static_cast(act(static_cast(src_vec[unroll_idx]))); + if (bias) { + out_vec[unroll_idx] = static_cast( + 
act(static_cast(src_vec[unroll_idx]) + bias_vec[unroll_idx])); + } else { + out_vec[unroll_idx] = + static_cast(act(static_cast(src_vec[unroll_idx]))); + } } } phi::Store(out_vec, &dst[idx]); @@ -202,7 +225,6 @@ void LaunchDropoutActBias(Functor act_functor, const phi::GPUContext &ctx, const float quant_last_in_scale = 1.0, const float *dequant_out_scale_data = nullptr, - const int quant_out_scale_offset = 0, const float quant_next_in_scale = 1.0, const int quant_round_type = 1, const float quant_max_bound = 127.0, @@ -218,7 +240,7 @@ void LaunchDropoutActBias(Functor act_functor, const int real_vec_size = cols % VecSize == 0 ? VecSize : 1; const auto config = Get1DBlocksAnd2DGrids(ctx, rows, cols, real_vec_size); if (cols % VecSize == 0) { - if (is_test && (dequant_out_scale_data == nullptr)) { + if (is_test) { const int32_t elem_cnt = rows * cols; const int32_t pack_num = elem_cnt / VecSize; const int32_t tmp_cols = cols / VecSize; @@ -227,8 +249,15 @@ void LaunchDropoutActBias(Functor act_functor, const int grid_size = std::max(static_cast(1), (pack_num + block_size - 1) / block_size); FusedActBias - <<>>( - act_functor, elem_cnt, cols, src, bias, dst); + <<>>(act_functor, + elem_cnt, + cols, + src, + bias, + dst, + quant_last_in_scale, + dequant_out_scale_data, + quant_next_in_scale); } else { FusedDropoutActBias <<>>( @@ -246,7 +275,6 @@ void LaunchDropoutActBias(Functor act_functor, mask_data, quant_last_in_scale, dequant_out_scale_data, - quant_out_scale_offset, quant_next_in_scale); } } else { @@ -266,7 +294,6 @@ void LaunchDropoutActBias(Functor act_functor, mask_data, quant_last_in_scale, dequant_out_scale_data, - quant_out_scale_offset, quant_next_in_scale); } } diff --git a/paddle/fluid/operators/fused/fused_dropout_helper.h b/paddle/fluid/operators/fused/fused_dropout_helper.h index 708aef3d690f97..f95d159144f370 100644 --- a/paddle/fluid/operators/fused/fused_dropout_helper.h +++ b/paddle/fluid/operators/fused/fused_dropout_helper.h @@ -154,7 +154,6 @@ class FusedDropoutHelper { MaskType* mask, const float quant_last_in_scale = 1.0, const float* dequant_out_scale_data = nullptr, - const int quant_out_scale_offset = 0, const float quant_next_in_scale = 1.0) { auto increment = GetIncrement(ctx); LaunchResidualDropoutBias( @@ -173,7 +172,6 @@ class FusedDropoutHelper { ctx, quant_last_in_scale, dequant_out_scale_data, - quant_out_scale_offset, quant_next_in_scale); } @@ -212,7 +210,6 @@ class FusedDropoutHelper { MaskType* mask, const float quant_last_in_scale = 1.0, const float* dequant_out_scale_data = nullptr, - const int quant_out_scale_offset = 0, const float quant_next_in_scale = 1.0, const int quant_round_type = 1, const float quant_max_bound = 127.0, @@ -237,7 +234,6 @@ class FusedDropoutHelper { ctx, quant_last_in_scale, dequant_out_scale_data, - quant_out_scale_offset, quant_next_in_scale, quant_round_type, quant_max_bound, @@ -260,7 +256,6 @@ class FusedDropoutHelper { ctx, quant_last_in_scale, dequant_out_scale_data, - quant_out_scale_offset, quant_next_in_scale, quant_round_type, quant_max_bound, @@ -287,7 +282,6 @@ class FusedDropoutHelper { ctx, quant_last_in_scale, dequant_out_scale_data, - quant_out_scale_offset, quant_next_in_scale, quant_round_type, quant_max_bound, @@ -454,7 +448,6 @@ class FusedDropoutLayerNormHelper LayerNormParamType* variance, const float quant_last_in_scale = 1.0, const float* dequant_out_scale_data = nullptr, - const int quant_out_scale_offset = 0, const float quant_next_in_scale = 1.0, const int quant_round_type = 1, const float 
quant_max_bound = 127.0, @@ -494,7 +487,6 @@ class FusedDropoutLayerNormHelper ctx, quant_last_in_scale, dequant_out_scale_data, - quant_out_scale_offset, quant_next_in_scale, quant_round_type, quant_max_bound, diff --git a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h index 137943afbfb94d..a529271250e5d0 100644 --- a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h +++ b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h @@ -442,7 +442,6 @@ __global__ __launch_bounds__(THREADS_PER_CTA) void fused_fast_ln_fwd_kernel( OutType *__restrict__ y_ptr, const float quant_last_in_scale = 1.0, const float *__restrict__ quant_out_scale_ptr = nullptr, - const int quant_out_scale_offset = 0, const float quant_next_in_scale = 1.0, const int quant_round_type = 1, const float quant_max_bound = 127.0, @@ -504,9 +503,8 @@ __global__ __launch_bounds__(THREADS_PER_CTA) void fused_fast_ln_fwd_kernel( phi::Load(x_ptr + row * ELTS_PER_ROW + col * VecSize, &x_input[it]); if (quant_out_scale_ptr != nullptr) { - phi::Load( - quant_out_scale_ptr + quant_out_scale_offset + col * VecSize, - &dequant_out_scale[it]); + phi::Load(quant_out_scale_ptr + col * VecSize, + &dequant_out_scale[it]); } col += THREADS_PER_ROW; } @@ -543,7 +541,6 @@ __global__ __launch_bounds__(THREADS_PER_CTA) void fused_fast_ln_fwd_kernel( // dropout(x) + residual if (std::is_same::value) { T tmp = (static_cast(static_cast(x_input[it][jt]) * - quant_last_in_scale / dequant_out_scale[it][jt]) + bias[it][jt]) * static_cast(mask_vec[it][jt]) * factor + @@ -567,7 +564,6 @@ __global__ __launch_bounds__(THREADS_PER_CTA) void fused_fast_ln_fwd_kernel( if (std::is_same::value) { // for int32 input, we need to dequantize. T tmp = static_cast(static_cast(x_input[it][jt]) * - quant_last_in_scale / dequant_out_scale[it][jt]) * static_cast(mask_vec[it][jt]) * factor + residual[it][jt]; @@ -752,7 +748,6 @@ void LaunchLayernormResidualDropoutBias( const phi::GPUContext &ctx, const float quant_last_in_scale = 1.0, const float *dequant_out_scale_data = nullptr, - const int quant_out_scale_offset = 0, const float quant_next_in_scale = 1.0, const int quant_round_type = 1, const float quant_max_bound = 127.0, @@ -844,7 +839,6 @@ void LaunchLayernormResidualDropoutBias( layernorm_dst, \ quant_last_in_scale, \ dequant_out_scale_data, \ - quant_out_scale_offset, \ quant_next_in_scale, \ quant_round_type, \ quant_max_bound, \ diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc index 2a2d1f27edd9c2..3a9bd15c101e9d 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc +++ b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc @@ -58,6 +58,12 @@ class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel { CHECK_INPUTS(FFN1Weight); CHECK_INPUTS(FFN2Weight); + // scale + CHECK_INPUTS(QKVOutScale); + CHECK_INPUTS(OutLinearOutScale); + CHECK_INPUTS(FFN1OutScale); + CHECK_INPUTS(FFN2OutScale); + CHECK_OUTPUT(Out); // x: qkv's input [batch_size, seq_len, dim_embed] @@ -232,20 +238,24 @@ class FusedMultiTransformerINT8OpMaker "In order to keep consistent with the PTQ/QAT calculation logic," "QKVOutScale should be max_bound * max_bound / max_range." "Here max_range is per-channel weight scale." 
- "The shape of QKVOutScale is [num_layers, num_channels]") - .AsDispensable(); + "The shape of QKVOutScale is [num_channels]") + .AsDispensable() + .AsDuplicable(); AddInput("OutLinearOutScale", "OutLinearOutScale is used to dequantize out_linear output tensor." "The definition and shape is the same as QKVOutScale") - .AsDispensable(); + .AsDispensable() + .AsDuplicable(); AddInput("FFN1OutScale", "FFN1OutScale is used to dequantize ffn1 output tensor." "The definition and shape is the same as QKVOutScale") - .AsDispensable(); + .AsDispensable() + .AsDuplicable(); AddInput("FFN2OutScale", "FFN2OutScale is used to dequantize ffn2 output tensor." "The definition and shape is the same as QKVOutScale") - .AsDispensable(); + .AsDispensable() + .AsDuplicable(); AddOutput("CacheKVOut", "The updated cache KV. Inplace with CacheKV") .AsDispensable() diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu index 681748c71c91a7..fa22ee8d57e65b 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu +++ b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu @@ -48,16 +48,11 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { // dequant output scales, tensor, size = [num_layers, n], n is gemm output // size - auto *qkv_out_scale = ctx.Input("QKVOutScale"); - auto *out_linear_out_scale = - ctx.Input("OutLinearOutScale"); - auto *ffn1_out_scale = ctx.Input("FFN1OutScale"); - auto *ffn2_out_scale = ctx.Input("FFN2OutScale"); - - int qkv_out_scale_n = qkv_out_scale->dims()[1]; - int out_linear_out_scale_n = out_linear_out_scale->dims()[1]; - int ffn1_out_scale_n = ffn1_out_scale->dims()[1]; - int ffn2_out_scale_n = ffn2_out_scale->dims()[1]; + auto qkv_out_scales = ctx.MultiInput("QKVOutScale"); + auto out_linear_out_scales = + ctx.MultiInput("OutLinearOutScale"); + auto ffn1_out_scales = ctx.MultiInput("FFN1OutScale"); + auto ffn2_out_scales = ctx.MultiInput("FFN2OutScale"); // 1. layer norm const auto pre_layer_norm = ctx.Attr("pre_layer_norm"); @@ -132,6 +127,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { transpose_out_2.Resize({{3, bsz, num_head, seq_len, dim_head}}); auto *transpose_out_2_data = dev_ctx.Alloc(&transpose_out_2, transpose_out_2.numel() * sizeof(T)); + qk_out.Resize({{bsz, num_head, seq_len, out_seq_len}}); auto *qk_out_data = dev_ctx.Alloc(&qk_out, qk_out.numel() * sizeof(T)); @@ -232,19 +228,23 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { dev_ctx, bsz_seq, dim_embed, ffn2_dropout_param, epsilon); // []. 
init workspace for cublasLt transform - Tensor input_workspace, output_workspace; + Tensor input_workspace, output_workspace, cublaslt_workspace; // for input and output transform data is CUBLASLT_ORDER_COL32 format, int m_max = bsz_seq, k_max = std::max(dim_embed, dim_ffn), n_max = std::max({output_size, dim_embed, dim_ffn}); - input_workspace.Resize( - {{32 * ((m_max + 32 - 1) / 32), (k_max + 31) / 32 * 32}}); + input_workspace.Resize({{(m_max * k_max + 31) / 32 * 32}}); dev_ctx.Alloc(&input_workspace, input_workspace.numel() * sizeof(int8_t)); - output_workspace.Resize({{n_max * 4, (m_max + 31) / 32 * 32 * 4}}); + + output_workspace.Resize({{(n_max * m_max + 31) / 32 * 32}}); dev_ctx.Alloc(&output_workspace, output_workspace.numel() * sizeof(int32_t)); + cublaslt_workspace.Resize({{3000000}}); + dev_ctx.Alloc(&cublaslt_workspace, + cublaslt_workspace.numel() * sizeof(int8_t)); + // calc auto *out = ctx.Output("Out"); auto *from_data = dev_ctx.Alloc(out, out->numel() * sizeof(T)); @@ -305,8 +305,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { &output_workspace, &qkv_out, qkv_in_scale[i], - qkv_out_scale, - i * qkv_out_scale_n, + qkv_out_scales[i], quant_round_type, quant_max_bound, quant_min_bound); @@ -319,8 +318,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { &output_workspace, &qkv_out, qkv_in_scale[i], - qkv_out_scale, - i * qkv_out_scale_n, + qkv_out_scales[i], quant_round_type, quant_max_bound, quant_min_bound); @@ -332,8 +330,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { &qkv_out, &output_workspace, &qkv_out, - qkv_out_scale, - i * qkv_out_scale_n); + qkv_out_scales[i]); } #ifdef _DEBUG_FUSED_MULTI_TRANSFORMER VLOG(0) << "step2"; @@ -441,8 +438,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { &output_workspace, nullptr, out_linear_in_scale[i], - out_linear_out_scale, - i * out_linear_out_scale_n, + out_linear_out_scales[i], quant_round_type, quant_max_bound, quant_min_bound); @@ -473,8 +469,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { ln_mean_data, ln_var_data, out_linear_in_scale[i], - out_linear_out_scale->data(), - i * out_linear_out_scale_n, + out_linear_out_scales[i]->data(), ffn1_in_scale[i], quant_round_type, quant_max_bound, @@ -504,11 +499,13 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { // step6. ffn matmul1 if (pre_layer_norm) { - ffn1_linear_compute.ComputeForwardINT8ToINT8(ffn1_weights[i], - &input_workspace, - nullptr, - &output_workspace, - nullptr); + ffn1_linear_compute.ComputeForwardINT8ToINT8( + ffn1_weights[i], + &input_workspace, + nullptr, + &output_workspace, + nullptr, + cublaslt_workspace.data()); } else { ffn1_linear_compute.ComputeForward(ffn1_weights[i], buf1, @@ -518,8 +515,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { &output_workspace, nullptr, ffn1_in_scale[i], - ffn1_out_scale, - i * ffn1_out_scale_n, + ffn1_out_scales[i], quant_round_type, quant_max_bound, quant_min_bound); @@ -539,8 +535,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { input_workspace.data(), ffn1_dropout_mask_data, ffn1_in_scale[i], - ffn1_out_scale->data(), - i * ffn1_out_scale_n, + ffn1_out_scales[i]->data(), ffn2_in_scale[i], quant_round_type, quant_max_bound, @@ -560,11 +555,13 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { // step8. 
ffn matmul2 if (pre_layer_norm) { - ffn2_linear_compute.ComputeForwardINT8ToINT8(ffn2_weights[i], - &input_workspace, - nullptr, - &output_workspace, - nullptr); + ffn2_linear_compute.ComputeForwardINT8ToINT8( + ffn2_weights[i], + &input_workspace, + nullptr, + &output_workspace, + nullptr, + cublaslt_workspace.data()); } else { ffn2_linear_compute.ComputeForward(ffn2_weights[i], &ffn1_dropout_out, @@ -574,8 +571,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { &output_workspace, nullptr, ffn2_in_scale[i], - ffn2_out_scale, - i * ffn2_out_scale_n, + ffn2_out_scales[i], quant_round_type, quant_max_bound, quant_min_bound); @@ -616,8 +612,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { ln_mean_data, ln_var_data, ffn2_in_scale[i], - ffn2_out_scale->data(), - i * ffn2_out_scale_n, + ffn2_out_scales[i]->data(), qkv_in_scale[i + 1], quant_round_type, quant_max_bound, @@ -631,8 +626,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { buf1->data(), dropout_mask_out_data, ffn2_in_scale[i], - ffn2_out_scale->data(), - i * ffn2_out_scale_n, + ffn2_out_scales[i]->data(), 1.0); } } else { diff --git a/paddle/fluid/operators/fused/fused_residual_dropout_bias.h b/paddle/fluid/operators/fused/fused_residual_dropout_bias.h index f162d200abfe1e..972bbe3326a5db 100644 --- a/paddle/fluid/operators/fused/fused_residual_dropout_bias.h +++ b/paddle/fluid/operators/fused/fused_residual_dropout_bias.h @@ -49,7 +49,6 @@ __forceinline__ __device__ void FusedResidualDropoutBiasOneThread( Functor act_func, const float quant_last_in_scale = 1.0, const float *dequant_out_scale_data = nullptr, - const int quant_out_scale_offset = 0, const float quant_next_in_scale = 1.0, const int quant_round_type = 1, const float quant_max_bound = 127.0, @@ -74,9 +73,8 @@ __forceinline__ __device__ void FusedResidualDropoutBiasOneThread( } // vectorize load data from global phi::Load(&src[row_id * cols + col_id], &src_vec); - phi::Load( - &dequant_out_scale_data[quant_out_scale_offset + col_id], - &quant_out_scale_vec); + phi::Load(&dequant_out_scale_data[col_id], + &quant_out_scale_vec); if (residual) { phi::Load(&residual[row_id * cols + col_id], &residual_vec); } @@ -108,7 +106,7 @@ __forceinline__ __device__ void FusedResidualDropoutBiasOneThread( T tmp; if (std::is_same::value) { T tmp0 = static_cast(static_cast(src_vec[ii]) * - quant_last_in_scale / quant_out_scale_vec[ii]); + quant_out_scale_vec[ii]); tmp = tmp0 + bias_vec[ii]; } else { tmp = static_cast(src_vec[ii]) + bias_vec[ii]; @@ -172,7 +170,6 @@ __global__ void FusedResidualDropoutBias( const bool is_test, const float quant_last_in_scale = 1.0, const float *dequant_out_scale_data = nullptr, - const int quant_out_scale_offset = 0, const float quant_next_in_scale = 1.0) { int col_id = blockDim.x * blockIdx.x + threadIdx.x; int row_id = blockIdx.y; @@ -208,7 +205,6 @@ __global__ void FusedResidualDropoutBias( relu, quant_last_in_scale, dequant_out_scale_data, - quant_out_scale_offset, quant_next_in_scale); } } @@ -236,7 +232,6 @@ void LaunchResidualDropoutBias(const uint32_t rows, const phi::GPUContext &ctx, const float quant_last_in_scale = 1.0, const float *dequant_out_scale_data = nullptr, - const int quant_out_scale_offset = 0, const float quant_next_in_scale = 1.0) { // dropout_prob == 1.0f if (std::abs(dropout_prob - 1.0f) < 1e-5) { @@ -278,7 +273,6 @@ void LaunchResidualDropoutBias(const uint32_t rows, is_test, quant_last_in_scale, dequant_out_scale_data, - quant_out_scale_offset, 
quant_next_in_scale); } else { FusedResidualDropoutBias @@ -297,7 +291,6 @@ void LaunchResidualDropoutBias(const uint32_t rows, is_test, quant_last_in_scale, dequant_out_scale_data, - quant_out_scale_offset, quant_next_in_scale); } } diff --git a/paddle/fluid/operators/fused/quant_dequant_kernel.h b/paddle/fluid/operators/fused/quant_dequant_kernel.h index 21b7b0f345466e..164effe01d316a 100644 --- a/paddle/fluid/operators/fused/quant_dequant_kernel.h +++ b/paddle/fluid/operators/fused/quant_dequant_kernel.h @@ -18,17 +18,24 @@ limitations under the License. */ #include "paddle/fluid/operators/fake_quantize_op.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" +#include "paddle/phi/kernels/funcs/aligned_vector.h" namespace paddle { namespace operators { +using phi::backends::gpu::GpuLaunchConfig; + +constexpr int DequantKernelVecSize = 4; + template __forceinline__ __device__ int8_t quant_helper(const T input, const float scale, const int round_type, const float max_bound, const float min_bound) { - float quant_value = max_bound * inverse(scale) * static_cast(input); + float quant_value = max_bound * scale * static_cast(input); + if (round_type == 0) { quant_value = static_cast(roundWithTiesToEven(quant_value)); } else { @@ -77,7 +84,7 @@ void quantize_kernel_launcher(const T* input, const float min_bound, gpuStream_t stream) { // TODO(minghaoBD): optimize the kennel launch times when m==1 or n==1 - dim3 grid((n + 31) / 32, (m + 31) / 32); + dim3 grid((n >> 2 + 31) / 32, (m + 31) / 32); dim3 block(32, 32); quantize_kernel<<>>(input, @@ -90,46 +97,48 @@ void quantize_kernel_launcher(const T* input, min_bound); } -// dequantize using weight scales and input scales -template +template __global__ void dequantize_kernel(T* output, const int32_t* input, - const int m, // hidden - const int n, // batch size + const int m, // batch size + const int n, // hidden const float quant_in_scale, - const float* dequant_out_scale_data, - const int quant_out_scale_offset) { - int m_id = blockIdx.x * blockDim.x + threadIdx.x; // hidden - int n_id = blockIdx.y * blockDim.y + threadIdx.y; // batch size - - bool check = ((m_id < m) && (n_id < n)); - if (check) { - float out_scale = dequant_out_scale_data[quant_out_scale_offset + m_id]; - output[n_id * m + m_id] = - static_cast(static_cast(input[n_id * m + m_id]) * - quant_in_scale / out_scale); + const float* dequant_out_scale_data) { + int numel = m * n; + int stride = blockDim.x * gridDim.x * VecSize; + int idx = (blockIdx.x * blockDim.x + threadIdx.x) * VecSize; + int col_id = idx % n; + + phi::AlignedVector in_vec; + phi::AlignedVector out_scale_vec; + phi::AlignedVector out_vec; + + for (; idx < numel; idx += stride) { + phi::Load(input + idx, &in_vec); + phi::Load(dequant_out_scale_data + col_id, &out_scale_vec); + +#pragma unroll + for (int i = 0; i < VecSize; ++i) { + out_vec[i] = + static_cast(static_cast(in_vec[i]) * out_scale_vec[i]); + } + + phi::Store(out_vec, output + idx); } } template void dequantize_kernel_launcher(const int32_t* input, T* output, - const int batch_size, // m - const int hidden_units, // n + const int m, // m + const int n, // n gpuStream_t stream, + GpuLaunchConfig* gpu_config, const float quant_in_scale, - const float* dequant_out_scale_data, - const int quant_out_scale_offset) { - dim3 grid((hidden_units + 31) / 32, (batch_size + 31) / 32); - dim3 block(32, 32); - - dequantize_kernel<<>>(output, - input, - hidden_units, - 
batch_size, - quant_in_scale, - dequant_out_scale_data, - quant_out_scale_offset); + const float* dequant_out_scale_data) { + dequantize_kernel + <<block_per_grid, gpu_config->thread_per_block, 0, stream>>>( + output, input, m, n, quant_in_scale, dequant_out_scale_data); } } // namespace operators diff --git a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_int8_op.py b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_int8_op.py index 3f91f9b6e6d90d..fbbe2d65418af5 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_int8_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_int8_op.py @@ -307,7 +307,7 @@ def generate_input_data(self): self.attn_mask = None def fake_quant(self, input, scale): - quant_value = 127.0 * (1.0 / scale) * paddle.cast(input, 'float32') + quant_value = 127.0 * scale * paddle.cast(input, 'float32') quant_value = paddle.round(quant_value) # No need to clip here because scale is the max value @@ -333,11 +333,8 @@ def GetBaselineOut(self): if self.pre_layer_norm: ln1_out = self.norm(tensor_query) max_v = paddle.max(paddle.abs(paddle.cast(ln1_out, 'float32')))[0] - # self.qkv_in_scales.append(127.0 / max_v) - self.qkv_in_scales.append(max_v) - self.qkv_out_scales.append(127.0 * 127.0) - # print('qkv_in_scales ', i, self.qkv_in_scales[i]) - # print('qkv_out_scales ', i, self.qkv_out_scales[i]) + self.qkv_in_scales.append(1 / max_v) + self.qkv_out_scales.append(max_v / (127.0 * 127.0)) # quant ln1_out ln1_out = self.fake_quant(ln1_out, self.qkv_in_scales[i]) @@ -345,9 +342,7 @@ def GetBaselineOut(self): q = paddle.nn.functional.linear(ln1_out, self.q_weight_tensor) # de quant q = paddle.cast( - paddle.cast(q, 'float32') - * self.qkv_in_scales[i] - / self.qkv_out_scales[i], + paddle.cast(q, 'float32') * self.qkv_out_scales[i], self.x_type, ) @@ -357,17 +352,13 @@ def GetBaselineOut(self): k = paddle.nn.functional.linear(ln1_out, self.k_weight_tensor) k = paddle.cast( - paddle.cast(k, 'float32') - * self.qkv_in_scales[i] - / self.qkv_out_scales[i], + paddle.cast(k, 'float32') * self.qkv_out_scales[i], self.x_type, ) k = k + self.k_proj_bias_tensor v = paddle.nn.functional.linear(ln1_out, self.v_weight_tensor) v = paddle.cast( - paddle.cast(v, 'float32') - * self.qkv_in_scales[i] - / self.qkv_out_scales[i], + paddle.cast(v, 'float32') * self.qkv_out_scales[i], self.x_type, ) v = v + self.v_proj_bias_tensor @@ -442,10 +433,10 @@ def GetBaselineOut(self): max_v = paddle.max( paddle.abs(paddle.cast(out_linear_in, 'float32')) )[0] - # self.out_linear_in_scales.append(127.0 / max_v) - self.out_linear_in_scales.append(max_v) - self.out_linear_out_scales.append((127.0 * 127.0)) + self.out_linear_in_scales.append(1 / max_v) + self.out_linear_out_scales.append(max_v / (127.0 * 127.0)) + out_linear_in = self.fake_quant( out_linear_in, self.out_linear_in_scales[i] ) @@ -455,9 +446,7 @@ def GetBaselineOut(self): ) out = paddle.cast( - paddle.cast(out, 'float32') - * self.out_linear_in_scales[i] - / self.out_linear_out_scales[i], + paddle.cast(out, 'float32') * self.out_linear_out_scales[i], self.x_type, ) @@ -476,8 +465,8 @@ def GetBaselineOut(self): max_v = paddle.max(paddle.abs(paddle.cast(ffn_ln_out, 'float32')))[ 0 ] - self.ffn1_in_scales.append(max_v) - self.ffn1_out_scales.append((127.0 * 127.0)) + self.ffn1_in_scales.append(1 / max_v) + self.ffn1_out_scales.append(max_v / (127.0 * 127.0)) ffn_ln_out = self.fake_quant(ffn_ln_out, self.ffn1_in_scales[i]) ffn1_out = paddle.nn.functional.linear( @@ 
-485,9 +474,7 @@ def GetBaselineOut(self): ) ffn1_out = paddle.cast( - paddle.cast(ffn1_out, 'float32') - * self.ffn1_in_scales[i] - / self.ffn1_out_scales[i], + paddle.cast(ffn1_out, 'float32') * self.ffn1_out_scales[i], self.x_type, ) @@ -495,10 +482,8 @@ def GetBaselineOut(self): ffn1_out = self.dropout(self.activation(ffn1_out)) max_v = paddle.max(paddle.abs(paddle.cast(ffn1_out, 'float32')))[0] - # self.ffn2_in_scales.append(127.0 / max_v) - self.ffn2_in_scales.append(max_v) - self.ffn2_out_scales.append((127.0 * 127.0)) - # print('ffn2_in_scales ', i, self.ffn2_in_scales[i]) + self.ffn2_in_scales.append(1 / max_v) + self.ffn2_out_scales.append(max_v / (127.0 * 127.0)) ffn1_out = self.fake_quant(ffn1_out, self.ffn2_in_scales[i]) ffn2_out = paddle.nn.functional.linear( @@ -506,16 +491,12 @@ def GetBaselineOut(self): ) ffn2_out = paddle.cast( - paddle.cast(ffn2_out, 'float32') - * self.ffn2_in_scales[i] - / self.ffn2_out_scales[i], + paddle.cast(ffn2_out, 'float32') * self.ffn2_out_scales[i], self.x_type, ) ffn2_out = ffn2_out + self.ffn2_proj_bias_tensor residual_out = attn_out + self.dropout(ffn2_out) - # print("residual ", attn_out) - # print("residual_out ", residual_out) final_out = residual_out if not self.pre_layer_norm: final_out = self.ffn_norm(residual_out) @@ -644,23 +625,18 @@ def GetFusedMultiTransformerOut(self): ffn1_weights, ffn1_biases = [], [] ffn2_weights, ffn2_biases = [], [] ffn_ln_scales, ffn_ln_biases = [], [] + + # Input scales: list of value qkv_in_scale = [] out_linear_in_scale = [] ffn1_in_scale = [] ffn2_in_scale = [] - qkv_out_scales_tensor = paddle.ones( - [self.layers, 3 * self.embed_dim], 'float32' - ) - out_linear_out_scales_tensor = paddle.ones( - [self.layers, self.embed_dim], 'float32' - ) - ffn1_out_scales_tensor = paddle.ones( - [self.layers, 4 * self.embed_dim], 'float32' - ) - ffn2_out_scales_tensor = paddle.ones( - [self.layers, self.embed_dim], 'float32' - ) + # Output dequant scales: list of tensor + qkv_out_scales = [] + out_linear_out_scales = [] + ffn1_out_scales = [] + ffn2_out_scales = [] for i in range(self.layers): qkv_weights.append(qkv_weight_tensor) @@ -680,10 +656,30 @@ def GetFusedMultiTransformerOut(self): ffn1_in_scale.append(self.ffn1_in_scales[i]) ffn2_in_scale.append(self.ffn2_in_scales[i]) - qkv_out_scales_tensor[i, :] *= self.qkv_out_scales[i] - out_linear_out_scales_tensor[i, :] *= self.out_linear_out_scales[i] - ffn1_out_scales_tensor[i, :] *= self.ffn1_out_scales[i] - ffn2_out_scales_tensor[i, :] *= self.ffn2_out_scales[i] + qkv_out_scale = ( + paddle.ones([3 * self.embed_dim], 'float32') + * self.qkv_out_scales[i] + ) + + out_linear_out_scale = ( + paddle.ones([self.embed_dim], 'float32') + * self.out_linear_out_scales[i] + ) + + ffn1_out_scale = ( + paddle.ones([4 * self.embed_dim], 'float32') + * self.ffn1_out_scales[i] + ) + + ffn2_out_scale = ( + paddle.ones([self.embed_dim], 'float32') + * self.ffn2_out_scales[i] + ) + + qkv_out_scales.append(qkv_out_scale) + out_linear_out_scales.append(out_linear_out_scale) + ffn1_out_scales.append(ffn1_out_scale) + ffn2_out_scales.append(ffn2_out_scale) if self.has_cache_kv: cache_kvs.append(paddle.to_tensor(cache_kv, stop_gradient=True)) @@ -713,10 +709,10 @@ def GetFusedMultiTransformerOut(self): trans_qkvw=True, ring_id=-1, name=None, - qkv_out_scales=qkv_out_scales_tensor, - out_linear_out_scales=out_linear_out_scales_tensor, - ffn1_out_scales=ffn1_out_scales_tensor, - ffn2_out_scales=ffn2_out_scales_tensor, + qkv_out_scales=qkv_out_scales, + 
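The scale bookkeeping changed by this test is easier to see as a round trip: the *_in_scales now hold 1/max_abs, so fake_quant is a pure multiply by 127 * scale, and the *_out_scales hold max_abs / (127 * 127), so dequantization is also a pure multiply. The NumPy sketch below checks that convention end to end; the per-output-channel weight quantization here is an assumption made for illustration and is not the exact baseline computed in the test.

import numpy as np

np.random.seed(0)
x = np.random.randn(4, 8).astype(np.float32)           # activations
w = np.random.randn(8, 8).astype(np.float32)           # weights

x_max = np.abs(x).max()
w_max = np.abs(w).max(axis=0)                          # per output channel

in_scale = 1.0 / x_max                                 # cf. qkv_in_scales.append(1 / max_v)
x_i8 = np.clip(np.round(127.0 * in_scale * x), -127, 127).astype(np.int8)
w_i8 = np.clip(np.round(127.0 / w_max * w), -127, 127).astype(np.int8)

acc = x_i8.astype(np.int32) @ w_i8.astype(np.int32)    # int8 GEMM accumulated in int32

out_scale = (x_max * w_max) / (127.0 * 127.0)          # cf. qkv_out_scales.append(max_v / (127.0 * 127.0))
y = acc.astype(np.float32) * out_scale                 # dequant is a single multiply

print(float(np.abs(y - x @ w).max()))                  # small quantization error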
out_linear_out_scales=out_linear_out_scales, + ffn1_out_scales=ffn1_out_scales, + ffn2_out_scales=ffn2_out_scales, num_head=self.num_heads, dim_head=self.head_dim, dim_ffn=4 * self.embed_dim, From 31e380ce89fc4702f71f0b609b08ef07f1723d1c Mon Sep 17 00:00:00 2001 From: Roc <30228238+sljlp@users.noreply.github.com> Date: Wed, 30 Nov 2022 11:07:37 +0800 Subject: [PATCH 052/154] [Eager] fix recompute for stop_gradient and inpalce (#48471) * fix recompute for stop_gradient and inpalce * fix ut * update --- .../fleet/recompute/recompute_hybrid.py | 12 ++++++++++ .../fleet/hybrid_parallel_pp_recompute.py | 24 ++++++++++++++----- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/python/paddle/distributed/fleet/recompute/recompute_hybrid.py b/python/paddle/distributed/fleet/recompute/recompute_hybrid.py index 1595ffaf9ea511..db5166917edaeb 100644 --- a/python/paddle/distributed/fleet/recompute/recompute_hybrid.py +++ b/python/paddle/distributed/fleet/recompute/recompute_hybrid.py @@ -150,6 +150,18 @@ def forward( tensor_inputs.append(arg) ctx.tensor_indices.append(i) ctx.inputs.append(None) + + # In new dygraph mode, in some cases a subset of outputs is identity to the subset of inputs, + # which is inplace operating. When the inputs' stop_gradient is True, an + # error will occurs because the stop_gradient=True and inpalce-op are not + # supported in the same time. The solution is to mark the inputs non_differentiable + # if its stop_gradient is True. + # Note: + # If not marked non_differentiable, all output tensors' attr `stop gradient` + # will be reset to `False` in c++ backend. + # See https://github.com/PaddlePaddle/Paddle/blob/9d62efb0e6e5373823039d9eda96cd5905426c0a/paddle/fluid/pybind/eager_py_layer.cc#L388 + if framework.in_dygraph_mode() and state: + ctx.mark_non_differentiable(arg) else: ctx.inputs.append(arg) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_recompute.py b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_recompute.py index d9b0e94ae64157..e2690efcb61b33 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_recompute.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_recompute.py @@ -22,6 +22,7 @@ import paddle.distributed.fleet as fleet import paddle.nn as nn import paddle.nn.functional as F +from paddle import framework from paddle.distributed.fleet.meta_parallel import LayerDesc, PipelineLayer from paddle.fluid import layers from paddle.fluid.dygraph.layers import Layer @@ -88,14 +89,22 @@ def forward(self, x): class EmbeddingPipe(EmbeddingNet): - def forward(self, x): - return super().forward(x) + def forward(self, tensors): + if framework.in_dygraph_mode(): + stable, x = tensors + return stable, super().forward(x) + else: + return super().forward(tensors) class TransformerNetPipe(TransformerNet): - def forward(self, x): - output = super().forward(x) - return output + def forward(self, tensors): + if framework.in_dygraph_mode(): + stable, x = tensors + output = super().forward(x) + return stable, output + else: + return super().forward(tensors) class CriterionPipe(Layer): @@ -103,6 +112,8 @@ def __init__(self): super().__init__() def forward(self, out, label): + if framework.in_dygraph_mode(): + out = out[-1] loss = out.mean() return loss @@ -171,7 +182,8 @@ def test_pp_model(self): x_data = np.random.randint(0, vocab_size, size=[batch_size, length]) x = paddle.to_tensor(x_data) x.stop_gradient = True - loss = model.train_batch([x, x], 
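The comment block added to recompute_hybrid.py above carries the key idea of this commit. The snippet below is a framework-free model of that bookkeeping, included only to make the control flow explicit; the _Tensor/_Ctx classes are stand-ins, and treating `state` as the argument's stop_gradient flag is an assumption, since that assignment is not visible in the hunk. The real code operates on Paddle tensors and PyLayer's ctx.mark_non_differentiable.

class _Tensor:
    def __init__(self, name, stop_gradient):
        self.name, self.stop_gradient = name, stop_gradient

class _Ctx:
    def __init__(self):
        self.tensor_indices, self.inputs, self.non_differentiable = [], [], []
    def mark_non_differentiable(self, t):
        self.non_differentiable.append(t.name)

def record_args(ctx, args, in_dygraph_mode=True):
    # Mirrors the patched loop: tensors are stashed by position and, in dygraph
    # mode, inputs with stop_gradient=True are marked so the backend does not
    # reset that flag when an output is an alias of the input.
    for i, arg in enumerate(args):
        if isinstance(arg, _Tensor):
            ctx.tensor_indices.append(i)
            ctx.inputs.append(None)
            if in_dygraph_mode and arg.stop_gradient:
                ctx.mark_non_differentiable(arg)
        else:
            ctx.inputs.append(arg)

ctx = _Ctx()
record_args(ctx, [_Tensor("ids", True), 0.1, _Tensor("hidden", False)])
print(ctx.tensor_indices, ctx.non_differentiable)   # [0, 2] ['ids']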
optimizer, scheduler) + input_ = (x, x) if framework.in_dygraph_mode() else x + loss = model.train_batch([input_, x], optimizer, scheduler) # TODO(shenliang03) add utest for loss print("loss: ", loss) From 35902ec635868340c2172d12903a479197d66edf Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 30 Nov 2022 11:14:07 +0800 Subject: [PATCH 053/154] Fix bug of wrong eigen dependency (#48485) * fix bug of eigen_dependency * fix xpu compile --- paddle/fluid/framework/op_kernel_type.h | 4 ++-- paddle/fluid/platform/device/xpu/xpu_op_list.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/op_kernel_type.h b/paddle/fluid/framework/op_kernel_type.h index 78b38eed080b71..a609313e848005 100644 --- a/paddle/fluid/framework/op_kernel_type.h +++ b/paddle/fluid/framework/op_kernel_type.h @@ -19,8 +19,8 @@ limitations under the License. */ #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/library_type.h" -#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/place.h" +#include "paddle/phi/core/device_context.h" namespace paddle { namespace framework { @@ -50,7 +50,7 @@ class OpKernelType { customized_type_value_(customized_type_value) {} OpKernelType(proto::VarType::Type data_type, - const platform::DeviceContext& dev_ctx, + const phi::DeviceContext& dev_ctx, DataLayout data_layout = DataLayout::kAnyLayout, LibraryType library_type = LibraryType::kPlain, int customized_type_value = kDefaultCustomizedTypeValue) diff --git a/paddle/fluid/platform/device/xpu/xpu_op_list.h b/paddle/fluid/platform/device/xpu/xpu_op_list.h index e008e9d111c53f..d701294865d6de 100644 --- a/paddle/fluid/platform/device/xpu/xpu_op_list.h +++ b/paddle/fluid/platform/device/xpu/xpu_op_list.h @@ -15,6 +15,7 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/op_kernel_type.h" +#include "paddle/fluid/platform/device/xpu/xpu_info.h" #include "paddle/phi/backends/xpu/xpu_op_list.h" namespace paddle { From e337d2807a6da9ba70f7a56b334aae781066215e Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 30 Nov 2022 11:15:08 +0800 Subject: [PATCH 054/154] Fix the name map of operator from Phi to fluid (#48496) * rename some kernel name * fix compile problem --- cmake/phi.cmake | 29 ++++++++++++++----- paddle/fluid/operators/size_op.cc | 2 +- paddle/phi/api/yaml/legacy_ops.yaml | 4 +-- paddle/phi/core/compat/op_utils.h | 29 ++++++++++--------- paddle/phi/infermeta/unary.cc | 2 +- paddle/phi/infermeta/unary.h | 2 +- .../cpu/{size_kernel.cc => numel_kernel.cc} | 8 ++--- .../gpu/{size_kernel.cu => numel_kernel.cu} | 8 ++--- ...size_kernel_impl.h => numel_kernel_impl.h} | 6 ++-- .../kernels/{size_kernel.h => numel_kernel.h} | 4 ++- paddle/phi/ops/compat/einsum_sig.cc | 2 ++ paddle/phi/ops/compat/embedding_sig.cc | 5 ++++ paddle/phi/ops/compat/size_sig.cc | 17 +++++++++++ 13 files changed, 79 insertions(+), 39 deletions(-) rename paddle/phi/kernels/cpu/{size_kernel.cc => numel_kernel.cc} (86%) rename paddle/phi/kernels/gpu/{size_kernel.cu => numel_kernel.cu} (86%) rename paddle/phi/kernels/impl/{size_kernel_impl.h => numel_kernel_impl.h} (90%) rename paddle/phi/kernels/{size_kernel.h => numel_kernel.h} (86%) create mode 100644 paddle/phi/ops/compat/size_sig.cc diff --git a/cmake/phi.cmake b/cmake/phi.cmake index b245c209a442e7..b2eaf554d2db8b 100644 --- a/cmake/phi.cmake +++ b/cmake/phi.cmake @@ -111,19 +111,31 @@ function(kernel_declare TARGET_LIST) endfunction() function(append_op_util_declare TARGET) + file(READ ${TARGET} target_content) + string(REGEX MATCH "(PD_REGISTER_ARG_MAPPING_FN)\\([ \t\r\n]*[a-z0-9_]*" + util_registrar "${target_content}") + if(NOT ${util_registrar} EQUAL "") + string(REPLACE "PD_REGISTER_ARG_MAPPING_FN" "PD_DECLARE_ARG_MAPPING_FN" + util_declare "${util_registrar}") + string(APPEND util_declare ");\n") + file(APPEND ${op_utils_header} "${util_declare}") + endif() +endfunction() + +function(append_op_kernel_map_declare TARGET) file(READ ${TARGET} target_content) string( REGEX MATCH - "(PD_REGISTER_BASE_KERNEL_NAME|PD_REGISTER_ARG_MAPPING_FN)\\([ \t\r\n]*[a-z0-9_]*" - util_registrar + "(PD_REGISTER_BASE_KERNEL_NAME)\\([ \t\r\n]*[a-z0-9_]*,[ \\\t\r\n]*[a-z0-9_]*" + kernel_mapping_registrar "${target_content}") - string(REPLACE "PD_REGISTER_ARG_MAPPING_FN" "PD_DECLARE_ARG_MAPPING_FN" - util_declare "${util_registrar}") - string(REPLACE "PD_REGISTER_BASE_KERNEL_NAME" "PD_DECLARE_BASE_KERNEL_NAME" - util_declare "${util_declare}") - string(APPEND util_declare ");\n") - file(APPEND ${op_utils_header} "${util_declare}") + if(NOT ${kernel_mapping_registrar} EQUAL "") + string(REPLACE "PD_REGISTER_BASE_KERNEL_NAME" "PD_DECLARE_BASE_KERNEL_NAME" + kernel_mapping_declare "${kernel_mapping_registrar}") + string(APPEND kernel_mapping_declare ");\n") + file(APPEND ${op_utils_header} "${kernel_mapping_declare}") + endif() endfunction() function(register_op_utils TARGET_NAME) @@ -137,6 +149,7 @@ function(register_op_utils TARGET_NAME) file(GLOB SIGNATURES "${PADDLE_SOURCE_DIR}/paddle/phi/ops/compat/*_sig.cc") foreach(target ${SIGNATURES}) append_op_util_declare(${target}) + append_op_kernel_map_declare(${target}) list(APPEND utils_srcs ${target}) endforeach() diff --git a/paddle/fluid/operators/size_op.cc b/paddle/fluid/operators/size_op.cc index 6d04f7a1c7cabb..094e87f384bcd4 100644 --- 
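The phi.cmake change above splits the single regex so that PD_REGISTER_BASE_KERNEL_NAME is matched with both the op name and the kernel name it maps to. Re-expressed in Python purely for illustration (the build actually uses CMake's string(REGEX MATCH ...) and string(REPLACE ...)), the new pattern behaves roughly like this on the size_sig.cc line added later in this commit:

import re

src = "PD_REGISTER_BASE_KERNEL_NAME(size, numel);"
pattern = r"(PD_REGISTER_BASE_KERNEL_NAME)\([ \t\r\n]*([a-z0-9_]*),[ \t\r\n]*([a-z0-9_]*)"
registrar, op_type, base_kernel_name = re.search(pattern, src).groups()

declare = f"PD_DECLARE_BASE_KERNEL_NAME({op_type}, {base_kernel_name});"
print(declare)   # PD_DECLARE_BASE_KERNEL_NAME(size, numel);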
a/paddle/fluid/operators/size_op.cc +++ b/paddle/fluid/operators/size_op.cc @@ -62,7 +62,7 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER(SizeOpNoNeedBufferVarInferer, "Input"); namespace ops = paddle::operators; DECLARE_INFER_SHAPE_FUNCTOR(size, SizeInferShapeFunctor, - PD_INFER_META(phi::SizeInferMeta)); + PD_INFER_META(phi::NumelInferMeta)); REGISTER_OPERATOR( size, ops::SizeOp, diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index b506c41cdff163..1bc0fc7f0aa43b 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -1465,9 +1465,9 @@ args : (Tensor x) output : Tensor(size) infer_meta : - func : SizeInferMeta + func : NumelInferMeta kernel : - func : size + func : numel data_transform: skip_transform : x diff --git a/paddle/phi/core/compat/op_utils.h b/paddle/phi/core/compat/op_utils.h index 1017aa3341e880..b836359ae817b3 100644 --- a/paddle/phi/core/compat/op_utils.h +++ b/paddle/phi/core/compat/op_utils.h @@ -223,21 +223,22 @@ struct ArgumentMappingFnRegistrar { } }; -#define PD_REGISTER_BASE_KERNEL_NAME(op_type, base_kernel_name) \ +#define PD_REGISTER_BASE_KERNEL_NAME(op_type, base_kernel_name) \ + PD_STATIC_ASSERT_GLOBAL_NAMESPACE( \ + PD_REGISTER_base_kernel_name_ns_check_##base_kernel_name, \ + "PD_REGISTER_BASE_KERNEL_NAME must be called in global namespace."); \ + static const ::phi::BaseKernelNameRegistrar \ + __registrar_base_kernel_name_for_##base_kernel_name(#op_type, \ + #base_kernel_name); \ + int TouchBaseKernelNameSymbol_##base_kernel_name() { return 0; } + +#define PD_DECLARE_BASE_KERNEL_NAME(op_type, base_kernel_name) \ PD_STATIC_ASSERT_GLOBAL_NAMESPACE( \ - PD_REGISTER_base_kernel_name_ns_check_##op_type, \ - "PD_REGISTER_BASE_KERNEL_NAME must be called in global namespace."); \ - static const ::phi::BaseKernelNameRegistrar \ - __registrar_base_kernel_name_for_##op_type(#op_type, #base_kernel_name); \ - int TouchBaseKernelNameSymbol_##op_type() { return 0; } - -#define PD_DECLARE_BASE_KERNEL_NAME(op_type) \ - PD_STATIC_ASSERT_GLOBAL_NAMESPACE( \ - PD_DECLARE_ai_name_ns_check_##op_type, \ - "PD_DECLARE_BASE_KERNEL_NAME must be called in global namespace."); \ - extern int TouchBaseKernelNameSymbol_##op_type(); \ - UNUSED static int __declare_base_kernel_name_symbol_for_##op_type = \ - TouchBaseKernelNameSymbol_##op_type() + PD_DECLARE_ai_name_ns_check_##base_kernel_name, \ + "PD_DECLARE_BASE_KERNEL_NAME must be called in global namespace."); \ + extern int TouchBaseKernelNameSymbol_##base_kernel_name(); \ + UNUSED static int __declare_base_kernel_name_symbol_for_##base_kernel_name = \ + TouchBaseKernelNameSymbol_##base_kernel_name() #define PD_REGISTER_ARG_MAPPING_FN(op_type, arg_mapping_fn) \ PD_STATIC_ASSERT_GLOBAL_NAMESPACE( \ diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index f2c3873d81e5cf..768c33d4f4ad69 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -3243,7 +3243,7 @@ void ShardIndexInferMeta(const MetaTensor& in, out->set_dtype(in.dtype()); } -void SizeInferMeta(const MetaTensor& input, MetaTensor* out) { +void NumelInferMeta(const MetaTensor& input, MetaTensor* out) { out->set_dtype(DataType::INT64); if (input.dims().size() == 0) { out->set_dims(phi::make_ddim({})); diff --git a/paddle/phi/infermeta/unary.h b/paddle/phi/infermeta/unary.h index c7b7780b0cbee6..30f14cfb447bab 100644 --- a/paddle/phi/infermeta/unary.h +++ b/paddle/phi/infermeta/unary.h @@ -478,7 +478,7 @@ void ShardIndexInferMeta(const MetaTensor& in, MetaTensor* 
out, MetaConfig config = MetaConfig()); -void SizeInferMeta(const MetaTensor& input, MetaTensor* out); +void NumelInferMeta(const MetaTensor& input, MetaTensor* out); void SliceRawInferMeta(const MetaTensor& input, const std::vector& axes, diff --git a/paddle/phi/kernels/cpu/size_kernel.cc b/paddle/phi/kernels/cpu/numel_kernel.cc similarity index 86% rename from paddle/phi/kernels/cpu/size_kernel.cc rename to paddle/phi/kernels/cpu/numel_kernel.cc index a070c2de5bc447..7050adb1c9c05b 100644 --- a/paddle/phi/kernels/cpu/size_kernel.cc +++ b/paddle/phi/kernels/cpu/numel_kernel.cc @@ -12,16 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/size_kernel.h" +#include "paddle/phi/kernels/numel_kernel.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/impl/size_kernel_impl.h" +#include "paddle/phi/kernels/impl/numel_kernel_impl.h" -PD_REGISTER_KERNEL(size, +PD_REGISTER_KERNEL(numel, CPU, ALL_LAYOUT, - phi::SizeKernel, + phi::NumelKernel, uint8_t, int16_t, int, diff --git a/paddle/phi/kernels/gpu/size_kernel.cu b/paddle/phi/kernels/gpu/numel_kernel.cu similarity index 86% rename from paddle/phi/kernels/gpu/size_kernel.cu rename to paddle/phi/kernels/gpu/numel_kernel.cu index a165e6c82df96b..2c44f4db08ec44 100644 --- a/paddle/phi/kernels/gpu/size_kernel.cu +++ b/paddle/phi/kernels/gpu/numel_kernel.cu @@ -12,16 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/size_kernel.h" +#include "paddle/phi/kernels/numel_kernel.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/impl/size_kernel_impl.h" +#include "paddle/phi/kernels/impl/numel_kernel_impl.h" -PD_REGISTER_KERNEL(size, +PD_REGISTER_KERNEL(numel, GPU, ALL_LAYOUT, - phi::SizeKernel, + phi::NumelKernel, int16_t, int, int64_t, diff --git a/paddle/phi/kernels/impl/size_kernel_impl.h b/paddle/phi/kernels/impl/numel_kernel_impl.h similarity index 90% rename from paddle/phi/kernels/impl/size_kernel_impl.h rename to paddle/phi/kernels/impl/numel_kernel_impl.h index 4c72f02f64349a..7504619b0f7ee8 100644 --- a/paddle/phi/kernels/impl/size_kernel_impl.h +++ b/paddle/phi/kernels/impl/numel_kernel_impl.h @@ -19,9 +19,9 @@ namespace phi { template -void SizeKernel(const Context& ctx, - const DenseTensor& input, - DenseTensor* out) { +void NumelKernel(const Context& ctx, + const DenseTensor& input, + DenseTensor* out) { auto place = ctx.GetPlace(); auto out_data = ctx.template Alloc(out); diff --git a/paddle/phi/kernels/size_kernel.h b/paddle/phi/kernels/numel_kernel.h similarity index 86% rename from paddle/phi/kernels/size_kernel.h rename to paddle/phi/kernels/numel_kernel.h index 2d7a29104db081..07947de0b417cf 100644 --- a/paddle/phi/kernels/size_kernel.h +++ b/paddle/phi/kernels/numel_kernel.h @@ -19,6 +19,8 @@ namespace phi { template -void SizeKernel(const Context& ctx, const DenseTensor& input, DenseTensor* out); +void NumelKernel(const Context& ctx, + const DenseTensor& input, + DenseTensor* out); } // namespace phi diff --git a/paddle/phi/ops/compat/einsum_sig.cc b/paddle/phi/ops/compat/einsum_sig.cc index 1030946980f86f..c145b8f4fa5f3b 100644 --- a/paddle/phi/ops/compat/einsum_sig.cc +++ b/paddle/phi/ops/compat/einsum_sig.cc @@ -31,5 +31,7 @@ KernelSignature EinsumGradOpArgumentMapping(const ArgumentMappingContext& 
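For context on the rename, the op itself is unchanged from the Python side; only the registered kernel name moves from `size` to `numel`. A quick dygraph usage check follows (exact output formatting may differ across Paddle versions):

import paddle

x = paddle.zeros([3, 4, 5])
n = paddle.numel(x)        # dispatched to the phi `numel` kernel registered above
print(n.dtype, int(n))     # paddle.int64 60
print(x.size)              # 60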
ctx) { } } // namespace phi +PD_REGISTER_BASE_KERNEL_NAME(einsum, einsum_raw); + PD_REGISTER_ARG_MAPPING_FN(einsum, phi::EinsumOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(einsum_grad, phi::EinsumGradOpArgumentMapping); diff --git a/paddle/phi/ops/compat/embedding_sig.cc b/paddle/phi/ops/compat/embedding_sig.cc index 48debcafaf2356..655cd00777225c 100644 --- a/paddle/phi/ops/compat/embedding_sig.cc +++ b/paddle/phi/ops/compat/embedding_sig.cc @@ -58,6 +58,11 @@ KernelSignature EmbeddingGradOpArgumentMapping( PD_REGISTER_BASE_KERNEL_NAME(lookup_table_v2, embedding); PD_REGISTER_BASE_KERNEL_NAME(lookup_table_v2_grad, embedding_grad); +PD_REGISTER_BASE_KERNEL_NAME(lookup_table_v2_grad, embedding_sparse_grad); +PD_REGISTER_BASE_KERNEL_NAME(lookup_table_v2_grad, + sparse_weight_embedding_grad); +PD_REGISTER_BASE_KERNEL_NAME(lookup_table_v2_grad, + sparse_weight_embedding_sparse_grad); PD_REGISTER_ARG_MAPPING_FN(lookup_table_v2, phi::EmbeddingOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(lookup_table_v2_grad, diff --git a/paddle/phi/ops/compat/size_sig.cc b/paddle/phi/ops/compat/size_sig.cc new file mode 100644 index 00000000000000..46177e4ae35b99 --- /dev/null +++ b/paddle/phi/ops/compat/size_sig.cc @@ -0,0 +1,17 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/core/compat/op_utils.h" + +PD_REGISTER_BASE_KERNEL_NAME(size, numel); From ca552933503f9c4a7f9c36099504a975f071832d Mon Sep 17 00:00:00 2001 From: zhangbo9674 <82555433+zhangbo9674@users.noreply.github.com> Date: Wed, 30 Nov 2022 11:27:40 +0800 Subject: [PATCH 055/154] Add fuse_act_add_grad_pass (#48346) * add fuse act add grad pass * polish code * refine code * add test * refine code --- .../framework/ir/fuse_elewise_add_act_pass.cc | 129 +++++++++++++++++- .../framework/ir/fuse_elewise_add_act_pass.h | 8 ++ .../framework/ir/graph_pattern_detector.cc | 29 +++- .../framework/ir/graph_pattern_detector.h | 21 +++ .../new_executor/interpreter/data_transfer.cc | 1 - .../fused/fused_elemwise_activation_op.h | 25 +++- .../test_fuse_elewise_add_act_pass.py | 71 +++++++++- 7 files changed, 272 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc b/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc index 67aa5a822edae7..b6faf76f11d104 100644 --- a/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc +++ b/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc @@ -31,6 +31,7 @@ void FuseElewiseAddActPass::ApplyImpl(ir::Graph *graph) const { { std::unordered_set in_place_act_types = {"relu_grad"}; graph = FuseElewiseAddActInplaceGrad(graph, in_place_act_types); + graph = FuseActElewiseAddInplaceGrad(graph, in_place_act_types); } // Remove the removable intermediate_out. 
@@ -110,7 +111,7 @@ ir::Graph *FuseElewiseAddActPass::FuseActElewiseAdd( auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph, Graph *g) { - VLOG(4) << "handle FuseElewiseAddAct fuse"; + VLOG(4) << "handle FuseActElewiseAdd fuse"; GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, act_elewise_add_pattern); GET_IR_NODE_FROM_SUBGRAPH(ele_x, ele_x, act_elewise_add_pattern); GET_IR_NODE_FROM_SUBGRAPH( @@ -220,6 +221,86 @@ ir::Graph *FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad( return graph; } +// the backward of act(ele_add(x,y)) +// act_grad: in["Out", "Out@GRAD"], out["X@GRAD"] +// ele_add_grad: in["Y", "Out@GRAD"], out["X@GRAD", "Y@GRAD"] +ir::Graph *FuseElewiseAddActPass::FuseActElewiseAddInplaceGrad( + ir::Graph *graph, const std::unordered_set &act_types) const { + PADDLE_ENFORCE_NOT_NULL( + graph, platform::errors::InvalidArgument("Graph cannot be nullptr.")); + FusePassBase::Init("act_elewise_add_grad", graph); + GraphPatternDetector gpd; + auto *d_out_var = + gpd.mutable_pattern() + ->NewNode("act_elewise_add_grad_inplace/d_out_var") + ->AsInput() + ->assert_is_ops_input({"elementwise_add_grad"}, GradVarName("Out")); + patterns::ActElewiseAddInplaceGrad act_elewise_add_grad_pattern( + gpd.mutable_pattern(), "act_elewise_add_grad_inplace"); + act_elewise_add_grad_pattern(d_out_var, act_types); + + int found_elewise_add_act_count = 0; + + auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph, + Graph *g) { + VLOG(4) << "handle ActFuseElewiseAddGrad1 fuse"; + + GET_IR_NODE_FROM_SUBGRAPH( + ele_add_grad_op, ele_add_grad_op, act_elewise_add_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH( + act_grad_op, act_grad_op, act_elewise_add_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH( + intermediate_var, intermediate_var, act_elewise_add_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH( + d_intermediate_var, d_intermediate_var, act_elewise_add_grad_pattern); + + std::string d_out_var_n = subgraph.at(d_out_var)->Name(); + std::string intermediate_var_n = intermediate_var->Name(); + std::string d_intermediate_var_n = d_intermediate_var->Name(); + + OpDesc desc; + desc.SetType("fused_elemwise_add_activation_grad"); + desc.SetInput("IntermediateOut", + std::vector({intermediate_var_n})); + desc.SetInput("X", {}); + desc.SetInput("Y", ele_add_grad_op->Op()->Input("X")); + desc.SetInput("Out", {}); + desc.SetInput(GradVarName("Out"), std::vector({d_out_var_n})); + desc.SetOutput(GradVarName("X"), + act_grad_op->Op()->Output(GradVarName("X"))); + desc.SetOutput(GradVarName("Y"), + ele_add_grad_op->Op()->Output(GradVarName("X"))); + desc.SetOutput(GradVarName("IntermediateOut"), + std::vector({d_intermediate_var_n})); + + desc.SetAttr("save_intermediate_out", false); + desc.SetAttr("functor_list", + std::vector({ele_add_grad_op->Op()->Type(), + act_grad_op->Op()->Type()})); + + for (auto &n : {ele_add_grad_op->Op(), act_grad_op->Op()}) { + for (auto &m_ele : n->GetAttrMap()) { + desc.SetAttr(m_ele.first, m_ele.second); + } + } + + auto fused_node = g->CreateOpNode(&desc); + + VLOG(4) << "\n\t " << d_out_var_n << " -> " << ele_add_grad_op->Name() + << " -> " << d_intermediate_var_n << "\n\t " << intermediate_var_n + << " and " << d_intermediate_var_n << " -> " << act_grad_op->Name(); + + ReLinkNodes2( + g, d_intermediate_var, ele_add_grad_op, act_grad_op, fused_node); + found_elewise_add_act_count++; + }; + + gpd(graph, handler); + + AddStatis(found_elewise_add_act_count); + return graph; +} + Node *FuseElewiseAddActPass::CreateFuseElewiseAddActNode( Graph *g, const Node *op_1, @@ -364,6 
+445,52 @@ void FuseElewiseAddActPass::ReLinkNodes(Graph *graph, GraphSafeRemoveNodes(graph, nodes2delete); } +void FuseElewiseAddActPass::ReLinkNodes2(Graph *graph, + const Node *intermediate_out, + Node *op_1, + Node *op_2, + Node *fused_op) const { // delete act + for (auto &in : op_1->inputs) { + fused_op->inputs.emplace_back(in); + in->outputs = this->ReplaceNode(op_1, fused_op, in->outputs); + } + + std::unordered_set nodes2delete; + for (auto &out : op_1->outputs) { + if (out->IsCtrlVar()) { + auto result_iter = std::find_if( + op_2->inputs.begin(), + op_2->inputs.end(), + [&out](const Node *node) -> bool { return node == out; }); + + if (result_iter == op_2->inputs.end()) { + IR_OP_VAR_LINK(fused_op, out); + } else { + nodes2delete.emplace(out); + } + } else { + IR_OP_VAR_LINK(fused_op, out); + } + } + + for (auto &in : op_2->inputs) { + if (in == intermediate_out || nodes2delete.count(in)) { + continue; + } + fused_op->inputs.emplace_back(in); + in->outputs = this->ReplaceNode(op_2, fused_op, in->outputs); + } + + for (auto &out : op_2->outputs) { + IR_OP_VAR_LINK(fused_op, out); + } + + nodes2delete.insert(std::move(op_1)); + nodes2delete.insert(std::move(op_2)); + + GraphSafeRemoveNodes(graph, nodes2delete); +} + std::vector FuseElewiseAddActPass::ReplaceNode( Node *cur_node, Node *new_node, const std::vector &nodes) const { std::vector new_list(nodes.size()); diff --git a/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.h b/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.h index d9b0ec928ae5cc..c608bb5845584b 100644 --- a/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.h +++ b/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.h @@ -49,6 +49,9 @@ class FuseElewiseAddActPass : public FusePassBase { ir::Graph *FuseElewiseAddActInplaceGrad( ir::Graph *graph, const std::unordered_set &act_types) const; + ir::Graph *FuseActElewiseAddInplaceGrad( + ir::Graph *graph, const std::unordered_set &act_types) const; + /** * Remove the removable intermediate_out. 
* - If the intermediate_out is only used by the backward op, but the @@ -69,6 +72,11 @@ class FuseElewiseAddActPass : public FusePassBase { Node *op_1, Node *op_2, Node *fused_op) const; + void ReLinkNodes2(Graph *graph, + const Node *intermediate_out, + Node *op_1, + Node *op_2, + Node *fused_op) const; Node *CreateFuseElewiseAddActNode(Graph *g, const Node *op_1, const Node *op_2, diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 753c169f8f6d68..acbaef67a68fc4 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -91,7 +91,6 @@ void GraphPatternDetector::operator()(Graph *graph, if (!MarkPDNodesInGraph(*graph)) { return; } - auto subgraphs = DetectPatterns(); UniquePatterns(&subgraphs); SortSubgraphs(&subgraphs); @@ -99,7 +98,6 @@ void GraphPatternDetector::operator()(Graph *graph, ValidateByNodeRole(&subgraphs); if (subgraphs.empty()) return; - int id = 0; for (auto &g : subgraphs) { VLOG(3) << "optimizing #" << id++ << " subgraph"; @@ -1613,6 +1611,33 @@ PDNode *patterns::ElewiseAddActInplaceGrad::operator()( return ele_add_grad; } +PDNode *patterns::ActElewiseAddInplaceGrad::operator()( + paddle::framework::ir::PDNode *d_out_var, + std::unordered_set act_types) { + VLOG(4) << "ActElewiseAddInplaceGrad::operator"; + + auto *ele_add_grad_op = pattern->NewNode(ele_add_grad_op_repr()) + ->assert_is_op("elementwise_add_grad"); + auto *act_grad_op = + pattern->NewNode(act_grad_op_repr())->assert_is_ops(act_types); + + auto *d_intermediate_out_var = + pattern->NewNode(d_intermediate_var_repr()) + ->assert_is_op_output("elementwise_add_grad", GradVarName("Y")) + ->assert_is_ops_input(act_types, GradVarName("Out")); + auto *intermediate_out_var = + pattern->NewNode(intermediate_var_repr()) + ->assert_is_op_input("elementwise_add_grad", "Y") + ->assert_is_ops_input(act_types, "Out"); + + ele_add_grad_op->LinksFrom({d_out_var}); + d_intermediate_out_var->LinksFrom({ele_add_grad_op}).LinksTo({act_grad_op}); + intermediate_out_var->LinksTo({ele_add_grad_op}); + intermediate_out_var->LinksTo({act_grad_op}); + + return act_grad_op; +} + PDNode *patterns::ElewiseAddAct::operator()( paddle::framework::ir::PDNode *ele_x_var, std::unordered_set act_types) { diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index cb1b9266b15308..da479c1bf7c9b3 100755 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -928,6 +928,27 @@ struct ElewiseAddActInplaceGrad : public PatternBase { PATTERN_DECL_NODE(ele_y); }; +// the backward of ele_add(act(x), y) +// the act is inplace. 
+// op: elementwise_add_grad + act_grad +// named nodes: elementwise_add_grad, act_grad +// ele_y, d_ele_y, d_intermeiate_out, intermediate_out, d_x +struct ActElewiseAddInplaceGrad : public PatternBase { + ActElewiseAddInplaceGrad(PDPattern* pattern, const std::string& name_scope) + : PatternBase(pattern, name_scope, "act_elewise_add_grad1") {} + + // ele_add_grad: in["Y", "Out@GRAD"], out["IntermediateOut@GRAD", "Y@GRAD"] + // act_grad: in["IntermediateOut", "IntermediateOut@GRAD"], out["X@GRAD"] + PDNode* operator()(PDNode* d_out_var, std::unordered_set acts); + + // declare operator node's name + PATTERN_DECL_NODE(ele_add_grad_op); + PATTERN_DECL_NODE(act_grad_op); + // // declare variable node's name + PATTERN_DECL_NODE(intermediate_var); + PATTERN_DECL_NODE(d_intermediate_var); +}; + // The following patterns are used to fuse linear and act (ReLu or GeLU) // formula: act(F.linear(x)) // op: matmul_v2 + elementwise_add + act diff --git a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc index 8f9209f6a91d58..f2882eaf59d366 100644 --- a/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc +++ b/paddle/fluid/framework/new_executor/interpreter/data_transfer.cc @@ -462,7 +462,6 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key, for (auto& var_name_item : *ins_map_temp) { bool should_skip_input = no_buffer_ins && no_buffer_ins->count(var_name_item.first) > 0; - for (size_t i = 0; i < var_name_item.second.size(); ++i) { auto var = var_name_item.second[i]; auto var_name = new_ins[var_name_item.first].at(i); diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h index 0d6a5e3b40da9b..50d2057dbd87bc 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h +++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h @@ -664,11 +664,9 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel { in_y, nullptr, platform::errors::InvalidArgument("Input(Y) should not be nullptr.")); - auto in_out = ctx.Input("Out"); - PADDLE_ENFORCE_NE( - in_out, - nullptr, - platform::errors::InvalidArgument("Input(Out) should not be nullptr.")); + phi::DenseTensor *in_out = + const_cast(ctx.Input("Out")); + auto in_out_grad = ctx.Input(framework::GradVarName("Out")); PADDLE_ENFORCE_NE(in_out_grad, @@ -726,6 +724,23 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel { in_x = const_cast(in_out_grad); } + // Get in_Out + if (ctx.HasInput("Out")) { + PADDLE_ENFORCE_NE( + in_out, + nullptr, + platform::errors::InvalidArgument("Input(X) should not be null.")); + } else { + // If functor_list contains elementwise_add, the backward doesn't use + // in_x, in_y and in_out. 
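The pattern comments above describe the gradient chain that the new pass collapses into fused_elemwise_add_activation_grad. For the relu case exercised by the unit test that follows, that chain looks like this in plain NumPy; it also shows why only IntermediateOut (the activation output) and Out@GRAD are needed, which is what makes the in-place variant legal. This is an illustrative sketch, not the operator implementation.

import numpy as np

x = np.array([-1.0, 0.5, 2.0], dtype=np.float32)
y = np.array([10.0, 20.0, 30.0], dtype=np.float32)

intermediate = np.maximum(x, 0.0)            # relu(x) -> IntermediateOut
out = y + intermediate                       # elementwise_add(Y, relu(X))

d_out = np.ones_like(out)                    # Out@GRAD
d_y = d_out                                  # elementwise_add_grad w.r.t. Y
d_intermediate = d_out                       # elementwise_add_grad w.r.t. relu(X)
d_x = d_intermediate * (intermediate > 0)    # relu_grad needs only relu's own output

print(d_x, d_y)                              # [0. 1. 1.] [1. 1. 1.]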
+ PADDLE_ENFORCE_EQ(InputXCanBeAbsent(functor_list), + true, + platform::errors::InvalidArgument( + "Only when the compoundfunctor contains " + "elementwise_add_grad, the 'X' could be absent.")); + in_out = const_cast(in_out_grad); + } + bool has_in_place = HasInPlaceUnary(functor_list); if (has_in_place) { RunGradFunctors(ctx, diff --git a/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py index 9a33552f31af8e..6f3bc21e4bb86d 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py @@ -14,10 +14,11 @@ import os import unittest +import numpy from parallel_executor_test_base import DeviceType, TestParallelExecutorBase from simple_nets import fc_with_batchnorm, init_data, simple_fc_net - +import paddle import paddle.fluid as fluid import paddle.fluid.core as core @@ -89,8 +90,72 @@ def test_batchnorm_fc_with_fuse_op(self): ) -if __name__ == '__main__': - import paddle +class TestFuseActElewiseAddInplaceGradPass(unittest.TestCase): + def build_program(self, main_program, startup_program): + with paddle.static.program_guard(main_program, startup_program): + X = fluid.data(name="X", shape=[3, 3], dtype='float32') + Y = fluid.data(name="Y", shape=[3, 3], dtype='float32') + Out1 = X * 5 + Out2 = fluid.layers.relu(Out1) + prediction = fluid.layers.elementwise_add(Y, Out2, axis=1) + loss = paddle.mean(prediction) + sgd = fluid.optimizer.SGD(learning_rate=0.001) + sgd.minimize(loss) + return X, Y, loss + + def check(self, place): + paddle.seed(1) + numpy.random.seed(1) + paddle.framework.random._manual_program_seed(1) + main_program = fluid.Program() + startup_program = fluid.Program() + X, Y, loss = self.build_program(main_program, startup_program) + exe = fluid.Executor(place) + + x = numpy.random.random(size=(3, 3)).astype('float32') + y = numpy.random.random(size=(3, 3)).astype('float32') + label = numpy.random.random(size=(3, 3)).astype('float32') + # open fused_pass + build_strategy = fluid.BuildStrategy() + build_strategy.fuse_elewise_add_act_ops = True + compiled_prog_fused = paddle.static.CompiledProgram( + main_program, build_strategy=build_strategy + ) + scope = fluid.Scope() + with fluid.scope_guard(scope): + exe.run(startup_program) + loss_data_fused = exe.run( + compiled_prog_fused, + feed={"X": x, "Y": y}, + fetch_list=[loss.name], + ) + + # close fused_pass + build_strategy = fluid.BuildStrategy() + build_strategy.fuse_elewise_add_act_ops = False + compiled_prog = paddle.static.CompiledProgram( + main_program, build_strategy=build_strategy + ) + scope = fluid.Scope() + with fluid.scope_guard(scope): + exe.run(startup_program) + loss_data = exe.run( + compiled_prog, feed={"X": x, "Y": y}, fetch_list=[loss.name] + ) + + self.assertEqual(loss_data_fused, loss_data) + + def test_fuse_act_add_grad_pass_cpu(self): + place = fluid.CPUPlace() + self.check(place) + + def test_fuse_act_add_grad_pass_cuda(self): + if fluid.core.is_compiled_with_cuda(): + place = fluid.CUDAPlace(0) + self.check(place) + + +if __name__ == '__main__': paddle.enable_static() unittest.main() From 5de01e8aba04b47997ef06180aa085283b5b8027 Mon Sep 17 00:00:00 2001 From: Yuanle Liu Date: Wed, 30 Nov 2022 11:56:56 +0800 Subject: [PATCH 056/154] [Paddle Inference] clean unused code (#48392) --- paddle/fluid/framework/naive_executor.cc | 1 + paddle/fluid/inference/analysis/analyzer.cc | 3 +- .../inference/analysis/analyzer_tester.cc 
| 4 +- paddle/fluid/inference/analysis/argument.h | 4 +- paddle/fluid/inference/analysis/helper.h | 19 ------- .../inference/analysis/ir_pass_manager.cc | 9 ---- .../inference/analysis/passes/CMakeLists.txt | 14 +----- .../passes/convert_to_mixed_precision.cc | 8 --- .../passes/inference_op_replace_pass.cc | 2 +- .../analysis/passes/ir_analysis_pass.cc | 2 +- .../analysis/passes/ir_graph_build_pass.cc | 5 +- .../analysis/passes/ir_graph_clean_pass.cc | 49 ------------------- .../analysis/passes/ir_graph_clean_pass.h | 37 -------------- .../passes/ir_graph_to_program_pass.cc | 2 +- .../passes/ir_graph_to_program_pass.h | 2 +- .../ir_params_sync_among_devices_pass.cc | 2 +- .../analysis/passes/memory_optimize_pass.cc | 2 +- .../fluid/inference/analysis/passes/passes.cc | 3 -- paddle/fluid/inference/api/analysis_config.cc | 9 ---- .../fluid/inference/api/analysis_predictor.cc | 43 ++++++---------- .../fluid/inference/api/mkldnn_quantizer.cc | 6 +-- .../fluid/inference/api/paddle_pass_builder.h | 1 - 22 files changed, 33 insertions(+), 194 deletions(-) delete mode 100644 paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.cc delete mode 100644 paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc index 52ed842d74e020..57e9a175b16f24 100644 --- a/paddle/fluid/framework/naive_executor.cc +++ b/paddle/fluid/framework/naive_executor.cc @@ -195,5 +195,6 @@ void NaiveExecutor::ResetTrtOps(int num) { } #endif } + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc index 9615100f32ad39..4aadb34d7b354e 100644 --- a/paddle/fluid/inference/analysis/analyzer.cc +++ b/paddle/fluid/inference/analysis/analyzer.cc @@ -38,8 +38,7 @@ void Analyzer::RunAnalysis(Argument *argument) { if (!disable_logs) { string::PrettyLogH1("--- Running analysis [%s]", pass); } - if (!argument->enable_analysis_optim() && pass == "ir_analysis_pass") - continue; + if (!argument->enable_ir_optim() && pass == "ir_analysis_pass") continue; auto *ptr = PassRegistry::Global().Retreive(pass); PADDLE_ENFORCE_NOT_NULL(ptr, diff --git a/paddle/fluid/inference/analysis/analyzer_tester.cc b/paddle/fluid/inference/analysis/analyzer_tester.cc index 1df8d06dd89cac..3f5be92f5a3e6f 100644 --- a/paddle/fluid/inference/analysis/analyzer_tester.cc +++ b/paddle/fluid/inference/analysis/analyzer_tester.cc @@ -31,7 +31,7 @@ TEST(Analyzer, analysis_without_tensorrt) { Argument argument; argument.SetDisableLogs(false); argument.SetModelDir(FLAGS_inference_model_dir); - argument.SetEnableAnalysisOptim(false); + argument.SetEnableIrOptim(false); argument.SetUseGPU(false); argument.SetAnalysisPasses({"ir_graph_build_pass", "ir_analysis_pass", @@ -44,7 +44,7 @@ TEST(Analyzer, analysis_without_tensorrt) { TEST(Analyzer, analysis_with_tensorrt) { Argument argument; argument.SetDisableLogs(false); - argument.SetEnableAnalysisOptim(false); + argument.SetEnableIrOptim(false); argument.SetTensorRtMaxBatchSize(3); argument.SetTensorRtWorkspaceSize(1 << 20); argument.SetModelDir(FLAGS_inference_model_dir); diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index 496cd9d1e2d530..fd5ba90eefb3fb 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -42,8 +42,6 @@ namespace paddle { namespace inference { namespace analysis { -using framework::ir::Graph; - #ifdef 
PADDLE_WITH_MKLDNN using VarQuantScale = std::unordered_map>; @@ -148,7 +146,7 @@ struct Argument { DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string); DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool); DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string); - DECL_ARGUMENT_FIELD(enable_analysis_optim, EnableAnalysisOptim, bool); + DECL_ARGUMENT_FIELD(enable_ir_optim, EnableIrOptim, bool); // For JITLayer DECL_ARGUMENT_FIELD(skip_load_params, SkipLoadParams, bool); diff --git a/paddle/fluid/inference/analysis/helper.h b/paddle/fluid/inference/analysis/helper.h index e8d719ddb659dc..e891da8e6d19fc 100644 --- a/paddle/fluid/inference/analysis/helper.h +++ b/paddle/fluid/inference/analysis/helper.h @@ -153,25 +153,6 @@ T &GetFromScope(const framework::Scope &scope, const std::string &name) { return *var->GetMutable(); } -static framework::proto::ProgramDesc LoadProgramDesc( - const std::string &model_path) { - std::ifstream fin(model_path, std::ios::in | std::ios::binary); - PADDLE_ENFORCE_EQ( - fin.is_open(), - true, - platform::errors::NotFound( - "Cannot open file %s, please confirm whether the file exists", - model_path)); - fin.seekg(0, std::ios::end); - std::string buffer(fin.tellg(), ' '); - fin.seekg(0, std::ios::beg); - fin.read(&buffer[0], buffer.size()); - fin.close(); - framework::proto::ProgramDesc program_desc; - program_desc.ParseFromString(buffer); - return program_desc; -} - static bool FileExists(const std::string &filepath) { std::ifstream file(filepath); bool exists = file.is_open(); diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index 71bfd1d7bfca25..862a019da6d57c 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -36,15 +36,6 @@ using string::PrettyLogEndl; using string::Style; IRPassManager::IRPassManager(Argument *argument) { - ARGUMENT_CHECK_FIELD(argument, main_program); - graph_ = std::unique_ptr(new Graph(argument->main_program())); - if (argument->Has("scope")) { - auto *scope_ptr = argument->scope_ptr(); - PADDLE_ENFORCE_NOT_NULL(scope_ptr, - platform::errors::PreconditionNotMet( - "The scope ptr should not be nullptr.")); - graph_->SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr); - } disable_logs_ = argument->disable_logs(); ARGUMENT_CHECK_FIELD(argument, ir_analysis_passes); diff --git a/paddle/fluid/inference/analysis/passes/CMakeLists.txt b/paddle/fluid/inference/analysis/passes/CMakeLists.txt index 126e2500c48900..fa074f962eb3d4 100644 --- a/paddle/fluid/inference/analysis/passes/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/passes/CMakeLists.txt @@ -30,17 +30,6 @@ cc_library( inference_op_replace_pass SRCS inference_op_replace_pass.cc DEPS analysis_pass graph_to_program_pass) -if(WITH_TESTING) - cc_library( - ir_graph_clean_pass - SRCS ir_graph_clean_pass.cc - DEPS analysis_pass gtest) -else() - cc_library( - ir_graph_clean_pass - SRCS ir_graph_clean_pass.cc - DEPS analysis_pass) -endif() cc_library( analysis_passes @@ -52,8 +41,7 @@ cc_library( memory_optim_pass convert_to_mixed_precision inference_op_replace_pass - ir_graph_to_program_pass - ir_graph_clean_pass) + ir_graph_to_program_pass) set(analysis_deps ${analysis_deps} analysis_passes subgraph_detector diff --git a/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc b/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc index a37cfda021d5b1..afc1d8a882ca6e 100644 --- 
a/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc +++ b/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc @@ -32,8 +32,6 @@ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/var_desc.h" -#include "paddle/fluid/inference/analysis/argument.h" -#include "paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h" #include "paddle/fluid/inference/io.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/data_type.h" @@ -358,12 +356,6 @@ void ConvertToMixedPrecisionPass::LoadAndPrepare() { } } - // Remove all control var - IrInferCleanGraphPass pass; - Argument arg; - arg.SetMainGraphNotOwned(main_graph_.get()); - pass.Run(&arg); - ProcessCircleCases(); } diff --git a/paddle/fluid/inference/analysis/passes/inference_op_replace_pass.cc b/paddle/fluid/inference/analysis/passes/inference_op_replace_pass.cc index ed45ec3301d1d2..126d16933fd820 100644 --- a/paddle/fluid/inference/analysis/passes/inference_op_replace_pass.cc +++ b/paddle/fluid/inference/analysis/passes/inference_op_replace_pass.cc @@ -40,7 +40,7 @@ void InferenceOpReplacePass::RunImpl(Argument* argument) { } std::string InferenceOpReplacePass::repr() const { - return "inference-op-replace-pass"; + return "inference_op_replace_pass"; } } // namespace analysis diff --git a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc index 2b2b0ab5cabf4c..2f9914ac54a79b 100644 --- a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc @@ -58,7 +58,7 @@ void IrAnalysisPass::CollectFusionStatis(Argument* argument) { framework::ir::kFuseStatisAttr)); } -std::string IrAnalysisPass::repr() const { return "ir-analysis-pass"; } +std::string IrAnalysisPass::repr() const { return "ir_analysis_pass"; } } // namespace analysis } // namespace inference diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc index 18f5c9e4a9c6c9..3f13fd639aa554 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc @@ -64,7 +64,8 @@ void IrGraphBuildPass::RunImpl(Argument *argument) { "set.")); } - auto graph = std::unique_ptr(new Graph(argument->main_program())); + auto graph = std::unique_ptr( + new framework::ir::Graph(argument->main_program())); argument->SetMainGraph(graph.release()); auto *scope_ptr = argument->scope_ptr(); PADDLE_ENFORCE_NOT_NULL(scope_ptr, @@ -128,7 +129,7 @@ std::unique_ptr IrGraphBuildPass::LoadModel( } } -std::string IrGraphBuildPass::repr() const { return "ir-graph-build-pass"; } +std::string IrGraphBuildPass::repr() const { return "ir_graph_build_pass"; } } // namespace analysis } // namespace inference diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.cc deleted file mode 100644 index 6c18c625637166..00000000000000 --- a/paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.cc +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h" - -#include "paddle/fluid/framework/ir/graph.h" -#include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/framework/ir/node.h" - -namespace paddle { -namespace inference { -namespace analysis { - -void IrInferCleanGraphPass::RunImpl(Argument* argument) { - auto& graph = argument->main_graph(); - auto is_valid_node = [](framework::ir::Node* x) { - return x && IsControlDepVar(*x) && x->IsVar() && !x->Var(); - }; - - std::unordered_set invalid_nodes; - int valid_op = 0; - for (auto* node : graph.Nodes()) { - PADDLE_ENFORCE_NOT_NULL(node, - platform::errors::PreconditionNotMet( - "The node should not be nullptr.")); - if (is_valid_node(node)) { - invalid_nodes.insert(node); - } else if (node->IsOp()) { - ++valid_op; - } - } - - GraphSafeRemoveNodes(&graph, invalid_nodes); -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h b/paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h deleted file mode 100644 index a4d60e91e8455c..00000000000000 --- a/paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include - -#include "paddle/fluid/inference/analysis/analysis_pass.h" - -namespace paddle { -namespace inference { -namespace analysis { - -struct Argument; - -class IrInferCleanGraphPass : public AnalysisPass { - public: - void RunImpl(Argument *argument) override; - - std::string repr() const override { return "ir_graph_clean_pass"; } -}; - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc index 999fb4ad8d7642..3d86f7bf399a99 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc @@ -31,7 +31,7 @@ void IrGraphToProgramPass::RunImpl(Argument *argument) { new int(argument->memory_optim_sort_kind())); } - std::unique_ptr graph(argument->main_graph_ptr()); + std::unique_ptr graph(argument->main_graph_ptr()); // Direct using ProgramDesc desc(argument->main_program()) may cause // incomplete copies of information. 
diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h b/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h index 5b20667d62ab60..8e90eb0e20d57d 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h +++ b/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h @@ -28,7 +28,7 @@ class IrGraphToProgramPass : public AnalysisPass { public: void RunImpl(Argument *argument) override; - std::string repr() const override { return "ir-graph-to-param-pass"; } + std::string repr() const override { return "ir_graph_to_param_pass"; } }; } // namespace analysis diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc index 1c9e5bd7b9f454..8e6470b2c1a0ba 100644 --- a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc @@ -167,7 +167,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) { } std::string IrParamsSyncAmongDevicesPass::repr() const { - return "ir-params-sync-among-devices-pass"; + return "ir_params_sync_among_devices_pass"; } } // namespace analysis diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc index 775b61e9494ee9..63aaa7d97967a4 100644 --- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc +++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc @@ -295,7 +295,7 @@ void UpdateOpDescsByReuse( } } -std::string MemoryOptimizePass::repr() const { return "memory optimize pass"; } +std::string MemoryOptimizePass::repr() const { return "memory_optimize_pass"; } void MemoryOptimizePass::RunImpl(Argument* argument) { // Memory optimization. diff --git a/paddle/fluid/inference/analysis/passes/passes.cc b/paddle/fluid/inference/analysis/passes/passes.cc index 19aab1a948dd2d..cd65757d08f3fb 100644 --- a/paddle/fluid/inference/analysis/passes/passes.cc +++ b/paddle/fluid/inference/analysis/passes/passes.cc @@ -18,7 +18,6 @@ #include "paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h" #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h" #include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h" -#include "paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h" #include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h" #include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h" #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h" @@ -34,8 +33,6 @@ PassRegistry::PassRegistry() { std::unique_ptr(new IrAnalysisPass)); passes_.emplace("ir_graph_build_pass", std::unique_ptr(new IrGraphBuildPass)); - passes_.emplace("ir_graph_clean_pass", - std::unique_ptr(new IrInferCleanGraphPass)); passes_.emplace("memory_optimize_pass", std::unique_ptr(new MemoryOptimizePass)); passes_.emplace( diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 7d243c6df2a55d..22b8e4487d1704 100755 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -770,13 +770,7 @@ void AnalysisConfig::Update() { ((use_custom_device() ^ pass_builder_->use_custom_device()))) { if (use_gpu()) { pass_builder_.reset(new GpuPassStrategy); - - if (use_tensorrt_) { - // Append after the Affine_channel_conv_fuse pass. 
- pass_builder()->InsertPass(3, "tensorrt_subgraph_pass"); - } } else if (use_ipu()) { - VLOG(1) << "IpuPassStrategy has been used for new."; pass_builder_.reset(new IpuPassStrategy); } else if (use_xpu()) { PADDLE_ENFORCE_EQ( @@ -982,9 +976,6 @@ void AnalysisConfig::Update() { "but did not have the option -DWITH_CUSTOM_DEVICE compiled.")); #endif } - if (ir_debug_) { - pass_builder()->TurnOnDebug(); - } } std::string AnalysisConfig::SerializeInfoCache() { diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 0fa6f243cee79c..6a23f11e4522a7 100755 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1074,7 +1074,7 @@ void AnalysisPredictor::PrepareArgument() { argument_.SetUseGPU(config_.use_gpu()); argument_.SetUseFcPadding(config_.use_fc_padding()); argument_.SetGPUDeviceId(config_.gpu_device_id()); - argument_.SetEnableAnalysisOptim(config_.enable_ir_optim_); + argument_.SetEnableIrOptim(config_.enable_ir_optim_); argument_.SetEnableMemoryOptim(config_.enable_memory_optim()); argument_.SetModelFromMemory(config_.model_from_memory_); // Analyze inference_program @@ -1223,48 +1223,35 @@ void AnalysisPredictor::PrepareArgument() { } #endif - auto passes = config_.pass_builder()->AllPasses(); + auto *pass_builder = config_.pass_builder(); if (model_precision_ != phi::DataType::FLOAT32) { LOG(INFO) << "Model is mixed precision type with " << model_precision_ << ", we will use a new PassStrategy. Note that only the GPU " "backend is supported for now."; - passes.clear(); + pass_builder->ClearPasses(); + const auto &deleted_passes = pass_builder->GetAllDeletedPasses(); if (config_.tensorrt_engine_enabled()) { for (const auto &pass : kTrtLowerPrecisionPasses) { - passes.push_back(pass); + if (deleted_passes.count(pass)) continue; + pass_builder->AppendPass(pass); } } else if (config_.use_gpu()) { for (const auto &pass : kGpuLowerPrecisionPasses) { - passes.push_back(pass); - } - } - - const auto &deleted_passes = config_.pass_builder()->GetAllDeletedPasses(); - for (const auto &it : deleted_passes) { - auto iterator = std::find(passes.begin(), passes.end(), it); - if (iterator != passes.end()) { - passes.erase(iterator); - } - } - - if (config_.ir_debug_) { - auto it = std::begin(passes); - while (it != std::end(passes)) { - if (*it != "graph_viz_pass") { - it = passes.insert(it + 1, "graph_viz_pass"); - } else { - ++it; - } + if (deleted_passes.count(pass)) continue; + pass_builder->AppendPass(pass); } } } + if (config_.ir_debug_) { + pass_builder->TurnOnDebug(); + } if (!config_.ir_optim()) { - passes.clear(); + argument_.SetEnableIrOptim(false); LOG(INFO) << "ir_optim is turned off, no IR pass will be executed"; } argument_.SetDisableLogs(config_.glog_info_disabled()); - argument_.SetIrAnalysisPasses(passes); - argument_.SetAnalysisPasses(config_.pass_builder()->AnalysisPasses()); + argument_.SetIrAnalysisPasses(pass_builder->AllPasses()); + argument_.SetAnalysisPasses(pass_builder->AnalysisPasses()); argument_.SetScopeNotOwned(scope_.get()); // mixed precison. 
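Note: the refactor above keeps all pass bookkeeping on the pass builder, so the user-visible switches remain the ones on the inference Config. A minimal Python sketch of how those options feed the rewritten PrepareArgument (the model file names and the deleted pass name are only examples, not part of this patch):

    import paddle.inference as paddle_infer

    config = paddle_infer.Config("model.pdmodel", "model.pdiparams")
    config.enable_use_gpu(100, 0)        # memory pool size in MB, GPU id
    config.switch_ir_optim(True)         # False maps to SetEnableIrOptim(false): no IR passes run
    config.switch_ir_debug(True)         # ir_debug_ now goes through pass_builder->TurnOnDebug()
    config.delete_pass("fc_fuse_pass")   # deleted passes are skipped when lower-precision passes are appended
    predictor = paddle_infer.create_predictor(config)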
@@ -2138,7 +2125,9 @@ std::unique_ptr AnalysisPredictor::Clone(void *stream) { } x->predictor_stream_ = stream; x->Init(scope_, inference_program_); +#ifdef PADDLE_WITH_TENSORRT x->executor_->ResetTrtOps(++AnalysisPredictor::clone_num_); +#endif return std::unique_ptr(x); } diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.cc b/paddle/fluid/inference/api/mkldnn_quantizer.cc index 7231559e199ce1..53ed8c8134937d 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer.cc +++ b/paddle/fluid/inference/api/mkldnn_quantizer.cc @@ -606,10 +606,8 @@ void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const { if (predictor_.config_.ir_debug_) builder->TurnOnDebug(); auto passes = builder->AllPasses(); predictor_.argument_.SetIrAnalysisPasses(passes); - predictor_.argument_.SetAnalysisPasses({"ir_graph_clean_pass", - "ir_analysis_pass", - "memory_optimize_pass", - "ir_graph_to_program_pass"}); + predictor_.argument_.SetAnalysisPasses( + {"ir_analysis_pass", "memory_optimize_pass", "ir_graph_to_program_pass"}); predictor_.argument_.SetQuantVarScales(scales_); } diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h index 1b81098470a66d..0990a61da34e16 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.h +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -115,7 +115,6 @@ class PD_INFER_DECL PaddlePassBuilder { /// \cond Protected std::vector analysis_passes_{ {"ir_graph_build_pass", - "ir_graph_clean_pass", "ir_analysis_pass", "ir_params_sync_among_devices_pass", "adjust_cudnn_workspace_size_pass", From e9ca7600a00f2389c5e5f53fbce29c4f660596d9 Mon Sep 17 00:00:00 2001 From: feng_shuai Date: Wed, 30 Nov 2022 12:34:24 +0800 Subject: [PATCH 057/154] feat:add the support for vit_attention_op on gpu (#48515) --- paddle/fluid/inference/api/paddle_pass_builder.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index f02776d00f8c77..16db8bee9ecdae 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -216,6 +216,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { "conv_eltwiseadd_bn_fuse_pass", // "embedding_eltwise_layernorm_fuse_pass", // "multihead_matmul_fuse_pass_v2", // + "vit_attention_fuse_pass", // "fused_multi_transformer_encoder_pass", // "fused_multi_transformer_decoder_pass", // "fused_multi_transformer_encoder_fuse_qkv_pass", // From 8a717a3e98aed104816b47eb30364602bb1832f7 Mon Sep 17 00:00:00 2001 From: ZZK <359521840@qq.com> Date: Wed, 30 Nov 2022 13:30:02 +0800 Subject: [PATCH 058/154] Support more activation in fused multi transformer (#48371) * add activation support * fix cublasLt bug * remove useless code and fix test random range --- .../fused/fused_multi_transformer_op.cc | 12 +- .../fused/fused_multi_transformer_op.cu | 50 +++--- .../fused/fused_multi_transformer_op.cu.h | 142 ++++++++++-------- .../test_fused_multi_transformer_op.py | 53 +++++-- 4 files changed, 157 insertions(+), 100 deletions(-) diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc index 09c3dfe24c13eb..6a4c3890e5bc96 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc @@ -270,7 +270,17 @@ class FusedMultiTransformerOpOpMaker "dropout_implementation can only be downgrade_in_infer or " 
"upscale_in_train")); }); - AddAttr("act_method", "act_method").SetDefault("gelu"); + AddAttr("act_method", "act_method") + .SetDefault("gelu") + .AddCustomChecker([](const std::string &act_type) { + PADDLE_ENFORCE_EQ( + act_type == "gelu" || act_type == "relu" || act_type == "none", + true, + platform::errors::InvalidArgument( + "Only support `gelu`, `relu`, `none` activation in " + "FusedMultiTransformer. ")); + }); + AddAttr( "trans_qkvw", "Whether the weights of qkv should be transposed. If true," diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu index f56baef1d26726..aeb00a7947cd62 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu @@ -31,6 +31,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { int seq_len = input_x_dims[1]; int dim_embed = input_x_dims[2]; int bsz_seq = bsz * seq_len; + const std::string act_method = ctx.Attr("act_method"); // 1. layer norm const auto pre_layer_norm = ctx.Attr("pre_layer_norm"); @@ -61,7 +62,6 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { bool compute_bias = qkv_biases.size() > 0 && time_step == nullptr; // (transA, transB, compute_bias) = (false, trans_qkvw, false) - // Since we fused QKVBias into QKVBiasAddTransposeSplit kernel, here we set // compute_bias as false. auto qkv_compute = AttnMatMul(dev_ctx, @@ -191,24 +191,23 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { auto *dropout_mask_out_data = dev_ctx.Alloc( &dropout_mask_out, dropout_mask_out.numel() * sizeof(uint8_t)); - // 6. ffn1 matmul + bias_add + gelu. + // 6. ffn1 matmul + act + bias auto ffn1_weights = ctx.MultiInput("FFN1Weight"); auto ffn1_biases = ctx.MultiInput("FFN1Bias"); auto ffn1_weight_dim = ffn1_weights[0]->dims(); int dim_ffn = ffn1_weight_dim[1]; + auto ffn1_cublas_linear = CublasFusedMLP(dev_ctx); + const phi::DDim ffn1_input_shape({bsz_seq, dim_embed}); + ffn1_cublas_linear.Setup(ffn1_input_shape, ffn1_weight_dim, false, false); + Tensor ffn1_out; ffn1_out.Resize({{bsz_seq, dim_ffn}}); auto *ffn1_out_data = dev_ctx.Alloc(&ffn1_out, ffn1_out.numel() * sizeof(T)); - auto ffn1_linear_bias_gelu = CublasFusedMLP(dev_ctx); - const phi::DDim ffn1_input_shape({bsz_seq, dim_ffn}); - ffn1_linear_bias_gelu.Setup( - ffn1_input_shape, ffn1_weight_dim, false, false); - - // 8. ffn2 matmul + bias_add + residual. + // 7. ffn2 matmul + bias + residual. auto ffn2_weights = ctx.MultiInput("FFN2Weight"); auto ffn2_biases = ctx.MultiInput("FFN2Bias"); @@ -216,7 +215,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { ffn2_linear_bias_residual.Setup( ffn1_out.dims(), ffn2_weights[0]->dims(), false, false); - // 9. ffn2 residual bias + // 8. ffn2 Layernorm DropoutParam ffn2_dropout_param(true, 0, true, true, 0.0, nullptr, 0); FusedDropoutLayerNormHelper ffn2_fused_dropout_helper( dev_ctx, bsz_seq, dim_embed, ffn2_dropout_param, epsilon); @@ -333,7 +332,6 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { &attn_dropout_out, &qktv_out, &fmha_out); - const T *k_ptr = nullptr; const T *v_ptr = nullptr; @@ -450,20 +448,23 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { ln_mean_data, ln_var_data); } - #ifdef _DEBUG_FUSED_MULTI_TRANSFORMER VLOG(0) << "step5"; #endif - // step6. ffn1 matmul + bias_add + gelu. 
- ffn1_linear_bias_gelu.ComputeForward( - buf1, ffn1_weights[i], ffn1_biases[i], nullptr, &ffn1_out, "gelu"); + // step6. ffn matmul1 + ffn1_cublas_linear.ComputeForward(buf1, + ffn1_weights[i], + ffn1_biases[i], + nullptr, + &ffn1_out, + act_method); #ifdef _DEBUG_FUSED_MULTI_TRANSFORMER VLOG(0) << "step6"; #endif - // step7. ffn2 matmul + bias_add + residual. + // step7. ffn2 matmul if (pre_layer_norm) { ffn2_linear_bias_residual.ComputeForward(&ffn1_out, ffn2_weights[i], @@ -477,18 +478,21 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { &ffn1_out, ffn2_weights[i], ffn2_biases[i], buf1, buf0, "none"); } +#ifdef _DEBUG_FUSED_MULTI_TRANSFORMER + VLOG(0) << "step7"; +#endif + if (pre_layer_norm) { AllReduce(*buf1, ring_id, buf1->numel(), dev_ctx); } else { AllReduce(*buf0, ring_id, buf0->numel(), dev_ctx); } - #ifdef _DEBUG_FUSED_MULTI_TRANSFORMER - VLOG(0) << "step7"; + VLOG(0) << "step7.1"; #endif - // step8. layer norm or do nothing(because bias_add + residual has been - // fused into cublasFusedMLP. ) + // step8. layer norm or do nothing + // because bias_add + residual has been fused into cublasFusedMLP if (pre_layer_norm) { if (i < layers - 1) { auto *ln_scale_data = ln_scales[i + 1]->data(); @@ -512,6 +516,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { ln_mean_data, ln_var_data); } + #ifdef _DEBUG_FUSED_MULTI_TRANSFORMER VLOG(0) << "step8"; #endif @@ -540,6 +545,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { int seq_len = input_x_dims[1]; int dim_embed = input_x_dims[2]; int bsz_seq = bsz * seq_len; + const std::string act_method = ctx.Attr("act_method"); // 1. layer norm const auto pre_layer_norm = ctx.Attr("pre_layer_norm"); @@ -570,8 +576,8 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { bool compute_bias = qkv_biases.size() > 0 && time_step == nullptr; // (transA, transB, compute_bias) = (false, trans_qkvw, false) - // Since we fused QKVBias into QKVBiasAddTransposeSplit kernel, here we set - // compute_bias as false. + // Since we fused QKVBias into QKVBiasAddTransposeSplit kernel, here we + // set compute_bias as false. auto qkv_compute = AttnMatMul(dev_ctx, false, trans_qkvw, @@ -979,7 +985,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { fused_act_dropout_helper.DropoutActBias(dev_ctx, ffn1_out_data, ffn1_biases[i]->data(), - "gelu", + act_method, ffn1_dropout_out_data, ffn1_dropout_mask_data); #ifdef _DEBUG_FUSED_MULTI_TRANSFORMER diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h index 3c3a59b219615c..69ac06206c62b4 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h @@ -1414,14 +1414,15 @@ class CublasFusedMLP { public: // (m, n, k) = bsz_seq, hidden_feature, in_feature explicit CublasFusedMLP(const phi::GPUContext &dev_ctx) : dev_ctx_(dev_ctx) { - // Set Math Type cudaDataType_t mat_type = CUDA_R_32F; cudaDataType_t scale_type = CUDA_R_32F; cublasComputeType_t compute_type = CUBLAS_COMPUTE_32F; - if (std::is_same::value) { mat_type = CUDA_R_16F; if (FLAGS_gemm_use_half_precision_compute_type) { + // This option default value is true, it tends to result NaN, but get + // better inference speed. you can turn off by using `export + // FLAGS_gemm_use_half_precision_compute_type=0`. 
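Note: as the comment added above explains, FLAGS_gemm_use_half_precision_compute_type defaults to true and trades accuracy (possible NaN) for speed. One way to disable it from Python, assuming the usual flag-from-environment mechanism, is to set the variable before paddle is imported (a sketch, not part of this patch):

    import os
    os.environ["FLAGS_gemm_use_half_precision_compute_type"] = "0"  # accumulate in FP32
    import paddle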
compute_type = CUBLAS_COMPUTE_16F; scale_type = CUDA_R_16F; } @@ -1435,7 +1436,6 @@ class CublasFusedMLP { compute_type = CUBLAS_COMPUTE_64F; } - // Just for init. PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cublasLtMatmulDescCreate( &operation_desc_, compute_type, scale_type)); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cublasLtMatrixLayoutCreate( @@ -1445,7 +1445,6 @@ class CublasFusedMLP { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cublasLtMatrixLayoutCreate( &out_desc_, mat_type, 1, 1, 1)); } - ~CublasFusedMLP() { PADDLE_ENFORCE_GPU_SUCCESS( platform::dynload::cublasLtMatmulDescDestroy(operation_desc_)); @@ -1457,7 +1456,6 @@ class CublasFusedMLP { platform::dynload::cublasLtMatrixLayoutDestroy(out_desc_)); } - // Change to use tensor's shape. void Setup(const phi::DDim &x_shape, const phi::DDim &w_shape, bool trans_x, @@ -1481,39 +1479,34 @@ class CublasFusedMLP { &cublas_transB, sizeof(cublas_transB))); - /* - cublas use col major: x(M, K) matmul w(K, N) = out(M, N) equals to w_t(N, K) - * x_t(K, M) = out(N, M) - */ - SetCublasMatrixLayout_(x_desc_, cublas_transA, K, M); - SetCublasMatrixLayout_(w_desc_, cublas_transB, N, K); - SetCublasMatrixLayout_(out_desc_, CUBLAS_OP_N, N, M); + SetCublasMatrixLayout(x_desc_, trans_x, M, K); + SetCublasMatrixLayout(w_desc_, trans_w, K, N); + SetCublasMatrixLayout(out_desc_, false, M, N); } - void ComputeForward(const phi::DenseTensor *input, + void ComputeForward(const phi::DenseTensor *x, const phi::DenseTensor *weight, const phi::DenseTensor *bias, phi::DenseTensor *residual, phi::DenseTensor *output, const std::string &activation) { - // here: (transa, transb): nt, input * weight. - // (M * K) * (K * N) - cublasLtHandle_t lt_handle = dev_ctx_.cublaslt_handle(); - size_t workspace_size = static_cast(16) * 1024 * 1024; - cudaStream_t stream = dev_ctx_.stream(); - memory::allocation::AllocationPtr workspace = - memory::Alloc(dev_ctx_.GetPlace(), - workspace_size, - phi::Stream(reinterpret_cast(stream))); + T *out_data = output->data(); const bool add_residual = (residual == nullptr) ? false : true; const bool add_bias = (bias == nullptr) ? false : true; + + const T *bias_data = nullptr; if (add_bias) { - SetCublasBiasPtr_(bias); + bias_data = bias->data(); } + PADDLE_ENFORCE_GPU_SUCCESS( + platform::dynload::cublasLtMatmulDescSetAttribute( + operation_desc_, + CUBLASLT_MATMUL_DESC_BIAS_POINTER, + &bias_data, + sizeof(bias_data))); - // Set cublasLt epilogue. - cublasLtEpilogue_t epiloque_func = GetEpilogueType_(activation, add_bias); + cublasLtEpilogue_t epiloque_func = GetEpilogueType(activation, add_bias); PADDLE_ENFORCE_GPU_SUCCESS( platform::dynload::cublasLtMatmulDescSetAttribute( operation_desc_, @@ -1521,25 +1514,44 @@ class CublasFusedMLP { &epiloque_func, sizeof(epiloque_func))); - const auto *x_data = input->data(); - const auto *w_data = weight->data(); - auto *residual_data = - add_residual ? residual->data() : output->data(); - auto *out_data = output->data(); + T *residual_data = add_residual ? residual->data() : out_data; + + cublasLtHandle_t lt_handle = dev_ctx_.cublaslt_handle(); + size_t workspace_size = static_cast(4) * 1024 * 1024; + cudaStream_t stream = dev_ctx_.stream(); + memory::allocation::AllocationPtr workspace = memory::Alloc( + dev_ctx_.GetPlace(), + workspace_size, + phi::Stream(reinterpret_cast(dev_ctx_.stream()))); - // if add_residual, we compute result + 1.0 * residual, else result + 0.0 * - // out. + // if add_residual, we compute result + 1.0 * residual, + // else result + 0.0 * out. 
double alpha64 = 1.0, beta64 = add_residual ? 1.0 : 0.0; float alpha32 = 1.0f, beta32 = add_residual ? 1.0f : 0.0f; + half alpha16 = static_cast(1.0), + beta16 = + add_residual ? static_cast(1.0) : static_cast(0.0); + void *alpha = nullptr, *beta = nullptr; if (std::is_same::value) { alpha = &alpha64; beta = &beta64; + } else if (std::is_same::value) { + alpha = &alpha64; + beta = &beta64; + } else if (std::is_same::value) { + alpha = &alpha16; + beta = &beta16; } else { - alpha = &alpha32; - beta = &beta32; + PADDLE_ENFORCE_EQ(true, + false, + platform::errors::InvalidArgument( + "Only support double, float, half data type. ")); } + const auto *x_data = x->data(); + const auto *w_data = weight->data(); + auto algo = GemmEpilogueAlgoCache::Instance().GetGemmAlgo(lt_handle, operation_desc_, w_desc_, @@ -1567,15 +1579,15 @@ class CublasFusedMLP { out_desc_, out_data, out_desc_, - algo /*algo*/, - workspace->ptr() /*workspace*/, + algo, + workspace->ptr(), workspace_size, stream)); } private: - static cublasLtEpilogue_t GetEpilogueType_(const std::string &activation, - const bool add_bias) { + cublasLtEpilogue_t GetEpilogueType(const std::string &activation, + const bool add_bias) { if (activation == "relu") { if (add_bias) { return CUBLASLT_EPILOGUE_RELU_BIAS; @@ -1606,23 +1618,41 @@ class CublasFusedMLP { } } - void SetCublasMatrixLayout_(cublasLtMatrixLayout_t layout_desc, - cublasOperation_t cublas_trans, - const size_t cublas_m, - const size_t cublas_n) { + void SetCublasMatrixLayout(cublasLtMatrixLayout_t layout_desc, + const bool transpose, + const uint64_t cublas_row, + const uint64_t cublas_col) { + cudaDataType_t mat_type = CUDA_R_32F; + if (std::is_same::value) { + mat_type = CUDA_R_16F; + } + if (std::is_same::value) { + mat_type = CUDA_R_16BF; + } + if (std::is_same::value) { + mat_type = CUDA_R_64F; + } + + PADDLE_ENFORCE_GPU_SUCCESS( + platform::dynload::cublasLtMatrixLayoutSetAttribute( + layout_desc, + CUBLASLT_MATRIX_LAYOUT_TYPE, + &mat_type, + sizeof(mat_type))); + PADDLE_ENFORCE_GPU_SUCCESS( platform::dynload::cublasLtMatrixLayoutSetAttribute( layout_desc, CUBLASLT_MATRIX_LAYOUT_ROWS, - cublas_trans == CUBLAS_OP_N ? &cublas_m : &cublas_n, - sizeof(cublas_m))); + transpose ? &cublas_row : &cublas_col, + sizeof(cublas_row))); PADDLE_ENFORCE_GPU_SUCCESS( platform::dynload::cublasLtMatrixLayoutSetAttribute( layout_desc, CUBLASLT_MATRIX_LAYOUT_COLS, - cublas_trans == CUBLAS_OP_N ? &cublas_n : &cublas_m, - sizeof(cublas_m))); - const size_t cublas_ld = cublas_trans == CUBLAS_OP_N ? cublas_m : cublas_n; + transpose ? &cublas_col : &cublas_row, + sizeof(cublas_col))); + int64_t cublas_ld = transpose ? 
cublas_row : cublas_col; PADDLE_ENFORCE_GPU_SUCCESS( platform::dynload::cublasLtMatrixLayoutSetAttribute( layout_desc, @@ -1631,21 +1661,11 @@ class CublasFusedMLP { sizeof(cublas_ld))); } - void SetCublasBiasPtr_(const phi::DenseTensor *bias) { - const T *bias_data = bias->data(); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cublasLtMatmulDescSetAttribute( - operation_desc_, - CUBLASLT_MATMUL_DESC_BIAS_POINTER, - &bias_data, - sizeof(bias_data))); - } - const phi::GPUContext &dev_ctx_; - cublasLtMatmulDesc_t operation_desc_; - cublasLtMatrixLayout_t x_desc_; - cublasLtMatrixLayout_t w_desc_; - cublasLtMatrixLayout_t out_desc_; + cublasLtMatmulDesc_t operation_desc_ = NULL; + cublasLtMatrixLayout_t x_desc_ = NULL; + cublasLtMatrixLayout_t w_desc_ = NULL; + cublasLtMatrixLayout_t out_desc_ = NULL; }; #endif // PADDLE_FLUID_OPERATORS_FUSED_FUSED_MULTI_TRANSFORMER_OP_CU_H_ diff --git a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py index 199c1e48bb324e..8aadeba437f5b8 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py @@ -124,6 +124,7 @@ def config(self): self.training = False self.layers = 4 + self.batch_size = 8 self.query_length = 128 self.cache_length = 128 @@ -144,21 +145,27 @@ def config(self): ) def generate_input_data(self): - self.query = np.random.rand( - self.batch_size, self.query_length, self.embed_dim + self.query = np.random.uniform( + -1, 1, (self.batch_size, self.query_length, self.embed_dim) ).astype(self.x_type) + out_seq_len = self.key_length if self.has_cache_kv: assert self.training is False, ValueError( 'cache_kv can only used in inference' ) - self.cache_kv = np.random.rand( - 2, - self.batch_size, - self.num_heads, - self.cache_length, - self.head_dim, + self.cache_kv = np.random.uniform( + -1, + 1, + ( + 2, + self.batch_size, + self.num_heads, + self.cache_length, + self.head_dim, + ), ).astype(self.x_type) + if self.gen_cache_kv: self.cache_kv[:] = 0 else: @@ -168,12 +175,16 @@ def generate_input_data(self): if self.has_pre_cache: out_seq_len += self.pre_cache_num - self.pre_cache_kv = np.random.rand( - 2, - self.batch_size, - self.num_heads, - self.pre_cache_num, - self.head_dim, + self.pre_cache_kv = np.random.uniform( + -1, + 1, + ( + 2, + self.batch_size, + self.num_heads, + self.pre_cache_num, + self.head_dim, + ), ).astype(self.x_type) if self.has_attn_mask: @@ -204,8 +215,8 @@ def generate_input_data(self): self.attn_mask = None self.key, self.value = self.query, self.query - self.dout = np.random.random( - (self.batch_size, self.query_length, self.embed_dim) + self.dout = np.random.uniform( + -1, 1, (self.batch_size, self.query_length, self.embed_dim) ).astype(self.x_type) def GetBaselineOut(self): @@ -544,6 +555,7 @@ def GetFusedMultiTransformerOut(self): time_step=time_step, attn_mask=attn_mask, dropout_rate=self.dropout_prob, + activation=self.act_method, training=self.training, ) @@ -668,6 +680,7 @@ def GetFusedMultiTransformerOutStatic(self): self.num_heads, 4 * self.embed_dim, self.dropout_prob, + activation=self.act_method, normalize_before=self.pre_layer_norm, ln_scale_attrs=ln_scales_attr, ln_bias_attrs=ln_biases_attr, @@ -797,6 +810,14 @@ def config(self): self.layers = 3 # odd layers +class TestFusedMultiTransformerOpActReluFp16(TestFusedMultiTransformerOp): + def config(self): + super().config() + self.x_type = np.float16 + 
self.act_method = "relu" + self.layers = 3 # odd layers + + class TestFusedMultiTransformerOpCacheKV(TestFusedMultiTransformerOp): def config(self): super().config() From 0b2a66bbf6610308c74d8d0dd78ce702b2febfbf Mon Sep 17 00:00:00 2001 From: Aurelius84 Date: Wed, 30 Nov 2022 14:01:24 +0800 Subject: [PATCH 059/154] [Perf]Fix interploate OutSize data transform problem (#48498) * [Perf]Fix interploate OutSize data transform problem * fix code style * fix grad * fix phi kernel --- paddle/fluid/operators/interpolate_v2_op.cc | 7 +++++-- paddle/phi/kernels/gpu/interpolate_kernel.cu | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/operators/interpolate_v2_op.cc b/paddle/fluid/operators/interpolate_v2_op.cc index 10a072b5623f9d..95404bbd4a8a7a 100644 --- a/paddle/fluid/operators/interpolate_v2_op.cc +++ b/paddle/fluid/operators/interpolate_v2_op.cc @@ -466,7 +466,9 @@ class InterpolateV2Op : public framework::OperatorWithKernel { } } #endif - if (var_name == "SizeTensor" || var_name == "Scale") { + + if (var_name == "OutSize" || var_name == "SizeTensor" || + var_name == "Scale") { return expected_kernel_type; } return framework::OpKernelType( @@ -701,7 +703,8 @@ class InterpolateV2OpGrad : public framework::OperatorWithKernel { const std::string& var_name, const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { - if (var_name == "SizeTensor" || var_name == "Scale") { + if (var_name == "OutSize" || var_name == "SizeTensor" || + var_name == "Scale") { return expected_kernel_type; } return framework::OpKernelType( diff --git a/paddle/phi/kernels/gpu/interpolate_kernel.cu b/paddle/phi/kernels/gpu/interpolate_kernel.cu index 07e113ef7aa800..8ca24b3e4f05de 100644 --- a/paddle/phi/kernels/gpu/interpolate_kernel.cu +++ b/paddle/phi/kernels/gpu/interpolate_kernel.cu @@ -1458,6 +1458,7 @@ PD_REGISTER_KERNEL(bilinear_interp, double, phi::dtype::float16, int) { + kernel->InputAt(1).SetBackend(phi::Backend::ALL_BACKEND); kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND); kernel->InputAt(3).SetBackend(phi::Backend::ALL_BACKEND); } @@ -1471,6 +1472,7 @@ PD_REGISTER_KERNEL(nearest_interp, phi::dtype::bfloat16, int, int64_t) { + kernel->InputAt(1).SetBackend(phi::Backend::ALL_BACKEND); kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND); kernel->InputAt(3).SetBackend(phi::Backend::ALL_BACKEND); } @@ -1482,6 +1484,7 @@ PD_REGISTER_KERNEL(trilinear_interp, double, phi::dtype::float16, int) { + kernel->InputAt(1).SetBackend(phi::Backend::ALL_BACKEND); kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND); kernel->InputAt(3).SetBackend(phi::Backend::ALL_BACKEND); } @@ -1493,6 +1496,7 @@ PD_REGISTER_KERNEL(linear_interp, double, phi::dtype::float16, int) { + kernel->InputAt(1).SetBackend(phi::Backend::ALL_BACKEND); kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND); kernel->InputAt(3).SetBackend(phi::Backend::ALL_BACKEND); } @@ -1504,6 +1508,7 @@ PD_REGISTER_KERNEL(bicubic_interp, double, phi::dtype::float16, int) { + kernel->InputAt(1).SetBackend(phi::Backend::ALL_BACKEND); kernel->InputAt(2).SetBackend(phi::Backend::ALL_BACKEND); kernel->InputAt(3).SetBackend(phi::Backend::ALL_BACKEND); } From 2de881aaadea5b38f5187fab3ca660f7885c2898 Mon Sep 17 00:00:00 2001 From: wanghuancoder Date: Wed, 30 Nov 2022 14:05:16 +0800 Subject: [PATCH 060/154] refine mmap allocator (#48511) --- paddle/fluid/memory/allocation/mmap_allocator.cc | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git 
a/paddle/fluid/memory/allocation/mmap_allocator.cc b/paddle/fluid/memory/allocation/mmap_allocator.cc index 3c828f13bebb51..557a9cf333a3b9 100644 --- a/paddle/fluid/memory/allocation/mmap_allocator.cc +++ b/paddle/fluid/memory/allocation/mmap_allocator.cc @@ -74,6 +74,7 @@ void AllocateMemoryMap( "File descriptor %s open failed, unable in read-write mode", filename.c_str())); VLOG(6) << "shm_open: " << filename; + MemoryMapFdSet::Instance().Insert(filename); } } else { fd = -1; @@ -171,11 +172,7 @@ void RefcountedMemoryMapAllocation::close() { void *data = map_ptr_; CountInfo *info = reinterpret_cast(data); if (--info->refcount == 0) { - PADDLE_ENFORCE_NE( - shm_unlink(ipc_name_.c_str()), - -1, - platform::errors::Unavailable( - "could not unlink the shared memory file ", ipc_name_)); + shm_unlink(ipc_name_.c_str()); VLOG(6) << "shm_unlink file: " << ipc_name_; } From 7c903ae73f5afea31a6c9049947888c4802879b6 Mon Sep 17 00:00:00 2001 From: yunyaoXYY <109218879+yunyaoXYY@users.noreply.github.com> Date: Wed, 30 Nov 2022 14:08:03 +0800 Subject: [PATCH 061/154] [Clean fluid] Clean ones, reverse, save, save_combine, load_combine, has_inf, zeros_like and ones_like (#48424) * Clean fluid ones * clean ones_like * clean zeros_like * clean save,save_combine,load_combine * clean reverse * clean has_inf * clean reverse tests --- .../paddle/fluid/contrib/layers/rnn_impl.py | 12 +- python/paddle/fluid/layers/distributions.py | 8 +- python/paddle/fluid/layers/rnn.py | 18 +- python/paddle/fluid/layers/tensor.py | 313 ------------------ .../unittests/dygraph_to_static/test_list.py | 2 +- .../tests/unittests/npu/test_while_op_npu.py | 4 +- .../fluid/tests/unittests/test_desc_clone.py | 4 +- ..._executor_return_tensor_not_overwriting.py | 10 +- .../unittests/test_fill_zeros_like2_op.py | 33 -- .../unittests/test_imperative_double_grad.py | 2 +- .../fluid/tests/unittests/test_isfinite_op.py | 20 -- .../tests/unittests/test_math_op_patch.py | 2 +- .../unittests/test_math_op_patch_var_base.py | 2 +- .../fluid/tests/unittests/test_ones_like.py | 22 -- .../fluid/tests/unittests/test_ones_op.py | 8 +- .../fluid/tests/unittests/test_reverse_op.py | 245 +------------- .../tests/unittests/test_zeros_like_op.py | 2 +- 17 files changed, 38 insertions(+), 669 deletions(-) diff --git a/python/paddle/fluid/contrib/layers/rnn_impl.py b/python/paddle/fluid/contrib/layers/rnn_impl.py index 5f9a4d2827fb22..b28cac87950c9f 100644 --- a/python/paddle/fluid/contrib/layers/rnn_impl.py +++ b/python/paddle/fluid/contrib/layers/rnn_impl.py @@ -407,15 +407,15 @@ def get_single_direction_output( ) if bidirectional: - bw_input = layers.reverse(input, axis=[0]) + bw_input = paddle.reverse(input, axis=[0]) bw_mask = None if mask: - bw_mask = layers.reverse(mask, axis=[0]) + bw_mask = paddle.reverse(mask, axis=[0]) bw_rnn_out, bw_last_hidden = get_single_direction_output( bw_input, bw_unit_list, bw_mask, direc_index=1 ) - bw_rnn_out = layers.reverse(bw_rnn_out, axis=[0]) + bw_rnn_out = paddle.reverse(bw_rnn_out, axis=[0]) rnn_out = layers.concat([fw_rnn_out, bw_rnn_out], axis=2) last_hidden = layers.concat([fw_last_hidden, bw_last_hidden], axis=1) @@ -718,15 +718,15 @@ def get_single_direction_output( ) if bidirectional: - bw_input = layers.reverse(input, axis=[0]) + bw_input = paddle.reverse(input, axis=[0]) bw_mask = None if mask: - bw_mask = layers.reverse(mask, axis=[0]) + bw_mask = paddle.reverse(mask, axis=[0]) bw_rnn_out, bw_last_hidden, bw_last_cell = get_single_direction_output( bw_input, bw_unit_list, bw_mask, direc_index=1 ) - 
bw_rnn_out = layers.reverse(bw_rnn_out, axis=[0]) + bw_rnn_out = paddle.reverse(bw_rnn_out, axis=[0]) rnn_out = layers.concat([fw_rnn_out, bw_rnn_out], axis=2) last_hidden = layers.concat([fw_last_hidden, bw_last_hidden], axis=1) diff --git a/python/paddle/fluid/layers/distributions.py b/python/paddle/fluid/layers/distributions.py index 6ea68228a16dd5..d3ca0de64b5c3e 100644 --- a/python/paddle/fluid/layers/distributions.py +++ b/python/paddle/fluid/layers/distributions.py @@ -659,9 +659,9 @@ def __init__(self, loc, scale): def _det(self, value): batch_shape = list(value.shape) - one_all = tensor.ones(shape=batch_shape, dtype=self.loc.dtype) + one_all = paddle.ones(shape=batch_shape, dtype=self.loc.dtype) one_diag = tensor.diag( - tensor.ones(shape=[batch_shape[0]], dtype=self.loc.dtype) + paddle.ones(shape=[batch_shape[0]], dtype=self.loc.dtype) ) det_diag = paddle.prod(value + one_all - one_diag) @@ -670,9 +670,9 @@ def _det(self, value): def _inv(self, value): batch_shape = list(value.shape) - one_all = tensor.ones(shape=batch_shape, dtype=self.loc.dtype) + one_all = paddle.ones(shape=batch_shape, dtype=self.loc.dtype) one_diag = tensor.diag( - tensor.ones(shape=[batch_shape[0]], dtype=self.loc.dtype) + paddle.ones(shape=[batch_shape[0]], dtype=self.loc.dtype) ) inv_diag = paddle.pow(value, (one_all - 2 * one_diag)) diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index c49d6d4d5283ee..e65e4b63300ea7 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -595,9 +595,9 @@ def _rnn_dynamic_graph( mask = paddle.transpose(mask, [1, 0]) if is_reverse: - inputs = map_structure(lambda x: tensor.reverse(x, axis=[0]), inputs) + inputs = map_structure(lambda x: paddle.reverse(x, axis=[0]), inputs) mask = ( - tensor.reverse(mask, axis=[0]) + paddle.reverse(mask, axis=[0]) if sequence_length is not None else None ) @@ -626,7 +626,7 @@ def _rnn_dynamic_graph( if is_reverse: final_outputs = map_structure( - lambda x: tensor.reverse(x, axis=time_step_index), final_outputs + lambda x: paddle.reverse(x, axis=time_step_index), final_outputs ) final_states = new_states @@ -681,8 +681,8 @@ def _switch_grad(x, stop=False): ) mask = paddle.transpose(mask, [1, 0]) if is_reverse: - inputs = map_structure(lambda x: tensor.reverse(x, axis=[0]), inputs) - mask = tensor.reverse(mask, axis=[0]) if sequence_length else None + inputs = map_structure(lambda x: paddle.reverse(x, axis=[0]), inputs) + mask = paddle.reverse(mask, axis=[0]) if sequence_length else None # StaticRNN rnn = control_flow.StaticRNN() @@ -711,7 +711,7 @@ def _switch_grad(x, stop=False): if is_reverse: final_outputs = map_structure( - lambda x: tensor.reverse(x, axis=[0]), final_outputs + lambda x: paddle.reverse(x, axis=[0]), final_outputs ) if not time_major: @@ -1251,7 +1251,7 @@ def initialize(self, initial_cell_states): value=False, force_cpu=True, ) - init_lengths = tensor.zeros_like(init_inputs) + init_lengths = paddle.zeros_like(init_inputs) init_inputs = ( self.embedding_fn(init_inputs) if self.embedding_fn else init_inputs ) @@ -1482,7 +1482,7 @@ def _maybe_copy(state, new_state, step_mask): initial_finished, ) cond = paddle.logical_not((nn.reduce_all(initial_finished))) - sequence_lengths = tensor.cast(tensor.zeros_like(initial_finished), "int64") + sequence_lengths = tensor.cast(paddle.zeros_like(initial_finished), "int64") outputs = None step_idx = 0 @@ -1596,7 +1596,7 @@ def _dynamic_decode_declarative( ) while_op = control_flow.While(cond, is_test=is_test) - 
sequence_lengths = tensor.cast(tensor.zeros_like(initial_finished), "int64") + sequence_lengths = tensor.cast(paddle.zeros_like(initial_finished), "int64") sequence_lengths.stop_gradient = True if is_test: diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index c5a28a913a285d..1066efabf12efc 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -60,13 +60,8 @@ 'argmin', 'argmax', 'argsort', - 'ones', 'zeros', - 'reverse', - 'has_inf', 'linspace', - 'zeros_like', - 'ones_like', 'diag', ] @@ -1324,35 +1319,6 @@ def argsort(input, axis=-1, descending=False, name=None): return out, ids -def ones(shape, dtype, force_cpu=False): - """ - The OP creates a tensor of specified :attr:`shape` and :attr:`dtype`, and fills it with 1. - Its :attr:`stop_gradient` will be set to True to stop gradient computation. - - Parameters: - shape(tuple|list|Tensor): Shape of output Tensor, the data type of shape is int32 or int64. - dtype (np.dtype|str): Data type of output Tensor, it supports - bool, float16, float32, float64, int32 and int64. - force_cpu (bool, optional): Whether force to store the output Tensor in CPU memory. - If :attr:`force_cpu` is False, the output Tensor will be stored in running device memory. - Default: False. - - Returns: - Tensor: A tensor of data type :attr:`dtype` with shape :attr:`shape` and all elements set to 1. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - data0 = fluid.layers.ones(shape=[2, 4], dtype='float32') # [[1., 1., 1., 1.], [1., 1., 1., 1.]] - - # shape is a Tensor - shape = fluid.layers.fill_constant(shape=[2], dtype='int32', value=2) - data1 = fluid.layers.ones(shape=shape, dtype='int32') #[[1, 1], [1, 1]] - """ - return fill_constant(value=1.0, **locals()) - - def zeros(shape, dtype, force_cpu=False, name=None): """ The OP creates a tensor of specified :attr:`shape` and :attr:`dtype`, and fills it with 0. @@ -1384,190 +1350,6 @@ def zeros(shape, dtype, force_cpu=False, name=None): return fill_constant(value=0.0, **locals()) -def reverse(x, axis): - """ - :alias_main: paddle.reverse - :alias: paddle.reverse,paddle.tensor.reverse,paddle.tensor.manipulation.reverse - :old_api: paddle.fluid.layers.reverse - - The OP reverses the tensor :attr:`x` along the given :attr:`axis`. - - .. code-block:: text - - Case 1: - - Given a LoDTensor: - x = [[0, 1, 2], [3, 4, 5], [6, 7, 8]] - axis = [0, 1] - - Then: - output = [[8, 7, 6], [5, 4, 3], [2, 1, 0]] - - Case 2: - - Given a LoDTensorArray: - x = {[[0, 1], [2, 3]], - [[4, 5, 6]], - [[7],[8], [9]]} - axis = 0 - - Then: - output = {[[7],[8], [9]], - [[4, 5, 6]], - [[0, 1], [2, 3]]} - - Parameters: - x (Variable): A tensor or LoDTensorArray to be reversed, its data type supports bool, float32, float64, int32, int64 and uint8. - If input is a LoDTensorArray, returns a new reversed LoDTensorArray without changing the internal order of each inner tensor. - axis (int|tuple|list): A dimension or a set of dimensions of :attr:`x` to reverse. Must be - in the range [-rank( :attr:`x` ), rank( :attr:`x` )). If it is a tuple or a list, reversing - will be apply on each axis in the tuple or list. If input is a LoDTensorArray, the value of axis shall be 0, or a - list [0] or tuple (0, ) with shape [1]. - - Returns: - Variable: The reversed tensor with the same shape and data type as :attr:`x`. - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - import numpy as np - data = fluid.layers.assign(np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]], dtype='float32')) # [[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]] - result1 = fluid.layers.reverse(data, 0) # [[6., 7., 8.], [3., 4., 5.], [0., 1., 2.]] - result2 = fluid.layers.reverse(data, [0, 1]) # [[8., 7., 6.], [5., 4., 3.], [2., 1., 0.]] - - # example of LoDTensorArray - data1 = fluid.layers.assign(np.array([[0, 1, 2]], dtype='float32')) - data2 = fluid.layers.assign(np.array([[3, 4, 5]], dtype='float32')) - tensor_array = fluid.layers.create_array(dtype='float32') - i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0) - fluid.layers.array_write(data1, i, tensor_array) - fluid.layers.array_write(data2, i+1, tensor_array) - - reversed_tensor_array = fluid.layers.reverse(tensor_array, 0) # {[[3, 4, 5]], [[0, 1, 2]]} - """ - check_variable_and_dtype( - x, 'x', ('float32', 'float64', 'int32', 'int64', 'uint8'), 'reverse' - ) - check_type(axis, 'axis', (int, tuple, list, Variable), 'reverse') - if isinstance(axis, int): - axis = [axis] - if in_dygraph_mode(): - return _C_ops.reverse(x, axis) - helper = LayerHelper("reverse", **locals()) - out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='reverse', - inputs={'X': x}, - outputs={'Out': [out]}, - attrs={'axis': axis}, - ) - return out - - -def save(x, file_path, overwrite=True): - """ - Saves a variable as a file. - - Args: - x(variable): The Tensor/LoDTensor to be saved. - file_path(str): The file path where the variable will be saved. - overwrite(bool): Whether or not cover the given file when it has already - existed. If it's set 'False' and the file is existed, a runtime - error will be thrown. - """ - helper = LayerHelper("save", **locals()) - helper.append_op( - type="save", - inputs={"input": x}, - outputs={}, - args={"file_path": file_path, "overwrite": overwrite}, - ) - - -def save_combine(x, file_path, overwrite=True): - """ - Saves a list of variables into a single file. - - Args: - x(list): A list of Tensor/LoDTensor variables to be saved together in - a single file. - file_path(str): The file path where variables will be saved. - overwrite(bool): Whether or not cover the given file when it has already - existed. If it's set 'False' and the file is existed, a runtime - error will be thrown. - - Returns: - There is no return value. - - Examples: - - .. code-block:: python - - import paddle.fluid as fluid - v1 = fluid.layers.data(name="data", - shape=(4, 6), - dtype="float32") - v2 = fluid.layers.data(name="data", - shape=(6, 8, 4), - dtype="float32") - normed = fluid.layers.save_combine([v1, v2], file_path="output") - """ - helper = LayerHelper("save_combine", **locals()) - helper.append_op( - type="save_combine", - inputs={"input": x}, - outputs={}, - args={"file_path": file_path, "overwrite": overwrite}, - ) - - -def load_combine(out, file_path): - """ - Loads a list of variable from a single file. - - Args: - out(list): The list of variables to be read from the disk file. - file_path(str): The path of the disk file. - """ - helper = LayerHelper("load_combine", **locals()) - helper.append_op( - type="load_combine", - inputs={}, - output={"Out": out}, - args={"file_path": file_path}, - ) - - -def has_inf(x): - """ - Test if any of x contains an infinity number - - Args: - x (Tensor): The Tensor to be checked. 
- - Returns: - Tensor: The tensor storing the output, only a bool value, indicating that whether there is infinity number in x or not. - - Examples: - .. code-block:: python - - import paddle - data = paddle.randn(shape=[4, 32, 32], dtype="float32") - res = paddle.fluid.layers.has_inf(data) - # [False] - - """ - if _non_static_mode(): - return _legacy_C_ops.isinf(x) - - check_type(x, 'x', (Variable), 'has_inf') - helper = LayerHelper("isinf", **locals()) - out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op(type="isinf", inputs={"X": x}, outputs={"Out": out}) - return out - - def linspace(start, stop, num, dtype=None, name=None): r""" This OP return fixed number of evenly spaced values within a given interval. @@ -1683,55 +1465,6 @@ def linspace(start, stop, num, dtype=None, name=None): return out -def zeros_like(x, out=None): - """ - This OP creates a zeros tensor which has identical shape and dtype - with `x`. - - Args: - x(Variable): The input tensor which specifies shape and dtype, the - input data dtype could be bool, float32, float64, int32, int64. - out(Variable, optional): If is :attr:`None` , the op will create the - variable as output, the data type and shape of this variable will - be same as input :attr:`x`. If is a tensor, the data type and shape - need to be same as input :attr:`x`. The default value is :attr:`None` . - - Returns: - Variable: The N-D tensor, the element in tensor is related to input - data type, if the input data type is bool, the output value is - False, otherwise is zero. The output shape is the same as the input. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - x = fluid.data(name='x', dtype='float32', shape=[3]) - data = fluid.layers.zeros_like(x) # [0.0, 0.0, 0.0] - - """ - check_variable_and_dtype( - x, "x", ['bool', 'float32', 'float64', 'int32', 'int64'], 'zeros_like' - ) - helper = LayerHelper("zeros_like", **locals()) - if out is None: - out = helper.create_variable_for_type_inference(dtype=x.dtype) - else: - check_variable_and_dtype( - out, - "out", - ['bool', 'float32', 'float64', 'int32', 'int64'], - 'zeros_like', - ) - helper.append_op( - type='fill_any_like', - inputs={'X': [x]}, - attrs={'value': 0, "dtype": x.dtype}, - outputs={'Out': [out]}, - ) - out.stop_gradient = True - return out - - @deprecated(since="2.0.0", update_to="paddle.diag") def diag(diagonal): r""" @@ -1783,49 +1516,3 @@ def diag(diagonal): out.stop_gradient = True return out - - -def ones_like(x, out=None): - """ - **ones_like** - - This function creates a ones tensor which has identical shape and dtype - with `x`. - - Args: - x(Variable): The input tensor which specifies shape and dtype. - out(Variable): The output tensor. - - Returns: - out(Variable): The tensor variable storing the output. - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - - x = fluid.layers.data(name='x', dtype='float32', shape=[3], append_batch_size=False) - data = fluid.layers.ones_like(x) # [1.0, 1.0, 1.0] - - """ - check_variable_and_dtype( - x, "x", ['bool', 'float32', 'float64', 'int32', 'int64'], 'ones_like' - ) - - helper = LayerHelper("ones_like", **locals()) - if out is None: - out = helper.create_variable_for_type_inference(dtype=x.dtype) - else: - check_variable_and_dtype( - out, - "out", - ['bool', 'float32', 'float64', 'int32', 'int64'], - 'ones_like', - ) - helper.append_op( - type='fill_any_like', - inputs={'X': [x]}, - attrs={'value': 1.0}, - outputs={'Out': [out]}, - ) - return out diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py index 54762c793ff435..279d8c9f30e839 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_list.py @@ -183,7 +183,7 @@ def test_list_pop_in_for_loop(x, iter_num): a.append(x + i) b.append(x * 2) - one = fluid.layers.ones(shape=[1], dtype="int32") + one = paddle.ones(shape=[1], dtype="int32") for i in range(one.numpy()[0]): item = a.pop() return a[0], item, b[1] diff --git a/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py index 6d2ef04e78301f..1ff374fa9f9a7e 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py @@ -53,7 +53,7 @@ def simple_net(self): array_len = layers.fill_constant(shape=[1], dtype='int32', value=5) array_len = layers.cast(array_len, 'int64') array_len.stop_gradient = True - cond = layers.ones(shape=[1], dtype='int32') + cond = paddle.ones(shape=[1], dtype='int32') cond = layers.cast(cond, 'bool') j = layers.fill_constant(shape=[1], dtype='int32', value=1) j = layers.cast(j, 'int64') @@ -62,7 +62,7 @@ def simple_net(self): array_len2 = layers.cast(array_len2, 'int64') array_len2.stop_gradient = True cond2 = paddle.logical_or(x=j, y=array_len2) - cond2 = layers.ones(shape=[1], dtype='int32') + cond2 = paddle.ones(shape=[1], dtype='int32') cond2 = layers.cast(cond2, 'bool') while_op = layers.While(cond=cond) while_op2 = layers.While(cond=cond2) diff --git a/python/paddle/fluid/tests/unittests/test_desc_clone.py b/python/paddle/fluid/tests/unittests/test_desc_clone.py index 5028a3e251e546..e2a90cb544b646 100644 --- a/python/paddle/fluid/tests/unittests/test_desc_clone.py +++ b/python/paddle/fluid/tests/unittests/test_desc_clone.py @@ -209,7 +209,7 @@ def test_clone_with_stop_gradient(self): startup_program = fluid.Program() with fluid.program_guard(train_program, startup_program): img = fluid.layers.data(name='image', shape=[784]) - true = fluid.layers.ones(shape=[1], dtype="float32") + true = paddle.ones(shape=[1], dtype="float32") hidden1 = fluid.layers.fc(input=img, size=200, act='relu') hidden1.stop_gradient = True @@ -250,7 +250,7 @@ def test_clone_with_stop_gradient(self): startup_program = fluid.Program() with fluid.program_guard(train_program, startup_program): img = fluid.layers.data(name='image', shape=[784]) - true = fluid.layers.ones(shape=[1], dtype="float32") + true = paddle.ones(shape=[1], dtype="float32") hidden1 = fluid.layers.fc(input=img, size=200, act='relu') hidden1.stop_gradient = True diff --git 
a/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py b/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py index c9ddac80f0eb6a..9671448e3ff912 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py +++ b/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py @@ -13,7 +13,7 @@ # limitations under the License. import unittest - +import paddle import numpy as np from op_test import OpTest, skip_check_grad_ci @@ -72,8 +72,8 @@ def setUp(self): pass def calc_add_out(self, place=None, parallel=None): - x = fluid.layers.ones(shape=[3, 3], dtype='float32') - y = fluid.layers.ones(shape=[3, 3], dtype='float32') + x = paddle.ones(shape=[3, 3], dtype='float32') + y = paddle.ones(shape=[3, 3], dtype='float32') out = fluid.layers.elementwise_add(x=x, y=y) program = fluid.default_main_program() if parallel: @@ -85,8 +85,8 @@ def calc_add_out(self, place=None, parallel=None): return out def calc_sub_out(self, place=None, parallel=None): - x = fluid.layers.ones(shape=[2, 2], dtype='float32') - y = fluid.layers.ones(shape=[2, 2], dtype='float32') + x = paddle.ones(shape=[2, 2], dtype='float32') + y = paddle.ones(shape=[2, 2], dtype='float32') out = fluid.layers.elementwise_sub(x=x, y=y) program = fluid.default_main_program() if parallel: diff --git a/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py b/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py index 2cb86460190936..5cf61d53b30eef 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py @@ -16,8 +16,6 @@ import numpy as np from op_test import OpTest - -import paddle.fluid as fluid from paddle.fluid.framework import convert_np_dtype_to_dtype_ @@ -47,36 +45,5 @@ def init_dtype(self): self.dtype = np.float64 -class TestZerosError(unittest.TestCase): - def test_errors(self): - def test_zeros_like_type_error(): - with fluid.program_guard(fluid.Program(), fluid.Program()): - fluid.layers.zeros_like([10], dtype="float") - - self.assertRaises(TypeError, test_zeros_like_type_error) - - def test_zeros_like_dtype_error(): - with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.data(name="data", shape=[10], dtype="float16") - fluid.layers.zeros_like(data, dtype="float32") - - self.assertRaises(TypeError, test_zeros_like_dtype_error) - - def test_zeros_like_out_type_error(): - with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.data(name="data", shape=[10], dtype="float32") - fluid.layers.zeros_like(data, dtype="float32", out=[10]) - - self.assertRaises(TypeError, test_zeros_like_out_type_error) - - def test_zeros_like_out_dtype_error(): - with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.data(name="data", shape=[10], dtype="float32") - out = fluid.data(name="out", shape=[10], dtype="float16") - fluid.layers.zeros_like(data, dtype="float32", out=out) - - self.assertRaises(TypeError, test_zeros_like_out_dtype_error) - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py index 2c020c0465bb71..7f7330eca39b59 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py @@ -636,7 +636,7 @@ def test_compare(self): class 
TestRaiseNoDoubleGradOp(TestCase): def raise_no_grad_op(self): with fluid.dygraph.guard(): - x = fluid.layers.ones(shape=[2, 3, 2, 2], dtype='float32') + x = paddle.ones(shape=[2, 3, 2, 2], dtype='float32') x.stop_gradient = False y = paddle.static.nn.group_norm(x, groups=1) diff --git a/python/paddle/fluid/tests/unittests/test_isfinite_op.py b/python/paddle/fluid/tests/unittests/test_isfinite_op.py index 24f6ab3dea2e2a..1ec8063bf57282 100644 --- a/python/paddle/fluid/tests/unittests/test_isfinite_op.py +++ b/python/paddle/fluid/tests/unittests/test_isfinite_op.py @@ -16,9 +16,6 @@ import numpy as np from op_test import OpTest - -import paddle -import paddle.fluid as fluid import paddle.fluid.core as core @@ -107,22 +104,5 @@ def init_dtype(self): self.dtype = np.float16 -class BadInputTest(unittest.TestCase): - def test_error(self): - with fluid.program_guard(fluid.Program()): - - def test_has_inf_bad_x(): - data = [1, 2, 3] - result = fluid.layers.has_inf(data) - - self.assertRaises(TypeError, test_has_inf_bad_x) - - with fluid.dygraph.guard(): - data = paddle.zeros([2, 3]) - result = paddle.fluid.layers.has_inf(data) - expect_value = np.array([False]) - self.assertEqual((result.numpy() == expect_value).all(), True) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_math_op_patch.py b/python/paddle/fluid/tests/unittests/test_math_op_patch.py index 4ad708df5da7f7..a582b4951b1177 100644 --- a/python/paddle/fluid/tests/unittests/test_math_op_patch.py +++ b/python/paddle/fluid/tests/unittests/test_math_op_patch.py @@ -234,7 +234,7 @@ def test_equal_and_cond(self): a = fluid.layers.data(name="a", shape=[1], dtype='float32') b = fluid.layers.data(name="b", shape=[1], dtype='float32') - one = fluid.layers.ones(shape=[1], dtype='int32') + one = paddle.ones(shape=[1], dtype='int32') zero = fluid.layers.zeros(shape=[1], dtype='int32') cond = one == zero c = fluid.layers.cond(cond, lambda: a + b, lambda: a - b) diff --git a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py index 972ce32ca6fcf2..ea54a4f3076e62 100644 --- a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py @@ -384,7 +384,7 @@ def test_index(self): def func_test_np_left_mul(self): with fluid.dygraph.guard(): t = np.sqrt(2.0 * np.pi) - x = fluid.layers.ones((2, 2), dtype="float32") + x = paddle.ones((2, 2), dtype="float32") y = t * x np.testing.assert_allclose( diff --git a/python/paddle/fluid/tests/unittests/test_ones_like.py b/python/paddle/fluid/tests/unittests/test_ones_like.py index 0ee8ab313a53b9..e7f06d526c0964 100644 --- a/python/paddle/fluid/tests/unittests/test_ones_like.py +++ b/python/paddle/fluid/tests/unittests/test_ones_like.py @@ -64,28 +64,6 @@ def test_api(self): self.assertEqual((outs[i] == np.ones(shape, dtype)).all(), True) -class TestOnesLikeImpeartive(unittest.TestCase): - def test_out(self): - shape = [3, 4] - place = ( - fluid.CUDAPlace(0) - if core.is_compiled_with_cuda() - else fluid.CPUPlace() - ) - paddle.disable_static(place) - x = paddle.to_tensor(np.ones(shape)) - for dtype in [np.bool_, np.float32, np.float64, np.int32, np.int64]: - out = ones_like(x, dtype) - self.assertEqual((out.numpy() == np.ones(shape, dtype)).all(), True) - - out = paddle.tensor.ones_like(x) - self.assertEqual((out.numpy() == np.ones(shape, dtype)).all(), True) - - out = paddle.tensor.creation.ones_like(x) - 
self.assertEqual((out.numpy() == np.ones(shape, dtype)).all(), True) - paddle.enable_static() - - class TestOnesAPI(unittest.TestCase): def test_api(self): shape = [3, 4] diff --git a/python/paddle/fluid/tests/unittests/test_ones_op.py b/python/paddle/fluid/tests/unittests/test_ones_op.py index 7c93de4b1a446b..6b2bbd3786522f 100644 --- a/python/paddle/fluid/tests/unittests/test_ones_op.py +++ b/python/paddle/fluid/tests/unittests/test_ones_op.py @@ -17,7 +17,7 @@ import numpy as np import paddle -import paddle.fluid as fluid +import numpy as np class ApiOnesTest(unittest.TestCase): @@ -48,7 +48,7 @@ def test_paddle_ones(self): def test_fluid_ones(self): with paddle.static.program_guard(paddle.static.Program()): - ones = fluid.layers.ones(shape=[10], dtype="int64") + ones = paddle.ones(shape=[10], dtype="int64") place = paddle.CPUPlace() exe = paddle.static.Executor(place) (result,) = exe.run(fetch_list=[ones]) @@ -72,13 +72,13 @@ def test_error2(): def test_error3(): with paddle.static.program_guard(paddle.static.Program()): - ones = fluid.layers.ones(shape=10, dtype="int64") + ones = paddle.ones(shape=10, dtype="int64") self.assertRaises(TypeError, test_error3) def test_error4(): with paddle.static.program_guard(paddle.static.Program()): - ones = fluid.layers.ones(shape=[10], dtype="int8") + ones = paddle.ones(shape=[10], dtype="int8") self.assertRaises(TypeError, test_error4) diff --git a/python/paddle/fluid/tests/unittests/test_reverse_op.py b/python/paddle/fluid/tests/unittests/test_reverse_op.py index 995eca8473d157..a1d82c077e0962 100644 --- a/python/paddle/fluid/tests/unittests/test_reverse_op.py +++ b/python/paddle/fluid/tests/unittests/test_reverse_op.py @@ -12,20 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
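Note: the substitutions in these unit tests follow the same mapping used throughout this patch, from the removed fluid.layers helpers to their paddle 2.x equivalents. A short sketch of the replacement calls (not part of the diff; has_inf has no one-to-one counterpart, so the elementwise isinf plus any is shown as the closest substitute):

    import paddle

    x = paddle.ones([2, 4], dtype='float32')   # was fluid.layers.ones
    y = paddle.reverse(x, axis=[0, 1])         # was fluid.layers.reverse
    z = paddle.zeros_like(x)                   # was fluid.layers.zeros_like
    w = paddle.ones_like(x)                    # was fluid.layers.ones_like
    has_inf = paddle.isinf(x).any()            # was fluid.layers.has_inf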
-import os import unittest -import gradient_checker import numpy as np -from decorator_helper import prog_scope from op_test import OpTest -from test_attribute_var import UnittestBase import paddle -import paddle.fluid as fluid -import paddle.fluid.layers as layers -from paddle.fluid import core -from paddle.fluid.framework import Program, program_guard class TestReverseOp(OpTest): @@ -36,7 +28,7 @@ def initTestCase(self): def setUp(self): self.initTestCase() self.op_type = "reverse" - self.python_api = fluid.layers.reverse + self.python_api = paddle.reverse self.inputs = {"X": self.x} self.attrs = {'axis': self.axis} out = self.x @@ -99,241 +91,6 @@ def initTestCase(self): self.axis = [-1, -2] -class TestCase4(unittest.TestCase): - def test_error(self): - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - train_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(train_program, startup_program): - label = fluid.layers.data( - name="label", shape=[1, 1, 1, 1, 1, 1, 1, 1], dtype="int64" - ) - rev = fluid.layers.reverse(label, axis=[-1, -2]) - - def _run_program(): - x = np.random.random(size=(10, 1, 1, 1, 1, 1, 1)).astype('int64') - exe.run(train_program, feed={"label": x}) - - self.assertRaises(IndexError, _run_program) - - -class TestReverseLoDTensorArray(unittest.TestCase): - def setUp(self): - self.shapes = [[5, 25], [5, 20], [5, 5]] - self.place = ( - fluid.CUDAPlace(0) - if fluid.is_compiled_with_cuda() - else fluid.CPUPlace() - ) - self.exe = fluid.Executor(self.place) - - def run_program(self, arr_len, axis=0): - main_program = fluid.Program() - - with fluid.program_guard(main_program): - inputs, inputs_data = [], [] - for i in range(arr_len): - x = fluid.data("x%s" % i, self.shapes[i], dtype='float32') - x.stop_gradient = False - inputs.append(x) - inputs_data.append( - np.random.random(self.shapes[i]).astype('float32') - ) - - tensor_array = fluid.layers.create_array(dtype='float32') - for i in range(arr_len): - idx = fluid.layers.array_length(tensor_array) - fluid.layers.array_write(inputs[i], idx, tensor_array) - - reverse_array = fluid.layers.reverse(tensor_array, axis=axis) - output, _ = fluid.layers.tensor_array_to_tensor(reverse_array) - loss = paddle.sum(output) - fluid.backward.append_backward(loss) - input_grads = list( - map( - main_program.global_block().var, - [x.name + "@GRAD" for x in inputs], - ) - ) - - feed_dict = dict(zip([x.name for x in inputs], inputs_data)) - res = self.exe.run( - main_program, - feed=feed_dict, - fetch_list=input_grads + [output.name], - ) - - return np.hstack(inputs_data[::-1]), res - - def test_case1(self): - gt, res = self.run_program(arr_len=3) - self.check_output(gt, res) - # test with tuple type of axis - gt, res = self.run_program(arr_len=3, axis=(0,)) - self.check_output(gt, res) - - def test_case2(self): - gt, res = self.run_program(arr_len=1) - self.check_output(gt, res) - # test with list type of axis - gt, res = self.run_program(arr_len=1, axis=[0]) - self.check_output(gt, res) - - def check_output(self, gt, res): - arr_len = len(res) - 1 - reversed_array = res[-1] - # check output - np.testing.assert_array_equal(gt, reversed_array) - # check grad - for i in range(arr_len): - np.testing.assert_array_equal(res[i], np.ones_like(res[i])) - - def test_raise_error(self): - # The len(axis) should be 1 is input(X) is LoDTensorArray - with self.assertRaises(Exception): - self.run_program(arr_len=3, axis=[0, 1]) - # The value of axis should be 0 is input(X) is LoDTensorArray - with 
self.assertRaises(Exception): - self.run_program(arr_len=3, axis=1) - - -class TestReverseAxisTensor(UnittestBase): - def init_info(self): - self.shapes = [[2, 3, 4]] - self.save_path = os.path.join(self.temp_dir.name, self.path_prefix()) - - def test_static(self): - main_prog = Program() - starup_prog = Program() - with program_guard(main_prog, starup_prog): - fc = paddle.nn.Linear(4, 10) - x = paddle.randn([2, 3, 4]) - x.stop_gradient = False - feat = fc(x) # [2,3,10] - - out = self.call_func(feat) - - sgd = paddle.optimizer.SGD() - sgd.minimize(paddle.mean(out)) - self.assertTrue(self.var_prefix() in str(main_prog)) - - exe = paddle.static.Executor() - exe.run(starup_prog) - res = exe.run(fetch_list=[feat, out]) - gt = res[0][::-1, :, ::-1] - np.testing.assert_allclose(res[1], gt) - - paddle.static.save_inference_model( - self.save_path, [x], [feat, out], exe - ) - # Test for Inference Predictor - infer_outs = self.infer_prog() - gt = infer_outs[0][::-1, :, ::-1] - np.testing.assert_allclose(infer_outs[1], gt) - - def path_prefix(self): - return 'reverse_tensor' - - def var_prefix(self): - return "Var[" - - def call_func(self, x): - # axes is a Variable - axes = paddle.assign([0, 2]) - out = paddle.fluid.layers.reverse(x, axes) - return out - - -class TestReverseAxisListTensor(TestReverseAxisTensor): - def path_prefix(self): - return 'reverse_tensors' - - def var_prefix(self): - return "Vars[" - - def call_func(self, x): - # axes is a List[Variable] - axes = [paddle.assign([0]), paddle.assign([2])] - out = paddle.fluid.layers.reverse(x, axes) - - # check attrs - axis_attrs = ( - paddle.static.default_main_program() - .block(0) - .ops[-1] - .all_attrs()["axis"] - ) - self.assertTrue(axis_attrs[0].name, axes[0].name) - self.assertTrue(axis_attrs[1].name, axes[1].name) - return out - - -class TestReverseDoubleGradCheck(unittest.TestCase): - def reverse_wrapper(self, x): - return fluid.layers.reverse(x[0], [0, 1]) - - @prog_scope() - def func(self, place): - # the shape of input variable should be clearly specified, not inlcude -1. - eps = 0.005 - dtype = np.float64 - - data = layers.data('data', [3, 4], False, dtype) - data.persistable = True - out = fluid.layers.reverse(data, [0, 1]) - data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) - - gradient_checker.double_grad_check( - [data], out, x_init=[data_arr], place=place, eps=eps - ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) - gradient_checker.double_grad_check_for_dygraph( - self.reverse_wrapper, [data], out, x_init=[data_arr], place=place - ) - - def test_grad(self): - paddle.enable_static() - places = [fluid.CPUPlace()] - if core.is_compiled_with_cuda(): - places.append(fluid.CUDAPlace(0)) - for p in places: - self.func(p) - - -class TestReverseTripleGradCheck(unittest.TestCase): - def reverse_wrapper(self, x): - return fluid.layers.reverse(x[0], [0, 1]) - - @prog_scope() - def func(self, place): - # the shape of input variable should be clearly specified, not inlcude -1. 
- eps = 0.005 - dtype = np.float32 - - data = layers.data('data', [2, 3], False, dtype) - data.persistable = True - out = fluid.layers.reverse(data, [0, 1]) - data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) - - gradient_checker.triple_grad_check( - [data], out, x_init=[data_arr], place=place, eps=eps - ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) - gradient_checker.triple_grad_check_for_dygraph( - self.reverse_wrapper, [data], out, x_init=[data_arr], place=place - ) - - def test_grad(self): - paddle.enable_static() - places = [fluid.CPUPlace()] - if core.is_compiled_with_cuda(): - places.append(fluid.CUDAPlace(0)) - for p in places: - self.func(p) - - if __name__ == '__main__': paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_zeros_like_op.py b/python/paddle/fluid/tests/unittests/test_zeros_like_op.py index 204eea2a48a6a9..3fd7a992dc914a 100644 --- a/python/paddle/fluid/tests/unittests/test_zeros_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_zeros_like_op.py @@ -83,7 +83,7 @@ def test_out(self): self.assertEqual( (out.numpy() == np.zeros(shape, dtype)).all(), True ) - out = paddle.tensor.zeros_like(x) + out = paddle.zeros_like(x) self.assertEqual((out.numpy() == np.zeros(shape, dtype)).all(), True) out = paddle.tensor.creation.zeros_like(x) self.assertEqual((out.numpy() == np.zeros(shape, dtype)).all(), True) From 91af6df84a3d8726c154c48c66268455f5d52346 Mon Sep 17 00:00:00 2001 From: Vvsmile <17864154871@163.com> Date: Wed, 30 Nov 2022 15:18:33 +0800 Subject: [PATCH 062/154] [Clean Fluid API]Remove API: log (#47966) * replace log with paddle.log * replace log with paddle.nn.functional.log * fix the code style of remove_log * fix the ImportError of log * fix the error of modification of the dist_transformer.py * fix error of Static-Check --- python/paddle/distribution/normal.py | 6 +-- python/paddle/distribution/uniform.py | 10 +++-- python/paddle/fluid/layers/distributions.py | 23 ++++++---- python/paddle/fluid/layers/nn.py | 42 ------------------- python/paddle/fluid/layers/rnn.py | 6 +-- .../fluid/tests/unittests/dist_transformer.py | 2 +- .../seq2seq_dygraph_model.py | 4 +- .../unittests/dygraph_to_static/test_bmn.py | 14 +++---- .../test_reinforcement_learning.py | 2 +- .../transformer_dygraph_model.py | 2 +- .../tests/unittests/test_activation_op.py | 4 +- .../tests/unittests/test_beam_search_op.py | 2 +- .../test_imperative_reinforcement.py | 4 +- 13 files changed, 40 insertions(+), 81 deletions(-) diff --git a/python/paddle/distribution/normal.py b/python/paddle/distribution/normal.py index f28b92ec86baea..b3877af277fbb9 100644 --- a/python/paddle/distribution/normal.py +++ b/python/paddle/distribution/normal.py @@ -242,7 +242,7 @@ def entropy(self): ) return paddle.add( 0.5 + zero_tmp, - 0.5 * math.log(2 * math.pi) + nn.log((self.scale + zero_tmp)), + 0.5 * math.log(2 * math.pi) + paddle.log((self.scale + zero_tmp)), name=name, ) @@ -260,7 +260,7 @@ def log_prob(self, value): value = self._check_values_dtype_in_probs(self.loc, value) var = self.scale * self.scale - log_scale = nn.log(self.scale) + log_scale = paddle.log(self.scale) return paddle.subtract( -1.0 * ((value - self.loc) * (value - self.loc)) / (2.0 * var), log_scale + math.log(math.sqrt(2.0 * math.pi)), @@ -331,5 +331,5 @@ def kl_divergence(self, other): t1 = (self.loc - other.loc) / other.scale t1 = t1 * t1 return paddle.add( - 0.5 * var_ratio, 0.5 * (t1 - 1.0 - nn.log(var_ratio)), name=name + 0.5 * var_ratio, 0.5 * (t1 - 1.0 - 
paddle.log(var_ratio)), name=name ) diff --git a/python/paddle/distribution/uniform.py b/python/paddle/distribution/uniform.py index f242dc3db0da93..b9566d3c8dbc27 100644 --- a/python/paddle/distribution/uniform.py +++ b/python/paddle/distribution/uniform.py @@ -27,6 +27,8 @@ nn, tensor, ) + +import paddle from paddle.tensor import random @@ -216,7 +218,7 @@ def log_prob(self, value): if in_dygraph_mode(): lb = _C_ops.cast(lb_bool, value.dtype) ub = _C_ops.cast(ub_bool, value.dtype) - return nn.log(lb * ub) - nn.log(self.high - self.low) + return paddle.log(lb * ub) - paddle.log(self.high - self.low) if _in_legacy_dygraph(): lb = _legacy_C_ops.cast( @@ -225,7 +227,7 @@ def log_prob(self, value): ub = _legacy_C_ops.cast( ub_bool, 'in_dtype', ub_bool.dtype, 'out_dtype', value.dtype ) - return nn.log(lb * ub) - nn.log(self.high - self.low) + return paddle.log(lb * ub) - paddle.log(self.high - self.low) name = self.name + '_log_prob' lb_bool = self.low < value @@ -233,7 +235,7 @@ def log_prob(self, value): lb = tensor.cast(lb_bool, dtype=value.dtype) ub = tensor.cast(ub_bool, dtype=value.dtype) return paddle.subtract( - nn.log(lb * ub), nn.log(self.high - self.low), name=name + paddle.log(lb * ub), paddle.log(self.high - self.low), name=name ) def probs(self, value): @@ -286,4 +288,4 @@ def entropy(self): """ name = self.name + '_entropy' - return nn.log(self.high - self.low, name=name) + return paddle.log(self.high - self.low, name=name) diff --git a/python/paddle/fluid/layers/distributions.py b/python/paddle/fluid/layers/distributions.py index d3ca0de64b5c3e..a54403013c69cd 100644 --- a/python/paddle/fluid/layers/distributions.py +++ b/python/paddle/fluid/layers/distributions.py @@ -264,7 +264,7 @@ def log_prob(self, value): ub_bool = control_flow.less_than(value, self.high) lb = tensor.cast(lb_bool, dtype=value.dtype) ub = tensor.cast(ub_bool, dtype=value.dtype) - return nn.log(lb * ub) - nn.log(self.high - self.low) + return paddle.log(lb * ub) - paddle.log(self.high - self.low) def entropy(self): """Shannon entropy in nats. @@ -273,7 +273,7 @@ def entropy(self): Variable: Shannon entropy of uniform distribution.The data type is float32. 
""" - return nn.log(self.high - self.low) + return paddle.log(self.high - self.low) class Normal(Distribution): @@ -412,7 +412,9 @@ def entropy(self): self.loc + self.scale, batch_shape, self.loc.dtype, 0.0 ) return ( - 0.5 + 0.5 * math.log(2 * math.pi) + nn.log((self.scale + zero_tmp)) + 0.5 + + 0.5 * math.log(2 * math.pi) + + paddle.log((self.scale + zero_tmp)) ) def log_prob(self, value): @@ -430,7 +432,7 @@ def log_prob(self, value): ) var = self.scale * self.scale - log_scale = nn.log(self.scale) + log_scale = paddle.log(self.scale) return ( -1.0 * ((value - self.loc) * (value - self.loc)) / (2.0 * var) - log_scale @@ -454,7 +456,7 @@ def kl_divergence(self, other): var_ratio = var_ratio * var_ratio t1 = (self.loc - other.loc) / other.scale t1 = t1 * t1 - return 0.5 * (var_ratio + t1 - 1.0 - nn.log(var_ratio)) + return 0.5 * (var_ratio + t1 - 1.0 - paddle.log(var_ratio)) class Categorical(Distribution): @@ -542,7 +544,8 @@ def kl_divergence(self, other): other_z = paddle.sum(other_e_logits, axis=-1, keepdim=True) prob = e_logits / z kl = paddle.sum( - prob * (logits - nn.log(z) - other_logits + nn.log(other_z)), + prob + * (logits - paddle.log(z) - other_logits + paddle.log(other_z)), axis=-1, keepdim=True, ) @@ -562,7 +565,7 @@ def entropy(self): prob = e_logits / z entropy = -1.0 * paddle.sum( - prob * (logits - nn.log(z)), axis=-1, keepdim=True + prob * (logits - paddle.log(z)), axis=-1, keepdim=True ) return entropy @@ -687,7 +690,7 @@ def entropy(self): """ entropy = 0.5 * ( self.scale.shape[0] * (1.0 + math.log(2 * math.pi)) - + nn.log(self._det(self.scale)) + + paddle.log(self._det(self.scale)) ) return entropy @@ -710,7 +713,9 @@ def kl_divergence(self, other): ) tri_matmul = nn.matmul(loc_matmul_cov, (other.loc - self.loc)) k = list(self.scale.shape)[0] - ln_cov = nn.log(self._det(other.scale)) - nn.log(self._det(self.scale)) + ln_cov = paddle.log(self._det(other.scale)) - paddle.log( + self._det(self.scale) + ) kl = 0.5 * (tr_cov_matmul + tri_matmul - k + ln_cov) return kl diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index b59e0c4c800e8d..911ac5d74a339e 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -97,7 +97,6 @@ 'resize_trilinear', 'resize_nearest', 'relu', - 'log', 'unique', 'unique_with_counts', 'elementwise_add', @@ -5246,47 +5245,6 @@ def resize_nearest( ) -def log(x, name=None): - r""" - Calculates the natural log of the given input tensor, element-wise. - - .. math:: - - Out = \\ln(x) - - Args: - x (Tensor): Input Tensor. Must be one of the following types: float32, float64. - name (str|None): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` - - - Returns: - Tensor: The natural log of the input Tensor computed element-wise. - - Examples: - - .. 
code-block:: python - - import paddle - - x = [[2,3,4], [7,8,9]] - x = paddle.to_tensor(x, dtype='float32') - res = paddle.log(x) - # [[0.693147, 1.09861, 1.38629], [1.94591, 2.07944, 2.19722]] - """ - if in_dygraph_mode(): - return _C_ops.log(x) - if _in_legacy_dygraph(): - return _legacy_C_ops.log(x) - - check_variable_and_dtype(x, 'x', ['float32', 'float64'], "log") - inputs = {'X': [x]} - helper = LayerHelper('log', **locals()) - dtype = helper.input_dtype(input_param_name='x') - out = helper.create_variable_for_type_inference(dtype) - helper.append_op(type="log", inputs={"X": x}, outputs={"Out": out}) - return out - - @deprecated(since="2.0.0", update_to="paddle.nn.functional.relu") def relu(x, name=None): """ diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index e65e4b63300ea7..23e61db9f47a6f 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -1304,7 +1304,7 @@ def _beam_search_step(self, time, logits, next_cell_states, beam_state): self.noend_mask_tensor, "float64" ) - step_log_probs = nn.log(nn.softmax(logits)) + step_log_probs = paddle.log(nn.softmax(logits)) step_log_probs = self._mask_probs(step_log_probs, beam_state.finished) log_probs = nn.elementwise_add( x=step_log_probs, y=beam_state.log_probs, axis=0 @@ -3529,8 +3529,8 @@ def beam_search( name='probs', shape=[None, 10000], dtype='float32') topk_scores, topk_indices = fluid.layers.topk(probs, k=beam_size) accu_scores = fluid.layers.elementwise_add( - x=fluid.layers.log(x=topk_scores), - y=fluid.layers.reshape(pre_scores, shape=[-1]), + x=paddle.log(x=topk_scores), + y=paddle.reshape(pre_scores, shape=[-1]), axis=0) selected_ids, selected_scores = fluid.layers.beam_search( pre_ids=pre_ids, diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index 4d12648354a88a..8a8b013b6b053a 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -1837,7 +1837,7 @@ def beam_search(): input=layers.softmax(logits), k=beam_size ) accu_scores = layers.elementwise_add( - x=layers.log(topk_scores), + x=paddle.log(topk_scores), y=paddle.reshape(pre_scores, shape=[-1]), axis=0, ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index cb9e92bf629cea..0b5efa636afce8 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -435,9 +435,7 @@ def beam_search(self, inputs): cell_outputs = self._split_batch_beams(step_input) cell_outputs = self.fc(cell_outputs) - step_log_probs = fluid.layers.log( - fluid.layers.softmax(cell_outputs) - ) + step_log_probs = paddle.log(fluid.layers.softmax(cell_outputs)) noend_array = [-self.kinf] * self.tar_vocab_size noend_array[self.beam_end_token] = 0 noend_mask_tensor = to_variable( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py index 90a7b4d35efd9d..3773187b2596c1 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py @@ -329,13 +329,11 @@ def bi_loss(pred_score, gt_label): coef_0 = 0.5 * ratio / (ratio - 1) coef_1 = 0.5 * ratio epsilon = 0.000001 - # temp = 
fluid.layers.log(pred_score + epsilon) - loss_pos = paddle.multiply( - fluid.layers.log(pred_score + epsilon), pmask - ) + # temp = paddle.log(pred_score + epsilon) + loss_pos = paddle.multiply(paddle.log(pred_score + epsilon), pmask) loss_pos = coef_1 * fluid.layers.reduce_mean(loss_pos) loss_neg = paddle.multiply( - fluid.layers.log(1.0 - pred_score + epsilon), (1.0 - pmask) + paddle.log(1.0 - pred_score + epsilon), (1.0 - pmask) ) loss_neg = coef_0 * fluid.layers.reduce_mean(loss_neg) loss = -1 * (loss_pos + loss_neg) @@ -400,12 +398,10 @@ def pem_cls_loss_func(pred_score, gt_iou_map, mask): coef_0 = 0.5 * ratio / (ratio - 1) coef_1 = 0.5 * ratio epsilon = 0.000001 - loss_pos = paddle.multiply( - fluid.layers.log(pred_score + epsilon), pmask - ) + loss_pos = paddle.multiply(paddle.log(pred_score + epsilon), pmask) loss_pos = coef_1 * paddle.sum(loss_pos) loss_neg = paddle.multiply( - fluid.layers.log(1.0 - pred_score + epsilon), nmask + paddle.log(1.0 - pred_score + epsilon), nmask ) loss_neg = coef_0 * paddle.sum(loss_neg) loss = -1 * (loss_pos + loss_neg) / num_entries diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py index 6423d0d6bbcbf1..15e6827766a311 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py @@ -122,7 +122,7 @@ def select_action(state): mask = to_variable(_mask) mask.stop_gradient = True - loss_probs = fluid.layers.log(loss_probs) + loss_probs = paddle.log(loss_probs) loss_probs = paddle.multiply(loss_probs, mask) loss_probs = paddle.sum(loss_probs, axis=-1) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index 50d00a653170c1..ae7da008dfc74b 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -845,7 +845,7 @@ def gather(input, indices, batch_pos): ) caches = map_structure(split_batch_beams, caches) step_log_probs = split_batch_beams( - fluid.layers.log(fluid.layers.softmax(logits)) + paddle.log(fluid.layers.softmax(logits)) ) step_log_probs = mask_probs( diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index 9f5bbee0fc88b0..2479312a51ef54 100755 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -2417,8 +2417,8 @@ def test_error(self): name="in2", shape=[11, 17], append_batch_size=False, dtype="int64" ) - self.assertRaises(TypeError, fluid.layers.log, in1) - self.assertRaises(TypeError, fluid.layers.log, in2) + self.assertRaises(TypeError, paddle.log, in1) + self.assertRaises(TypeError, paddle.log, in2) class TestLog_ZeroDim(TestLog): diff --git a/python/paddle/fluid/tests/unittests/test_beam_search_op.py b/python/paddle/fluid/tests/unittests/test_beam_search_op.py index 0d44764e2da1bf..bc737a5ed55f4f 100644 --- a/python/paddle/fluid/tests/unittests/test_beam_search_op.py +++ b/python/paddle/fluid/tests/unittests/test_beam_search_op.py @@ -314,7 +314,7 @@ def test_errors(self): probs = fluid.data(name='probs', shape=[10000], dtype='float32') topk_scores, topk_indices = 
fluid.layers.topk(probs, k=4) accu_scores = fluid.layers.elementwise_add( - x=fluid.layers.log(x=topk_scores), + x=paddle.log(x=topk_scores), y=paddle.reshape(pre_scores, shape=[-1]), axis=0, ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py index bea24aa2739327..2a2d2ef9053317 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py @@ -71,7 +71,7 @@ def run_dygraph(): dy_mask = fluid.dygraph.base.to_variable(mask) dy_mask.stop_gradient = True - loss_probs = fluid.layers.log(loss_probs) + loss_probs = paddle.log(loss_probs) loss_probs = fluid.layers.elementwise_mul(loss_probs, dy_mask) loss_probs = paddle.sum(loss_probs, axis=-1) @@ -139,7 +139,7 @@ def run_dygraph(): st_loss_probs = policy(st_state) - st_loss_probs = fluid.layers.log(st_loss_probs) + st_loss_probs = paddle.log(st_loss_probs) st_loss_probs = fluid.layers.elementwise_mul(st_loss_probs, st_mask) st_loss_probs = paddle.sum(st_loss_probs, axis=-1) From fd1c0d7fc5a0d65cfe18aaa92cf1e62183f0f050 Mon Sep 17 00:00:00 2001 From: ShenLiang <1422485404@qq.com> Date: Wed, 30 Nov 2022 15:46:00 +0800 Subject: [PATCH 063/154] [BugFix]Fix tuple output bug of pylayer (#48533) * fix bug of pylayer * fix bug --- paddle/fluid/pybind/eager_py_layer.cc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/pybind/eager_py_layer.cc b/paddle/fluid/pybind/eager_py_layer.cc index 294da6956abc57..f80a39f9f0a9ba 100644 --- a/paddle/fluid/pybind/eager_py_layer.cc +++ b/paddle/fluid/pybind/eager_py_layer.cc @@ -445,11 +445,14 @@ PyObject* pylayer_method_apply(PyObject* cls, } if (outputs_size == 1) { - Py_XDECREF(outputs); - outputs = PyTuple_GetItem(outputs_tuple, 0); - Py_INCREF(outputs); - Py_XDECREF(outputs_tuple); + if (!PyTuple_Check(outputs) && !PyList_Check(outputs)) { + Py_XDECREF(outputs); + outputs = PyTuple_GetItem(outputs_tuple, 0); + Py_INCREF(outputs); + Py_XDECREF(outputs_tuple); + } } + Py_XDECREF(forward_args); Py_XDECREF(kwargs_value_list); Py_XDECREF(backward_function); From e9b4c6e70f0b844bd96019b82672e72a3be4b04a Mon Sep 17 00:00:00 2001 From: wasupandceacar Date: Wed, 30 Nov 2022 15:50:48 +0800 Subject: [PATCH 064/154] fix AddQuantDequantPass indent (#48550) --- .../paddle/fluid/contrib/slim/quantization/quantization_pass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 8902b40aa68e52..6d99f0949d4a7c 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -1956,7 +1956,7 @@ def apply(self, graph): graph.update_input_link( in_node, quant_var_node, op_node ) - t.update() + t.update() # Backward stage, update input link for op_node in all_op_nodes: From 612b81c54b478f2950e2ae2a3163cb3daa17485d Mon Sep 17 00:00:00 2001 From: ccrrong <101700995+ccrrong@users.noreply.github.com> Date: Wed, 30 Nov 2022 15:51:50 +0800 Subject: [PATCH 065/154] remove cos_sim (#48501) --- python/paddle/fluid/layers/nn.py | 38 ------------------- .../tests/book/test_recommender_system.py | 4 +- .../tests/unittests/dist_fleet_simnet_bow.py | 4 +- .../dygraph_to_static/simnet_dygraph_model.py | 2 +- .../fluid/tests/unittests/test_cos_sim_op.py | 21 ---------- 
.../tests/unittests/test_dist_fleet_ps.py | 4 +- .../tests/unittests/test_dist_fleet_ps11.py | 4 +- .../tests/unittests/test_dist_fleet_ps12.py | 4 +- .../tests/unittests/test_dist_fleet_ps13.py | 4 +- .../tests/unittests/test_dist_fleet_ps2.py | 4 +- .../tests/unittests/test_dist_fleet_ps3.py | 4 +- .../tests/unittests/test_dist_fleet_ps4.py | 4 +- .../tests/unittests/test_dist_fleet_ps5.py | 4 +- .../tests/unittests/test_dist_fleet_ps6.py | 4 +- 14 files changed, 24 insertions(+), 81 deletions(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 911ac5d74a339e..dbac69df682720 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -67,7 +67,6 @@ 'embedding', 'linear_chain_crf', 'crf_decoding', - 'cos_sim', 'conv2d', 'softmax', 'pool2d', @@ -1005,43 +1004,6 @@ def crf_decoding(input, param_attr, label=None, length=None): return viterbi_path -@templatedoc() -def cos_sim(X, Y): - """ - ${comment} - - Args: - X (Tensor): ${x_comment}. - Y (Tensor): ${y_comment}. - - Returns: - A Tensor representing the output of cosine(X, Y). - - Examples: - .. code-block:: python - - import paddle - - x = paddle.rand(shape=[3, 7], dtype='float32') - y = paddle.rand(shape=[1, 7], dtype='float32') - out = paddle.fluid.layers.cos_sim(x, y) - print(out) - - """ - check_variable_and_dtype(X, 'X', ['float32'], 'cos_sim') - check_variable_and_dtype(Y, 'Y', ['float32'], 'cos_sim') - helper = LayerHelper('cos_sim', **locals()) - out = helper.create_variable_for_type_inference(dtype=X.dtype) - xnorm = helper.create_variable_for_type_inference(dtype=X.dtype) - ynorm = helper.create_variable_for_type_inference(dtype=X.dtype) - helper.append_op( - type='cos_sim', - inputs={'X': [X], 'Y': [Y]}, - outputs={'Out': [out], 'XNorm': [xnorm], 'YNorm': [ynorm]}, - ) - return out - - @deprecated(since="2.0.0", update_to="paddle.nn.functional.dropout") def dropout( x, diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index c270c87b3c2acc..3aacd377dc0c7e 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -161,7 +161,9 @@ def model(): mov_combined_features = get_mov_combined_features() # need cos sim - inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features) + inference = paddle.nn.functional.cosine_similarity( + x1=usr_combined_features, x2=mov_combined_features + ) scale_infer = paddle.scale(x=inference, scale=5.0) label = layers.data(name='score', shape=[1], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py index eb128f9be75fa5..2c16fff90a59f9 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py @@ -192,8 +192,8 @@ def train_network( ), bias_attr=fluid.ParamAttr(name="__fc_b__"), ) - cos_q_pt = fluid.layers.cos_sim(q_fc, pt_fc) - cos_q_nt = fluid.layers.cos_sim(q_fc, nt_fc) + cos_q_pt = paddle.nn.functional.cosine_similarity(q_fc, pt_fc) + cos_q_nt = paddle.nn.functional.cosine_similarity(q_fc, nt_fc) # loss avg_cost = get_loss(cos_q_pt, cos_q_nt) # acc diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py index 10e4b9d85ec161..f236faccdc2df8 100644 --- 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py @@ -136,7 +136,7 @@ def ops(self, x, y): """ operation """ - sim = fluid.layers.cos_sim(x, y) + sim = paddle.nn.functional.cosine_similarity(x, y) return sim diff --git a/python/paddle/fluid/tests/unittests/test_cos_sim_op.py b/python/paddle/fluid/tests/unittests/test_cos_sim_op.py index 15d1d1e75df84c..f9c761c9eedf3f 100644 --- a/python/paddle/fluid/tests/unittests/test_cos_sim_op.py +++ b/python/paddle/fluid/tests/unittests/test_cos_sim_op.py @@ -17,9 +17,6 @@ import numpy as np from op_test import OpTest -import paddle.fluid as fluid -from paddle.fluid import Program, program_guard - class TestCosSimOp(OpTest): def setUp(self): @@ -121,23 +118,5 @@ def setUp(self): } -class TestCosSimOpError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - # the input of batch_norm must be Variable. - x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace() - ) - x2 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace() - ) - self.assertRaises(TypeError, fluid.layers.cos_sim, x1, x2) - - # the input dtype of batch_norm must be float32 - x3 = fluid.layers.data(name='x3', shape=[3, 4, 5, 6], dtype="int32") - x4 = fluid.layers.data(name='x4', shape=[3, 4, 5, 6], dtype="int64") - self.assertRaises(TypeError, fluid.layers.cos_sim, x3, x4) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py index 685f8dd0e4c10c..f97e1d6c3cff2d 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py @@ -162,8 +162,8 @@ def get_loss(cos_q_pt, cos_q_nt): ), bias_attr=fluid.ParamAttr(name="__fc_b__"), ) - cos_q_pt = fluid.layers.cos_sim(q_fc, pt_fc) - cos_q_nt = fluid.layers.cos_sim(q_fc, nt_fc) + cos_q_pt = paddle.nn.functional.cosine_similarity(q_fc, pt_fc) + cos_q_nt = paddle.nn.functional.cosine_similarity(q_fc, nt_fc) # loss avg_cost = get_loss(cos_q_pt, cos_q_nt) # acc diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py index cea69f92e69213..5e94a1949c551f 100755 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py @@ -150,8 +150,8 @@ def get_loss(cos_q_pt, cos_q_nt): ), bias_attr=fluid.ParamAttr(name="__fc_b__"), ) - cos_q_pt = fluid.layers.cos_sim(q_fc, pt_fc) - cos_q_nt = fluid.layers.cos_sim(q_fc, nt_fc) + cos_q_pt = paddle.nn.functional.cosine_similarity(q_fc, pt_fc) + cos_q_nt = paddle.nn.functional.cosine_similarity(q_fc, nt_fc) # loss avg_cost = get_loss(cos_q_pt, cos_q_nt) # acc diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py index f56335454640a3..57937777386202 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py @@ -153,8 +153,8 @@ def get_loss(cos_q_pt, cos_q_nt): ), bias_attr=fluid.ParamAttr(name="__fc_b__"), ) - cos_q_pt = fluid.layers.cos_sim(q_fc, pt_fc) - cos_q_nt = fluid.layers.cos_sim(q_fc, nt_fc) + cos_q_pt = paddle.nn.functional.cosine_similarity(q_fc, pt_fc) + cos_q_nt = 
paddle.nn.functional.cosine_similarity(q_fc, nt_fc) # loss avg_cost = get_loss(cos_q_pt, cos_q_nt) # acc diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py index 107488e771f3c4..a1eb88f8ff86b3 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py @@ -160,8 +160,8 @@ def get_loss(cos_q_pt, cos_q_nt): ), bias_attr=fluid.ParamAttr(name="__fc_b__"), ) - cos_q_pt = fluid.layers.cos_sim(q_fc, pt_fc) - cos_q_nt = fluid.layers.cos_sim(q_fc, nt_fc) + cos_q_pt = paddle.nn.functional.cosine_similarity(q_fc, pt_fc) + cos_q_nt = paddle.nn.functional.cosine_similarity(q_fc, nt_fc) # loss avg_cost = get_loss(cos_q_pt, cos_q_nt) # acc diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py index 2afaa2bf472570..609611bd24e609 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py @@ -160,8 +160,8 @@ def get_loss(cos_q_pt, cos_q_nt): ), bias_attr=fluid.ParamAttr(name="__fc_b__"), ) - cos_q_pt = fluid.layers.cos_sim(q_fc, pt_fc) - cos_q_nt = fluid.layers.cos_sim(q_fc, nt_fc) + cos_q_pt = paddle.nn.functional.cosine_similarity(q_fc, pt_fc) + cos_q_nt = paddle.nn.functional.cosine_similarity(q_fc, nt_fc) # loss avg_cost = get_loss(cos_q_pt, cos_q_nt) # acc diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py index 9cc5b947f67268..a2192b16f3e28f 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py @@ -162,8 +162,8 @@ def get_loss(cos_q_pt, cos_q_nt): ), bias_attr=fluid.ParamAttr(name="__fc_b__"), ) - cos_q_pt = fluid.layers.cos_sim(q_fc, pt_fc) - cos_q_nt = fluid.layers.cos_sim(q_fc, nt_fc) + cos_q_pt = paddle.nn.functional.cosine_similarity(q_fc, pt_fc) + cos_q_nt = paddle.nn.functional.cosine_similarity(q_fc, nt_fc) # loss avg_cost = get_loss(cos_q_pt, cos_q_nt) # acc diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py index 751f0c70b8da2b..d4197129df1f9a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py @@ -156,8 +156,8 @@ def get_loss(cos_q_pt, cos_q_nt): ), bias_attr=fluid.ParamAttr(name="__fc_b__"), ) - cos_q_pt = fluid.layers.cos_sim(q_fc, pt_fc) - cos_q_nt = fluid.layers.cos_sim(q_fc, nt_fc) + cos_q_pt = paddle.nn.functional.cosine_similarity(q_fc, pt_fc) + cos_q_nt = paddle.nn.functional.cosine_similarity(q_fc, nt_fc) # loss avg_cost = get_loss(cos_q_pt, cos_q_nt) # acc diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py index 0b00a97a344921..deb6d97cd46594 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py @@ -162,8 +162,8 @@ def get_loss(cos_q_pt, cos_q_nt): ), bias_attr=fluid.ParamAttr(name="__fc_b__"), ) - cos_q_pt = fluid.layers.cos_sim(q_fc, pt_fc) - cos_q_nt = fluid.layers.cos_sim(q_fc, nt_fc) + cos_q_pt = paddle.nn.functional.cosine_similarity(q_fc, pt_fc) + cos_q_nt = paddle.nn.functional.cosine_similarity(q_fc, nt_fc) # loss avg_cost = get_loss(cos_q_pt, cos_q_nt) # acc diff --git 
a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py index f90b08c3f3387f..581ea0e0bc039a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py @@ -156,8 +156,8 @@ def get_loss(cos_q_pt, cos_q_nt): ), bias_attr=fluid.ParamAttr(name="__fc_b__"), ) - cos_q_pt = fluid.layers.cos_sim(q_fc, pt_fc) - cos_q_nt = fluid.layers.cos_sim(q_fc, nt_fc) + cos_q_pt = paddle.nn.functional.cosine_similarity(q_fc, pt_fc) + cos_q_nt = paddle.nn.functional.cosine_similarity(q_fc, nt_fc) # loss avg_cost = get_loss(cos_q_pt, cos_q_nt) # acc From 8a9bef70b23f2a882a66f0a009340d25e2b6b0cc Mon Sep 17 00:00:00 2001 From: Netpunk <69072522+Patrick-Star125@users.noreply.github.com> Date: Wed, 30 Nov 2022 16:05:42 +0800 Subject: [PATCH 066/154] [PHI decoupling] migrate transpose_op.cu.h and gpu_utils.h to phi (#48286) * migrate transpose_op.cu.h and gpu_utils.h * format code style * fix some problems * format code * reset tranpose_op.cc * test commit * recover transpose_op.h * delete transpose_op.h * adjust header files order in transpose_op.cc --- paddle/fluid/operators/fused/fmha_ref.h | 12 +-- .../operators/fused/fused_gate_attention.h | 27 ++++--- .../operators/mkldnn/transpose_mkldnn_op.cc | 2 +- paddle/fluid/operators/transpose_op.cc | 4 +- paddle/fluid/operators/transpose_op_mlu.cc | 2 +- paddle/fluid/operators/unique_op.h | 6 +- .../backends/gpu}/gpu_utils.h | 22 +++--- .../kernels/funcs/transpose_functor.cu.h} | 76 +++++++++---------- .../kernels/funcs/transpose_functor.h} | 52 ++----------- paddle/phi/kernels/gpu/transpose_kernel.cu | 4 +- 10 files changed, 83 insertions(+), 124 deletions(-) rename paddle/{fluid/framework => phi/backends/gpu}/gpu_utils.h (88%) rename paddle/{fluid/operators/transpose_op.cu.h => phi/kernels/funcs/transpose_functor.cu.h} (95%) rename paddle/{fluid/operators/transpose_op.h => phi/kernels/funcs/transpose_functor.h} (79%) diff --git a/paddle/fluid/operators/fused/fmha_ref.h b/paddle/fluid/operators/fused/fmha_ref.h index 66176c9e75422d..fc5f9cf71d3496 100644 --- a/paddle/fluid/operators/fused/fmha_ref.h +++ b/paddle/fluid/operators/fused/fmha_ref.h @@ -16,12 +16,12 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/dropout_impl.cu.h" #include "paddle/fluid/operators/fused/fused_softmax_mask.cu.h" -#include "paddle/fluid/operators/transpose_op.cu.h" #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/concat_and_split_functor.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" #include "paddle/phi/kernels/funcs/functors.h" +#include "paddle/phi/kernels/funcs/transpose_functor.cu.h" #include "paddle/phi/kernels/gpudnn/softmax_gpudnn.h" namespace paddle { @@ -98,7 +98,7 @@ class FMHARef { // transpose with perm [2, 0, 3, 1, 4], // output_shape: [3, bs, num_head, seq_len, head_dim] std::vector perm_1 = {2, 0, 3, 1, 4}; - TransposeGPUKernelDriver( + phi::funcs::TransposeGPUKernelDriver( dev_ctx_, qkv_input_tensor, perm_1, transpose_2_out_tensor); T* qkv_data = transpose_2_out_tensor->data(); T* qk_out_data = qk_out_tensor->data(); @@ -254,7 +254,7 @@ class FMHARef { // transpose: [0, 2, 1, 3] // output shape: [batch_size, seq_len, num_heads, head_dim] std::vector perm_3 = {0, 2, 1, 3}; - TransposeGPUKernelDriver( + phi::funcs::TransposeGPUKernelDriver( dev_ctx_, *qktv_out_tensor, perm_3, fmha_out_tensor); } @@ -428,7 +428,7 @@ class FMHARef { // transpose: [0, 2, 1, 3] // output shape: [batch_size, seq_len, num_heads, head_dim] std::vector perm_3 = {0, 2, 1, 3}; - TransposeGPUKernelDriver( + phi::funcs::TransposeGPUKernelDriver( dev_ctx_, *qktv_out_tensor, perm_3, fmha_out_tensor); } @@ -470,7 +470,7 @@ class FMHARef { // transpose bw std::vector perm_3 = {0, 2, 1, 3}; - TransposeGPUKernelDriver( + phi::funcs::TransposeGPUKernelDriver( dev_ctx_, fmha_out_grad_tensor, perm_3, qktv_out_grad_tensor); // recall batchedgemm(nn) fw: softmax_out_data(x) * v_ptr(y) = @@ -648,7 +648,7 @@ class FMHARef { // transpose bw std::vector perm_1 = {1, 3, 0, 2, 4}; - TransposeGPUKernelDriver( + phi::funcs::TransposeGPUKernelDriver( dev_ctx_, *transpose_2_out_grad_tensor, perm_1, qkv_input_grad_tensor); } diff --git a/paddle/fluid/operators/fused/fused_gate_attention.h b/paddle/fluid/operators/fused/fused_gate_attention.h index e50cc24d88adf1..1fba366ad2fe31 100644 --- a/paddle/fluid/operators/fused/fused_gate_attention.h +++ b/paddle/fluid/operators/fused/fused_gate_attention.h @@ -14,11 +14,11 @@ limitations under the License. 
*/ #pragma once -#include "paddle/fluid/operators/transpose_op.cu.h" #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" #include "paddle/phi/kernels/funcs/reduce_function.h" +#include "paddle/phi/kernels/funcs/transpose_functor.cu.h" #include "paddle/phi/kernels/gpudnn/softmax_gpudnn.h" namespace paddle { @@ -626,9 +626,12 @@ class FMHAGateRef { phi::DenseTensor* k_transpose_out, phi::DenseTensor* v_transpose_out) { std::vector perm = {0, 1, 3, 2, 4}; - TransposeGPUKernelDriver(dev_ctx_, q_out, perm, q_transpose_out); - TransposeGPUKernelDriver(dev_ctx_, k_out, perm, k_transpose_out); - TransposeGPUKernelDriver(dev_ctx_, v_out, perm, v_transpose_out); + phi::funcs::TransposeGPUKernelDriver( + dev_ctx_, q_out, perm, q_transpose_out); + phi::funcs::TransposeGPUKernelDriver( + dev_ctx_, k_out, perm, k_transpose_out); + phi::funcs::TransposeGPUKernelDriver( + dev_ctx_, v_out, perm, v_transpose_out); } void ComputeQKVTransposeBackward(const phi::DenseTensor& q_transpose_out_grad, @@ -638,11 +641,11 @@ class FMHAGateRef { phi::DenseTensor* k_out_grad, phi::DenseTensor* v_out_grad) { std::vector perm = {0, 1, 3, 2, 4}; - TransposeGPUKernelDriver( + phi::funcs::TransposeGPUKernelDriver( dev_ctx_, q_transpose_out_grad, perm, q_out_grad); - TransposeGPUKernelDriver( + phi::funcs::TransposeGPUKernelDriver( dev_ctx_, k_transpose_out_grad, perm, k_out_grad); - TransposeGPUKernelDriver( + phi::funcs::TransposeGPUKernelDriver( dev_ctx_, v_transpose_out_grad, perm, v_out_grad); } @@ -651,14 +654,15 @@ class FMHAGateRef { void ComputeQKVTransposeForward(const phi::DenseTensor& qkv_out, phi::DenseTensor* qkv_transpose_out) { std::vector perm = {3, 0, 1, 4, 2, 5}; - TransposeGPUKernelDriver(dev_ctx_, qkv_out, perm, qkv_transpose_out); + phi::funcs::TransposeGPUKernelDriver( + dev_ctx_, qkv_out, perm, qkv_transpose_out); } void ComputeQKVTransposeBackward( const phi::DenseTensor& qkv_transpose_out_grad, phi::DenseTensor* qkv_out_grad) { std::vector perm = {1, 2, 4, 0, 3, 5}; - TransposeGPUKernelDriver( + phi::funcs::TransposeGPUKernelDriver( dev_ctx_, qkv_transpose_out_grad, perm, qkv_out_grad); } @@ -667,13 +671,14 @@ class FMHAGateRef { void ComputeQKTVTransposeForward(const phi::DenseTensor& qktv_out, phi::DenseTensor* fmha_out) { std::vector perm = {0, 1, 3, 2, 4}; - TransposeGPUKernelDriver(dev_ctx_, qktv_out, perm, fmha_out); + phi::funcs::TransposeGPUKernelDriver(dev_ctx_, qktv_out, perm, fmha_out); } void ComputeQKTVTransposeBackward(const phi::DenseTensor& fmha_out_grad, phi::DenseTensor* qktv_out_grad) { std::vector perm = {0, 1, 3, 2, 4}; - TransposeGPUKernelDriver(dev_ctx_, fmha_out_grad, perm, qktv_out_grad); + phi::funcs::TransposeGPUKernelDriver( + dev_ctx_, fmha_out_grad, perm, qktv_out_grad); } // qk_out = qk_out + nonbatched_bias + src_mask diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc index f7f7e5f6ad8935..2c5b269c3923b7 100644 --- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc @@ -15,8 +15,8 @@ #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/kernels/funcs/transpose_functor.h" namespace paddle { namespace operators { 
diff --git a/paddle/fluid/operators/transpose_op.cc b/paddle/fluid/operators/transpose_op.cc index 9ee0196d8c7dc3..52a9955acc18d7 100644 --- a/paddle/fluid/operators/transpose_op.cc +++ b/paddle/fluid/operators/transpose_op.cc @@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/transpose_op.h" - #include #include #include @@ -21,6 +19,8 @@ limitations under the License. */ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/phi/kernels/funcs/transpose_functor.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/transpose_op_mlu.cc b/paddle/fluid/operators/transpose_op_mlu.cc index 722ad3584f3545..0ef9fc247ab012 100644 --- a/paddle/fluid/operators/transpose_op_mlu.cc +++ b/paddle/fluid/operators/transpose_op_mlu.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/operators/transpose_op.h" +#include "paddle/phi/kernels/funcs/transpose_functor.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/unique_op.h b/paddle/fluid/operators/unique_op.h index 45b1e3c435bdc4..d1e9afa03ccee6 100644 --- a/paddle/fluid/operators/unique_op.h +++ b/paddle/fluid/operators/unique_op.h @@ -23,8 +23,8 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/fluid/operators/transpose_op.h" #include "paddle/phi/kernels/funcs/math_function.h" +#include "paddle/phi/kernels/funcs/transpose_functor.h" namespace paddle { namespace operators { @@ -251,7 +251,7 @@ static void UniqueDim(const framework::ExecutionContext& context, in_trans.Resize(in_trans_dims); in_trans.mutable_data(context.GetPlace()); auto& dev_ctx = context.template device_context(); - TransCompute( + phi::funcs::TransCompute( in.dims().size(), dev_ctx, in, &in_trans, permute); // reshape tensor: eg. 
[dim1, dim0, dim2] -> [dim1, dim0*dim2] framework::DDim in_trans_flat_dims = phi::flatten_to_2d(in_trans_dims, 1); @@ -315,7 +315,7 @@ static void UniqueDim(const framework::ExecutionContext& context, out->Resize(phi::make_ddim(out_trans_dims_vec)); out->mutable_data(context.GetPlace()); concat_functor(dev_ctx, input_unbind, 0, &out_trans); - TransCompute( + phi::funcs::TransCompute( out_trans.dims().size(), dev_ctx, out_trans, out, permute); if (return_inverse) { diff --git a/paddle/fluid/framework/gpu_utils.h b/paddle/phi/backends/gpu/gpu_utils.h similarity index 88% rename from paddle/fluid/framework/gpu_utils.h rename to paddle/phi/backends/gpu/gpu_utils.h index 68cbc309c24d6b..ea97a086afc36e 100644 --- a/paddle/fluid/framework/gpu_utils.h +++ b/paddle/phi/backends/gpu/gpu_utils.h @@ -18,11 +18,11 @@ #include -#include "paddle/fluid/platform/enforce.h" +#include "paddle/phi/core/enforce.h" #include "unsupported/Eigen/CXX11/Tensor" -namespace paddle { -namespace framework { +namespace phi { +namespace funcs { template struct DeviceArray { @@ -110,16 +110,16 @@ IntType CeilOrFloor(IntType x, IntType deviser) { PADDLE_ENFORCE_GT( deviser, 0, - platform::errors::InvalidArgument("deviser should be greater than 0, " - "but received is:%d", - deviser)); + phi::errors::InvalidArgument("deviser should be greater than 0, " + "but received is:%d", + deviser)); PADDLE_ENFORCE_GT( x, 0, - platform::errors::InvalidArgument("input should be greater than 0, " - "but received is:%d", - x)); + phi::errors::InvalidArgument("input should be greater than 0, " + "but received is:%d", + x)); const IntType round_to_zero = x / deviser; const IntType inte_result = round_to_zero * deviser; @@ -140,5 +140,5 @@ IntType CeilOrFloor(IntType x, IntType deviser) { } } -} // namespace framework -} // namespace paddle +} // namespace funcs +} // namespace phi diff --git a/paddle/fluid/operators/transpose_op.cu.h b/paddle/phi/kernels/funcs/transpose_functor.cu.h similarity index 95% rename from paddle/fluid/operators/transpose_op.cu.h rename to paddle/phi/kernels/funcs/transpose_functor.cu.h index 4fc610c393f103..0d24fdebef1489 100644 --- a/paddle/fluid/operators/transpose_op.cu.h +++ b/paddle/phi/kernels/funcs/transpose_functor.cu.h @@ -14,20 +14,18 @@ limitations under the License. */ #pragma once -#include "paddle/fluid/framework/gpu_utils.h" -#include "paddle/fluid/operators/transpose_op.h" -#include "paddle/fluid/platform/fast_divmod.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" +#include "paddle/phi/backends/gpu/gpu_utils.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/autotune/auto_tune_base.h" +#include "paddle/phi/kernels/funcs/transpose_functor.h" +#include "paddle/phi/kernels/primitive/datamover_primitives.h" -namespace paddle { -namespace operators { +namespace phi { +namespace funcs { using Tensor = phi::DenseTensor; -using Dim3 = framework::Dim3; -using Index3 = framework::Index3; struct EqualTo { constexpr bool operator()(int a, int b) const { return a == b; } @@ -118,8 +116,8 @@ __global__ void TilingSwapDim1And2(const T* __restrict__ input, }; // Converts block idx to tile index, each block process a tile - Index3 input_block_tile_index = framework::ConvertTensorIndex( - blockIdx.x, tile_aligned_input_dim); + Index3 input_block_tile_index = + ConvertTensorIndex(blockIdx.x, tile_aligned_input_dim); // Compute real index align to tile:0, 32, 64... 
Index3 block_tile_index_in_input = { @@ -130,8 +128,7 @@ __global__ void TilingSwapDim1And2(const T* __restrict__ input, // Compute block flat index against input dims. IndexType input_origin_block_flat_index = - framework::FlatTensorIndex(block_tile_index_in_input, - input_dims); + FlatTensorIndex(block_tile_index_in_input, input_dims); bool full_tile = true; IndexType tile_width = TileY; @@ -193,8 +190,7 @@ __global__ void TilingSwapDim1And2(const T* __restrict__ input, }; IndexType output_origin_block_flat_index = - framework::FlatTensorIndex(block_tile_index_in_output, - output_dims); + FlatTensorIndex(block_tile_index_in_output, output_dims); constexpr IndexType out_effective_thread_num = NumThreads / TileX * TileX; @@ -230,13 +226,13 @@ bool SelectProperTileSize(std::vector>* tiles) { PADDLE_ENFORCE_LE( TSIZE, 16, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The tile size should smaller than 16, but received is:%d.", TSIZE)); PADDLE_ENFORCE_EQ( (TSIZE & (TSIZE - 1)), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Data types should be powers of 2, but reived size is:%d.", TSIZE)); const int kMaxLongSideLen = 1024; @@ -316,7 +312,7 @@ struct NarrowDims2TransposeDispatch { PADDLE_ENFORCE_EQ( (tile_long & (tile_long - 1)), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The length of the longer side of the tile should be power of 2." " But received value is:%d.", tile_long)); @@ -381,7 +377,7 @@ struct NarrowDims2TransposeDispatch< PADDLE_ENFORCE_EQ( (tile_long & (tile_long - 1)), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The length of the longer side of the tile should be power of 2." " But received value is:%d.", tile_long)); @@ -431,7 +427,7 @@ struct NarrowDims2TransposeDispatch< PADDLE_ENFORCE_EQ( (tile_long & (tile_long - 1)), 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The length of the longer side of the tile should be power of 2," " but received is:%d.", tile_long)); @@ -459,7 +455,7 @@ void SwapDim1And2InNarrow(const phi::GPUContext& d, PADDLE_ENFORCE_EQ( ret, true, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "SelectProperTileSize should return true, but return value is:%d.", ret)); @@ -475,12 +471,12 @@ void SwapDim1And2InNarrow(const phi::GPUContext& d, // to find least wasted threads, which means we need to find tile // can split input properly, in another words: num_wasted_threads=0. int num_wasted_threads = - input_long_edge - framework::CeilOrFloor( - input_long_edge, proposed_tile_long_edge) * - proposed_tile_long_edge; + input_long_edge - + CeilOrFloor(input_long_edge, proposed_tile_long_edge) * + proposed_tile_long_edge; - int num_full_tiles = framework::CeilOrFloor( - input_long_edge, proposed_tile_long_edge); + int num_full_tiles = + CeilOrFloor(input_long_edge, proposed_tile_long_edge); float cost = num_wasted_threads; @@ -514,8 +510,8 @@ void SwapDim1And2InNarrow(const phi::GPUContext& d, // Here finally get proper long X short tile size. 
Dim3 input_dims_aligned = { input_dims[0], - framework::CeilOrFloor(input_dims[1], select_tile_size_i), - framework::CeilOrFloor(input_dims[2], select_tile_size_j), + CeilOrFloor(input_dims[1], select_tile_size_i), + CeilOrFloor(input_dims[2], select_tile_size_j), }; IndexType total_tiles_count = input_dims_aligned[0]; @@ -549,7 +545,7 @@ __global__ void TransposeSimpleKernel(IndexType nthreads, CUDA_KERNEL_LOOP_TYPE(output_index, nthreads, IndexType) { Index3 output_tensor_index = - framework::ConvertTensorIndex(output_index, output_dims); + ConvertTensorIndex(output_index, output_dims); Index3 input_tensor_index; input_tensor_index[0] = output_tensor_index[pos0]; @@ -557,7 +553,7 @@ __global__ void TransposeSimpleKernel(IndexType nthreads, input_tensor_index[2] = output_tensor_index[pos2]; IndexType input_index = - framework::FlatTensorIndex(input_tensor_index, input_dims); + FlatTensorIndex(input_tensor_index, input_dims); output[output_index] = input[input_index]; } @@ -585,8 +581,8 @@ void SendSwapDim1And2InTranspose(const phi::GPUContext& d, Dim3 input_dims_aligned = { input_dims[0], - framework::CeilOrFloor(input_dims[1], kTileSize), - framework::CeilOrFloor(input_dims[2], kTileSize), + CeilOrFloor(input_dims[1], kTileSize), + CeilOrFloor(input_dims[2], kTileSize), }; IndexType total_tiles_count = input_dims_aligned[0]; @@ -653,13 +649,13 @@ struct SwapDim0And2InTranspose { // This function is to combine dimension. fox example: // (0, 1, 3, 2) --> (0, 2, 1) -inline void CombineTransposeDim3(const framework::DDim& shape, +inline void CombineTransposeDim3(const DDim& shape, const std::vector& perm, std::vector* new_perm, - framework::DDim* new_dims) { + DDim* new_dims) { PADDLE_ENFORCE_EQ(shape.size(), perm.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( " shape should have the save dim with perm, but" " received shape size is:%d, perm size is:%d.", shape.size(), @@ -717,7 +713,7 @@ struct TransposeSimple { phi::DenseTensor* out) { // First reduce the dimensions of the input tensor if possible. std::vector new_perm; - framework::DDim new_dims; + DDim new_dims; CombineTransposeDim3(in.dims(), perm, &new_perm, &new_dims); // Only use tile copy GPU kernel when dimension is 2 or 3. @@ -796,7 +792,7 @@ class IdxHelper { explicit IdxHelper(const uint32_t* dims) { for (int i = N - 1; i >= 0; --i) { uint32_t value = i < (N - 1) ? dims[i + 1] * stride_[i + 1] : 1; - divmoder_[i] = paddle::platform::FastDivMod(value); + divmoder_[i] = phi::kps::details::FastDivMod(value); stride_[i] = value; } } @@ -817,7 +813,7 @@ class IdxHelper { private: uint32_t stride_[N]; - paddle::platform::FastDivMod divmoder_[N]; + phi::kps::details::FastDivMod divmoder_[N]; }; // Transform index between memory offset and shape coodinate. 
@@ -1188,8 +1184,8 @@ void TransposeGPUKernelDriver(const phi::GPUContext& ctx, ret = TransposeSimple::run(ctx, in, perm, out); } if (!ret) { - auto* tuner = - phi::autotune::MakeTransposeTuner(TransCompute); + auto* tuner = phi::autotune::MakeTransposeTuner( + funcs::TransCompute); tuner->AddCallBack(PermuteAndTranspose); size_t key = phi::autotune::TransposeKey( @@ -1208,5 +1204,5 @@ void TransposeGPUKernelDriver(const phi::GPUContext& ctx, } } -} // namespace operators -} // namespace paddle +} // namespace funcs +} // namespace phi diff --git a/paddle/fluid/operators/transpose_op.h b/paddle/phi/kernels/funcs/transpose_functor.h similarity index 79% rename from paddle/fluid/operators/transpose_op.h rename to paddle/phi/kernels/funcs/transpose_functor.h index 45495505e60599..d2a72efed0ac0d 100644 --- a/paddle/fluid/operators/transpose_op.h +++ b/paddle/phi/kernels/funcs/transpose_functor.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,57 +16,15 @@ limitations under the License. */ #include -#include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" #include "paddle/phi/kernels/funcs/math_function.h" -namespace paddle { -namespace operators { +namespace phi { +namespace funcs { enum { kTransposeMKLDNNFP32 = 1, kTransposeMKLDNNINT8 = 2 }; -template -inline void TransCompute(const int dim, - const DeviceContext& dev_ctx, - const phi::DenseTensor& in, - phi::DenseTensor* out, - const std::vector& axis) { - switch (dim) { - case 0: - phi::Copy(dev_ctx, in, dev_ctx.GetPlace(), false, out); - break; - case 1: - phi::funcs::Transpose trans1; - trans1(dev_ctx, in, out, axis); - break; - case 2: - phi::funcs::Transpose trans2; - trans2(dev_ctx, in, out, axis); - break; - case 3: - phi::funcs::Transpose trans3; - trans3(dev_ctx, in, out, axis); - break; - case 4: - phi::funcs::Transpose trans4; - trans4(dev_ctx, in, out, axis); - break; - case 5: - phi::funcs::Transpose trans5; - trans5(dev_ctx, in, out, axis); - break; - case 6: - phi::funcs::Transpose trans6; - trans6(dev_ctx, in, out, axis); - break; - default: - // for dim >= 7 situation - phi::funcs::TransposeNormal trans_normal; - trans_normal(dev_ctx, in, out, axis); - } -} - enum PermuteType { kCopy = 1, kTranspose = 2, @@ -227,5 +185,5 @@ class TranposeTypeClassifier { } }; -} // namespace operators -} // namespace paddle +} // namespace funcs +} // namespace phi diff --git a/paddle/phi/kernels/gpu/transpose_kernel.cu b/paddle/phi/kernels/gpu/transpose_kernel.cu index 36cf3fb8e397fa..4b7265e2f3a24e 100644 --- a/paddle/phi/kernels/gpu/transpose_kernel.cu +++ b/paddle/phi/kernels/gpu/transpose_kernel.cu @@ -16,12 +16,12 @@ #include -#include "paddle/fluid/operators/transpose_op.cu.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/transpose_functor.cu.h" #include "paddle/phi/kernels/impl/transpose_grad_kernel_impl.h" namespace phi { @@ -38,7 +38,7 @@ void TransposeKernel(const Context& ctx, phi::Copy(ctx, x, ctx.GetPlace(), false, out); return; } - paddle::operators::TransposeGPUKernelDriver(ctx, x, 
axis, out); + phi::funcs::TransposeGPUKernelDriver(ctx, x, axis, out); } } // namespace phi From f62b3fc8c7b1e15dc4b4d3085a9b3cdedf686242 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 30 Nov 2022 16:33:44 +0800 Subject: [PATCH 067/154] Fix error log for yaml check (#48126) * fix error log for yaml check * remove grad_op of increment --- paddle/fluid/operators/generator/CMakeLists.txt | 17 +++++++++++------ paddle/phi/api/yaml/legacy_backward.yaml | 17 +---------------- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/paddle/fluid/operators/generator/CMakeLists.txt b/paddle/fluid/operators/generator/CMakeLists.txt index 111e1da92ca950..7ebc1300345ad8 100644 --- a/paddle/fluid/operators/generator/CMakeLists.txt +++ b/paddle/fluid/operators/generator/CMakeLists.txt @@ -77,16 +77,21 @@ execute_process( ./parsed_ops/ops.parsed.yaml ./parsed_ops/legacy_ops.parsed.yaml --backward_yaml_paths ./parsed_ops/backward_ops.parsed.yaml ./parsed_ops/legacy_backward_ops.parsed.yaml + RESULT_VARIABLE _result) +if(${_result}) + message(FATAL_ERROR "ops validation failed, exiting.") +endif() + +execute_process( + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generator COMMAND ${PYTHON_EXECUTABLE} cross_validate.py --forward_yaml_paths ./parsed_ops/sparse_ops.parsed.yaml --backward_yaml_paths ./parsed_ops/sparse_backward.parsed.yaml - RESULT_VARIABLE _results) -foreach(_result in ${_results}) - if(${_result}) - message(FATAL_ERROR "ops validation failed, exiting.") - endif() -endforeach() + RESULT_VARIABLE _result) +if(${_result}) + message(FATAL_ERROR "sparse ops validation failed, exiting.") +endif() # code generation for op, op makers, and argument mapping functions message( diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index 814b3c89c01a11..47ba24b091d7a7 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -326,7 +326,7 @@ func : conv3d_transpose_grad - backward_op : crop_grad - forward : crop_tensor (Tensor x, IntArray shape, IntArray offsets) -> Tensor(out) + forward : crop (Tensor x, IntArray shape, IntArray offsets) -> Tensor(out) args : (Tensor x, Tensor out_grad, IntArray offsets) output : Tensor(x_grad) infer_meta : @@ -674,12 +674,6 @@ output : Tensor(x_grad) invoke : imag_grad_impl(out_grad, x_grad) -- backward_op : increment_grad - forward : increment (Tensor x, float value) -> Tensor(out) - args : (Tensor out, float value) - output : Tensor(x_grad) - invoke : increment (out, -value) - - backward_op : index_add_grad forward : index_add(Tensor x, Tensor index, Tensor add_value, int axis) -> Tensor(out) args : (Tensor index, Tensor add_value, Tensor out_grad, int axis) @@ -1338,15 +1332,6 @@ backward : reshape_double_grad inplace : (out_grad -> x_grad) -- backward_op : reverse_array_grad - forward : reverse_array (Tensor[] x, IntArray axis) -> Tensor[](out) - args : (Tensor[] out_grad, IntArray axis) - output : Tensor[](x_grad){out_grad.size()} - infer_meta : - func : ReverseArrayInferMeta - kernel : - func : reverse - - backward_op : reverse_grad forward : reverse (Tensor x, IntArray axis) -> Tensor(out) args : (Tensor out_grad, IntArray axis) From 8b611f04048e0c1b554e7db9599c3789c47bac0a Mon Sep 17 00:00:00 2001 From: cyber-pioneer <116002591+cyber-pioneer@users.noreply.github.com> Date: Wed, 30 Nov 2022 16:34:47 +0800 Subject: [PATCH 068/154] remove fluid.layer.gather_tree (#48480) --- python/paddle/fluid/layers/nn.py | 65 ------------------- 
python/paddle/fluid/layers/rnn.py | 2 +- .../seq2seq_dygraph_model.py | 4 +- .../transformer_dygraph_model.py | 2 +- .../tests/unittests/test_gather_tree_op.py | 10 +-- 5 files changed, 10 insertions(+), 73 deletions(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index dbac69df682720..15eada61cf0d5a 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -128,7 +128,6 @@ 'shard_index', 'hard_swish', 'mish', - 'gather_tree', 'uniform_random', 'unbind', ] @@ -7928,70 +7927,6 @@ def mish(x, threshold=20, name=None): return out -def gather_tree(ids, parents): - r""" - To be used after beam search. After beam search, we get selected ids at - each time step and the corresponding parents in the search tree. Both ids - and parents have the layout :attr:`[max_time, batch_size, beam_size]`. Then - :attr:`gather_tree` is used to backtrace from the last time step and - generate the full sequences by collecting selected ids. - - Here is an example: - - .. code-block:: text - - Given: - ids = [[[2 2] - [6 1]] - [[3 9] - [6 1]] - [[0 1] - [9 0]]] - parents = [[[0 0] - [1 1]] - [[1 0] - [1 0]] - [[0 0] - [0 1]]] - - Then: - gather_tree(ids, parents) - = [[[2 2] - [1 6]] - [[3 3] - [6 1]] - [[0 1] - [9 0]]] - - Args: - ids(Tensor): A Tensor with shape :attr:`[length, batch_size, beam_size]` - and data type :attr:`int32` or :attr:`int64`. It contains the selected - ids of all time steps. - parents(Tensor): A Tensor with the same shape and data type as :attr:`ids`, - It contains the parents corresponding to selected ids when searching - among beams. - - Returns: - A Tensor with the same shape and data type as :attr:`ids`. \ - It contains the full sequences. The sequences are collected from \ - :attr:`ids` by backtracing according to :attr:`parents`. - - Examples: - .. code-block:: python - - import paddle - - ids = paddle.to_tensor([[[2, 2], [6, 1]], [[3, 9], [6, 1]], [[0, 1], [9, 0]]]) - - parents = paddle.to_tensor([[[0, 0], [1, 1]], [[1, 0], [1, 0]], [[0, 0], [0, 1]]]) - - final_sequences = paddle.nn.functional.gather_tree(ids, parents) - # [[[2, 2], [1, 6]], [[3, 3], [6, 1]], [[0, 1], [9, 0]]] - - """ - return paddle.nn.functional.gather_tree(ids, parents) - - @deprecated(since="2.0.0", update_to="paddle.uniform") @templatedoc() def uniform_random( diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 23e61db9f47a6f..1914c38f5423b0 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -1427,7 +1427,7 @@ def finalize(self, outputs, final_states, sequence_lengths): `[time_step, batch_size, beam_size]`. `final_states` is the same \ as the input argument `final_states`. 
""" - predicted_ids = nn.gather_tree( + predicted_ids = paddle.nn.functional.gather_tree( outputs.predicted_ids, outputs.parent_ids ) # TODO: use FinalBeamSearchDecoderOutput as output diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index 0b5efa636afce8..adc1909c64cd8a 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -502,7 +502,9 @@ def beam_search(self, inputs): predicted_ids = paddle.stack(predicted_ids) parent_ids = paddle.stack(parent_ids) - predicted_ids = fluid.layers.gather_tree(predicted_ids, parent_ids) + predicted_ids = paddle.nn.functional.gather_tree( + predicted_ids, parent_ids + ) predicted_ids = self._transpose_batch_time(predicted_ids) return predicted_ids diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index ae7da008dfc74b..fb00473f3d4521 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -884,7 +884,7 @@ def gather(input, indices, batch_pos): predict_ids = paddle.stack(predict_ids, axis=0) parent_ids = paddle.stack(parent_ids, axis=0) finished_seq = paddle.transpose( - layers.gather_tree(predict_ids, parent_ids), [1, 2, 0] + paddle.nn.functional.gather_tree(predict_ids, parent_ids), [1, 2, 0] ) finished_scores = topk_scores diff --git a/python/paddle/fluid/tests/unittests/test_gather_tree_op.py b/python/paddle/fluid/tests/unittests/test_gather_tree_op.py index 1af5776cd60e58..bcd319ed2d24a2 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_tree_op.py +++ b/python/paddle/fluid/tests/unittests/test_gather_tree_op.py @@ -67,7 +67,7 @@ def test_case(self): dtype='int64', append_batch_size=False, ) - final_sequences = fluid.layers.gather_tree(ids, parents) + final_sequences = paddle.nn.functional.gather_tree(ids, parents) paddle.disable_static() def test_case2(self): @@ -100,14 +100,14 @@ def test_errors(self): def test_Variable_ids(): # the input type must be Variable np_ids = np.random.random((5, 2, 2), dtype='int64') - fluid.layers.gather_tree(np_ids, parents) + paddle.nn.functional.gather_tree(np_ids, parents) self.assertRaises(TypeError, test_Variable_ids) def test_Variable_parents(): # the input type must be Variable np_parents = np.random.random((5, 2, 2), dtype='int64') - fluid.layers.gather_tree(ids, np_parents) + paddle.nn.functional.gather_tree(ids, np_parents) self.assertRaises(TypeError, test_Variable_parents) @@ -119,7 +119,7 @@ def test_type_ids(): dtype='float32', append_batch_size=False, ) - fluid.layers.gather_tree(bad_ids, parents) + paddle.nn.functional.gather_tree(bad_ids, parents) self.assertRaises(TypeError, test_type_ids) @@ -131,7 +131,7 @@ def test_type_parents(): dtype='float32', append_batch_size=False, ) - fluid.layers.gather_tree(ids, bad_parents) + paddle.nn.functional.gather_tree(ids, bad_parents) self.assertRaises(TypeError, test_type_parents) From cbb1cfbb5b9902f75401aa3118e12e554a4dac41 Mon Sep 17 00:00:00 2001 From: Qi Li Date: Wed, 30 Nov 2022 17:46:05 +0800 Subject: [PATCH 069/154] fix phi header file without fluid header, test=develop (#48488) --- paddle/phi/core/enforce.h | 1 - 1 file changed, 1 deletion(-) 
diff --git a/paddle/phi/core/enforce.h b/paddle/phi/core/enforce.h index d8449d5ca45d22..e0dd918cef4188 100644 --- a/paddle/phi/core/enforce.h +++ b/paddle/phi/core/enforce.h @@ -98,7 +98,6 @@ limitations under the License. */ #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include "paddle/phi/backends/gpu/gpu_types.h" #endif -#include "paddle/fluid/platform/flags.h" #include "paddle/utils/variant.h" From 96a8bbe78fc16296cbb2d62dd66e2a0a8c0ebdef Mon Sep 17 00:00:00 2001 From: wanghuancoder Date: Wed, 30 Nov 2022 18:34:20 +0800 Subject: [PATCH 070/154] refine conv add for xpu (#48432) --- python/paddle/nn/functional/conv.py | 43 ++++++++--------------------- 1 file changed, 11 insertions(+), 32 deletions(-) diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 38c5064a1cfc39..d29f91d035f288 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -145,20 +145,9 @@ def _conv_nd( data_format, ) if bias is not None: - channel_dim = ( - channel_dim + len(x.shape) if channel_dim < 0 else channel_dim - ) - if isinstance(x, tuple): - x = x[0] - if isinstance(bias, tuple): - bias = bias[0] - if len(bias.shape) < len(x.shape): - bias = _C_ops.reshape( - bias, - [1 for i in range(channel_dim)] - + bias.shape - + [1 for i in range(len(x.shape) - channel_dim - 1)], - ) + new_shape = [1] * len(x.shape) + new_shape[channel_dim] = -1 + bias = bias.reshape(new_shape) # TODO(qili93): temporary for ascned npu performance to be removed along with npu_identity op if 'npu' in get_all_custom_device_type(): with no_grad(): @@ -182,16 +171,10 @@ def _conv_nd( data_format, ) if bias is not None: - channel_dim = ( - channel_dim + len(x.shape) if channel_dim < 0 else channel_dim - ) - tmp_bias = _C_ops.reshape( - bias, - [1 for i in range(channel_dim)] - + bias.shape - + [1 for i in range(len(x.shape) - channel_dim - 1)], - ) - return _C_ops.add(pre_bias, tmp_bias) + new_shape = [1] * len(x.shape) + new_shape[channel_dim] = -1 + bias = bias.reshape(new_shape) + return _C_ops.add(pre_bias, bias) else: return pre_bias @@ -207,14 +190,10 @@ def _conv_nd( data_format, ) if bias is not None: - channel_dim = ( - channel_dim + len(x.shape) if channel_dim < 0 else channel_dim - ) - tmp_bias = _C_ops.reshape( - bias, - bias.shape + [1 for i in range(len(x.shape) - channel_dim - 1)], - ) - return _C_ops.add(pre_bias, tmp_bias) + new_shape = [1] * len(x.shape) + new_shape[channel_dim] = -1 + bias = bias.reshape(new_shape) + return _C_ops.add(pre_bias, bias) else: return pre_bias From 9218e742f8d5f74c5392c8c129fe024e2e1b6719 Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Wed, 30 Nov 2022 20:15:15 +0800 Subject: [PATCH 071/154] clean elem_arithmetic part5 unittest (#48466) --- .../tests/unittests/test_dist_fleet_ps.py | 6 +++--- .../tests/unittests/test_dist_fleet_ps11.py | 6 +++--- .../tests/unittests/test_dist_fleet_ps12.py | 6 +++--- .../tests/unittests/test_dist_fleet_ps13.py | 6 +++--- .../tests/unittests/test_dist_fleet_ps2.py | 6 +++--- .../tests/unittests/test_dist_fleet_ps3.py | 6 +++--- .../tests/unittests/test_dist_fleet_ps4.py | 6 +++--- .../tests/unittests/test_dist_fleet_ps5.py | 6 +++--- .../tests/unittests/test_dist_fleet_ps6.py | 6 +++--- .../tests/unittests/test_dist_transpiler.py | 8 ++++---- .../test_eager_deletion_padding_rnn.py | 6 +++--- .../test_eager_deletion_recurrent_op.py | 18 ++++++++---------- .../unittests/test_elementwise_nn_grad.py | 6 +++--- .../test_image_classification_layer.py | 3 ++- 
.../tests/unittests/test_imperative_basic.py | 10 +++++----- .../tests/unittests/test_imperative_deepcf.py | 2 +- ...t_imperative_lod_tensor_to_selected_rows.py | 2 +- .../test_imperative_ocr_attention_model.py | 6 ++---- .../test_imperative_parallel_coalesce_split.py | 2 +- 19 files changed, 57 insertions(+), 60 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py index f97e1d6c3cff2d..a1e568f6a05321 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py @@ -39,7 +39,7 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) - acc = fluid.layers.elementwise_div( + acc = paddle.divide( cond_3, fluid.layers.fill_constant( shape=[1], value=batch_size * 1.0, dtype='float64' @@ -49,13 +49,13 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): return acc def get_loss(cos_q_pt, cos_q_nt): - loss_op1 = fluid.layers.elementwise_sub( + loss_op1 = paddle.subtract( fluid.layers.fill_constant_batch_size_like( input=cos_q_pt, shape=[-1, 1], value=margin, dtype='float32' ), cos_q_pt, ) - loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) + loss_op2 = paddle.add(loss_op1, cos_q_nt) loss_op3 = paddle.maximum( fluid.layers.fill_constant_batch_size_like( input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32' diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py index 5e94a1949c551f..e7ce16057e7cfa 100755 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py @@ -39,7 +39,7 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) - acc = fluid.layers.elementwise_div( + acc = paddle.divide( cond_3, fluid.layers.fill_constant( shape=[1], value=batch_size * 1.0, dtype='float64' @@ -49,13 +49,13 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): return acc def get_loss(cos_q_pt, cos_q_nt): - loss_op1 = fluid.layers.elementwise_sub( + loss_op1 = paddle.subtract( fluid.layers.fill_constant_batch_size_like( input=cos_q_pt, shape=[-1, 1], value=margin, dtype='float32' ), cos_q_pt, ) - loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) + loss_op2 = paddle.add(loss_op1, cos_q_nt) loss_op3 = paddle.maximum( fluid.layers.fill_constant_batch_size_like( input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32' diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py index 57937777386202..f97046db9477b4 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py @@ -42,7 +42,7 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) - acc = fluid.layers.elementwise_div( + acc = paddle.divide( cond_3, fluid.layers.fill_constant( shape=[1], value=batch_size * 1.0, dtype='float64' @@ -52,13 +52,13 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): return acc def get_loss(cos_q_pt, cos_q_nt): - loss_op1 = fluid.layers.elementwise_sub( + loss_op1 = paddle.subtract( 
fluid.layers.fill_constant_batch_size_like( input=cos_q_pt, shape=[-1, 1], value=margin, dtype='float32' ), cos_q_pt, ) - loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) + loss_op2 = paddle.add(loss_op1, cos_q_nt) loss_op3 = paddle.maximum( fluid.layers.fill_constant_batch_size_like( input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32' diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py index a1eb88f8ff86b3..ead8e6437a080d 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py @@ -43,7 +43,7 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) - acc = fluid.layers.elementwise_div( + acc = paddle.divide( cond_3, fluid.layers.fill_constant( shape=[1], value=batch_size * 1.0, dtype='float64' @@ -53,13 +53,13 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): return acc def get_loss(cos_q_pt, cos_q_nt): - loss_op1 = fluid.layers.elementwise_sub( + loss_op1 = paddle.subtract( fluid.layers.fill_constant_batch_size_like( input=cos_q_pt, shape=[-1, 1], value=margin, dtype='float32' ), cos_q_pt, ) - loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) + loss_op2 = paddle.add(loss_op1, cos_q_nt) loss_op3 = paddle.maximum( fluid.layers.fill_constant_batch_size_like( input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32' diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py index 609611bd24e609..1ea94b85bc1dc6 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py @@ -42,7 +42,7 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) - acc = fluid.layers.elementwise_div( + acc = paddle.divide( cond_3, fluid.layers.fill_constant( shape=[1], value=batch_size * 1.0, dtype='float64' @@ -52,13 +52,13 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): return acc def get_loss(cos_q_pt, cos_q_nt): - loss_op1 = fluid.layers.elementwise_sub( + loss_op1 = paddle.subtract( fluid.layers.fill_constant_batch_size_like( input=cos_q_pt, shape=[-1, 1], value=margin, dtype='float32' ), cos_q_pt, ) - loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) + loss_op2 = paddle.add(loss_op1, cos_q_nt) loss_op3 = paddle.maximum( fluid.layers.fill_constant_batch_size_like( input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32' diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py index a2192b16f3e28f..0f2c840019412b 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py @@ -39,7 +39,7 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) - acc = fluid.layers.elementwise_div( + acc = paddle.divide( cond_3, fluid.layers.fill_constant( shape=[1], value=batch_size * 1.0, dtype='float64' @@ -49,13 +49,13 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): return acc def get_loss(cos_q_pt, cos_q_nt): - loss_op1 = fluid.layers.elementwise_sub( + loss_op1 = paddle.subtract( 
fluid.layers.fill_constant_batch_size_like( input=cos_q_pt, shape=[-1, 1], value=margin, dtype='float32' ), cos_q_pt, ) - loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) + loss_op2 = paddle.add(loss_op1, cos_q_nt) loss_op3 = paddle.maximum( fluid.layers.fill_constant_batch_size_like( input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32' diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py index d4197129df1f9a..a7d4f06c029034 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py @@ -39,7 +39,7 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) - acc = fluid.layers.elementwise_div( + acc = paddle.divide( cond_3, fluid.layers.fill_constant( shape=[1], value=batch_size * 1.0, dtype='float64' @@ -49,13 +49,13 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): return acc def get_loss(cos_q_pt, cos_q_nt): - loss_op1 = fluid.layers.elementwise_sub( + loss_op1 = paddle.subtract( fluid.layers.fill_constant_batch_size_like( input=cos_q_pt, shape=[-1, 1], value=margin, dtype='float32' ), cos_q_pt, ) - loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) + loss_op2 = paddle.add(loss_op1, cos_q_nt) loss_op3 = paddle.maximum( fluid.layers.fill_constant_batch_size_like( input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32' diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py index deb6d97cd46594..25bb1b0e37f471 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py @@ -39,7 +39,7 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) - acc = fluid.layers.elementwise_div( + acc = paddle.divide( cond_3, fluid.layers.fill_constant( shape=[1], value=batch_size * 1.0, dtype='float64' @@ -49,13 +49,13 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): return acc def get_loss(cos_q_pt, cos_q_nt): - loss_op1 = fluid.layers.elementwise_sub( + loss_op1 = paddle.subtract( fluid.layers.fill_constant_batch_size_like( input=cos_q_pt, shape=[-1, 1], value=margin, dtype='float32' ), cos_q_pt, ) - loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) + loss_op2 = paddle.add(loss_op1, cos_q_nt) loss_op3 = paddle.maximum( fluid.layers.fill_constant_batch_size_like( input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32' diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py index 581ea0e0bc039a..a8c26ed9b70ff6 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py @@ -39,7 +39,7 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) - acc = fluid.layers.elementwise_div( + acc = paddle.divide( cond_3, fluid.layers.fill_constant( shape=[1], value=batch_size * 1.0, dtype='float64' @@ -49,13 +49,13 @@ def get_acc(cos_q_nt, cos_q_pt, batch_size): return acc def get_loss(cos_q_pt, cos_q_nt): - loss_op1 = fluid.layers.elementwise_sub( + loss_op1 = paddle.subtract( 
fluid.layers.fill_constant_batch_size_like( input=cos_q_pt, shape=[-1, 1], value=margin, dtype='float32' ), cos_q_pt, ) - loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) + loss_op2 = paddle.add(loss_op1, cos_q_nt) loss_op3 = paddle.maximum( fluid.layers.fill_constant_batch_size_like( input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32' diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index 04ed91fb0565c7..1b5af488460b05 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -403,9 +403,9 @@ def net_conf(self): neg_emb_b_vec = paddle.reshape(neg_emb_b, shape=[-1, neg_num]) - true_logits = fluid.layers.elementwise_add( + true_logits = paddle.add( paddle.sum( - fluid.layers.elementwise_mul(input_emb, true_emb_w), + paddle.multiply(input_emb, true_emb_w), dim=1, keep_dim=True, ), @@ -418,7 +418,7 @@ def net_conf(self): input_emb_re, neg_emb_w_re, transpose_y=True ) neg_matmul_re = paddle.reshape(neg_matmul, shape=[-1, neg_num]) - neg_logits = fluid.layers.elementwise_add(neg_matmul_re, neg_emb_b_vec) + neg_logits = paddle.add(neg_matmul_re, neg_emb_b_vec) # nce loss label_ones = fluid.layers.fill_constant_batch_size_like( true_logits, shape=[-1, 1], value=1.0, dtype='float32' @@ -433,7 +433,7 @@ def net_conf(self): neg_xent = paddle.nn.functional.binary_cross_entropy_with_logits( neg_logits, label_zeros ) - cost = fluid.layers.elementwise_add( + cost = paddle.add( paddle.sum(true_xent, axis=1), paddle.sum(neg_xent, axis=1), ) diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py index 66bf53c8a902d5..4e3e204c2d286d 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -169,7 +169,7 @@ def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None): nn = layers.concat([input, pre_hidden], 1) gate_input = layers.matmul(x=nn, y=weight_1) - gate_input = layers.elementwise_add(gate_input, bias) + gate_input = paddle.add(gate_input, bias) i = paddle.slice( gate_input, axes=[1], starts=[0], ends=[hidden_size] ) @@ -293,7 +293,7 @@ def encoder_static( nn = layers.concat([input, pre_hidden], 1) gate_input = layers.matmul(x=nn, y=weight_1) - gate_input = layers.elementwise_add(gate_input, bias) + gate_input = paddle.add(gate_input, bias) i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1) c = pre_cell * paddle.nn.functional.sigmoid( @@ -460,7 +460,7 @@ def encoder_static( ) projection = layers.matmul(rnn_out, softmax_weight) - projection = layers.elementwise_add(projection, softmax_bias) + projection = paddle.add(projection, softmax_bias) projection = paddle.reshape(projection, shape=[-1, vocab_size]) loss = layers.softmax_with_cross_entropy( diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py index e044071e43d88e..b7952c3736b26e 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py @@ -157,7 +157,7 @@ def create_rnn_op(self): x_t = rnn.step_input(x) h = paddle.scale( - x=layers.elementwise_add(x=h_pre, y=x_t), + x=paddle.add(x=h_pre, y=x_t), scale=self.py_rnn.scale, ) @@ 
-328,9 +328,7 @@ def create_rnn_op(self): bias_attr=False, ) - h = paddle.nn.functional.sigmoid( - x=layers.elementwise_add(x=temp_l, y=temp_r) - ) + h = paddle.nn.functional.sigmoid(x=paddle.add(x=temp_l, y=temp_r)) rnn.update_memory(h_pre, h) rnn.output(h) @@ -504,7 +502,7 @@ def create_rnn_op(self): with rnn.step(): mem_pre = rnn.memory(shape=[-1, self.input_dim], batch_ref=x) x_t = rnn.step_input(x) - mem = layers.elementwise_add(x=mem_pre, y=x_t) + mem = paddle.add(x=mem_pre, y=x_t) rnn.update_memory(mem_pre, mem) rnn.output(mem) @@ -584,7 +582,7 @@ def create_rnn_op(self): with rnn_0.step(): x_t = rnn_0.step_input(x) mem_pre = rnn_0.memory(shape=[-1, self.input_dim], batch_ref=x) - mem = layers.elementwise_add(x=mem_pre, y=x_t) + mem = paddle.add(x=mem_pre, y=x_t) rnn_0.update_memory(mem_pre, mem) rnn_0.output(mem) @@ -594,8 +592,8 @@ def create_rnn_op(self): x_t = rnn_1.step_input(x) last_rnn_output = rnn_0() last_rnn_sum = paddle.sum(last_rnn_output) - mem = layers.elementwise_add(x=x_t, y=last_rnn_sum) - y = layers.elementwise_add(x=mem_pre, y=mem) + mem = paddle.add(x=x_t, y=last_rnn_sum) + y = paddle.add(x=mem_pre, y=mem) rnn_1.update_memory(mem_pre, mem) rnn_1.output(y) return rnn_1() @@ -693,7 +691,7 @@ def setUp(self): x_t = forward_only_rnn.step_input(x) h = paddle.scale( - x=layers.elementwise_add(x=h_pre, y=x_t), + x=paddle.add(x=h_pre, y=x_t), scale=self.py_rnn.scale, ) @@ -709,7 +707,7 @@ def setUp(self): x_t = rnn.step_input(x) h = paddle.scale( - x=layers.elementwise_add(x=h_pre, y=x_t), + x=paddle.add(x=h_pre, y=x_t), scale=self.py_rnn.scale, ) diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py b/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py index 115d5e947f6a73..63bab370b1f391 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py @@ -94,7 +94,7 @@ def func(self, place): y = layers.data('y', shape, False, dtype) x.persistable = True y.persistable = True - out = layers.elementwise_add(x, y) + out = paddle.add(x, y) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) y_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -155,7 +155,7 @@ def func(self, place): y = layers.data('y', shape, False, dtype) x.persistable = True y.persistable = True - out = layers.elementwise_sub(x, y) + out = paddle.subtract(x, y) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) y_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -291,7 +291,7 @@ def func(self, place): y = layers.data('y', shape, False, dtype) x.persistable = True y.persistable = True - out = layers.elementwise_add(x, y) + out = paddle.add(x, y) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) y_arr = np.random.uniform(-1, 1, shape).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_image_classification_layer.py b/python/paddle/fluid/tests/unittests/test_image_classification_layer.py index fbcf634634b34f..7a61eaaa04937b 100644 --- a/python/paddle/fluid/tests/unittests/test_image_classification_layer.py +++ b/python/paddle/fluid/tests/unittests/test_image_classification_layer.py @@ -14,6 +14,7 @@ import unittest +import paddle import paddle.fluid as fluid import paddle.fluid.nets as nets from paddle.fluid.framework import Program @@ -81,7 +82,7 @@ def test_elementwise_add_with_act(self): image2 = fluid.layers.data( name='pixel2', shape=[3, 48, 48], dtype='float32' ) - fluid.layers.elementwise_add(x=image1, y=image2, act='relu') + 
paddle.nn.functional.relu(paddle.add(x=image1, y=image2)) print(main_program) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index d6d40dfc61c73b..f92a248168793d 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -33,7 +33,7 @@ def __init__(self): def forward(self, inputs): x = fluid.layers.relu(inputs) self._x_for_debug = x - x = fluid.layers.elementwise_mul(x, x) + x = paddle.multiply(x, x) x = paddle.sum(x) return [x] @@ -722,9 +722,9 @@ def func_dygraph_vs_static(self): inp1 = paddle.to_tensor(np_inp1) inp2 = paddle.to_tensor(np_inp2) if np.sum(np_inp1) < np.sum(np_inp2): - x = fluid.layers.elementwise_add(inp1, inp2) + x = paddle.add(inp1, inp2) else: - x = fluid.layers.elementwise_sub(inp1, inp2) + x = paddle.subtract(inp1, inp2) dygraph_result = x.numpy() # static graph @@ -750,13 +750,13 @@ def func_dygraph_vs_static(self): with ie.true_block(): d1 = ie.input(inp_data1) d2 = ie.input(inp_data2) - d3 = fluid.layers.elementwise_add(d1, d2) + d3 = paddle.add(d1, d2) ie.output(d3) with ie.false_block(): d1 = ie.input(inp_data1) d2 = ie.input(inp_data2) - d3 = fluid.layers.elementwise_sub(d1, d2) + d3 = paddle.subtract(d1, d2) ie.output(d3) out = ie() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py index 15f9365772b3eb..532dddd87c5ce3 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py @@ -76,7 +76,7 @@ def forward(self, users, items): for ul, il in zip(self._user_layers, self._item_layers): users = ul(users) items = il(items) - return fluid.layers.elementwise_mul(users, items) + return paddle.multiply(users, items) class MLP(fluid.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py index f15205eb3f64dc..b8efe8fbd1c9f2 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py @@ -67,7 +67,7 @@ def forward(self, input, label): projection = fluid.layers.matmul( x_emb, paddle.transpose(self.embedding.weight, perm=[1, 0]) ) - projection = fluid.layers.elementwise_add(projection, self.softmax_bias) + projection = paddle.add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) loss = fluid.layers.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index d89d7d6b258b24..81eb24f98f8247 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -306,9 +306,7 @@ def forward(self, encoder_vec, encoder_proj, decoder_state): decoder_state_proj_reshape, [-1, encoder_proj.shape[1], -1], ) - concated = fluid.layers.elementwise_add( - encoder_proj, decoder_state_expand - ) + concated = paddle.add(encoder_proj, decoder_state_expand) concated = paddle.tanh(x=concated) attention_weight = self.fc_2(concated) @@ -362,7 +360,7 @@ def forward( ) fc_1 
= self.fc_1_layer(context) fc_2 = self.fc_2_layer(current_word) - decoder_inputs = fluid.layers.elementwise_add(x=fc_1, y=fc_2) + decoder_inputs = paddle.add(x=fc_1, y=fc_2) h, _, _ = self.gru_unit(decoder_inputs, hidden_mem) hidden_mem = h diff --git a/python/paddle/fluid/tests/unittests/test_imperative_parallel_coalesce_split.py b/python/paddle/fluid/tests/unittests/test_imperative_parallel_coalesce_split.py index 721feaf188ea7c..2482359d745c24 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_parallel_coalesce_split.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_parallel_coalesce_split.py @@ -35,7 +35,7 @@ def __init__(self, name_scope): def forward(self, inputs): x = fluid.layers.relu(inputs) - x = fluid.layers.elementwise_mul(x, x) + x = paddle.multiply(x, x) x = paddle.sum(x) return [x] From 08c5f4c18d2cd4e6485eadc7b9791a70f8e16932 Mon Sep 17 00:00:00 2001 From: Jianghai <72591262+CjhHa1@users.noreply.github.com> Date: Thu, 1 Dec 2022 09:38:53 +0800 Subject: [PATCH 072/154] [Auto Parallel]Add Embedding flops (#47978) * c_embedding * add annotations * add annotations * revision * revise attrs --- python/paddle/fluid/tests/unittests/test_profiler.py | 4 ++++ python/paddle/utils/flops.py | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/python/paddle/fluid/tests/unittests/test_profiler.py b/python/paddle/fluid/tests/unittests/test_profiler.py index e248a4f7f5b1dd..5fbedfaaa7ff0c 100644 --- a/python/paddle/fluid/tests/unittests/test_profiler.py +++ b/python/paddle/fluid/tests/unittests/test_profiler.py @@ -297,6 +297,10 @@ def test_flops(self): self.assertTrue( flops('softmax', {'X': [[12, 12, 12]]}, {}) == 3 * 12 * 12 * 12 ) + self.assertTrue( + flops('c_embedding', {'Ids': [[12, 12]], 'W': [[12, 12, 3]]}, {}) + == 0 + ) if __name__ == '__main__': diff --git a/python/paddle/utils/flops.py b/python/paddle/utils/flops.py index cfcdf940569fae..114ca6d9ab6c77 100644 --- a/python/paddle/utils/flops.py +++ b/python/paddle/utils/flops.py @@ -60,6 +60,15 @@ def register(func): return register +@register_flops("c_embedding") +def _c_embedding_flops(input_shapes, attrs): + """FLOPs computation for c_embedding op. + For c_embedding(input): + equation: flops = 0 + """ + return 0 + + @register_flops("dropout") def _dropout_flops(input_shapes, attrs): """FLOPs computation for dropout op. 
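
For reference, the registry entry added above can be exercised the same way the new assertion in test_profiler.py does; a minimal sketch (the import path is assumed from the location of python/paddle/utils/flops.py, everything else is taken directly from the diff):

    # assumed public entry point; mirrors flops(op_type, input_shapes, attrs) as called in the test
    from paddle.utils.flops import flops

    # Ids is [batch, seq_len] and W is the embedding table; c_embedding is counted as 0 FLOPs
    assert flops('c_embedding', {'Ids': [[12, 12]], 'W': [[12, 12, 3]]}, {}) == 0
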
From bc01d56e6a56ca4b24ec78e57f7def72611bc7a7 Mon Sep 17 00:00:00 2001 From: Nyakku Shigure Date: Thu, 1 Dec 2022 09:59:11 +0800 Subject: [PATCH 073/154] [CodeStyle][isort] introduce isort (part6) (#48522) --- pyproject.toml | 9 +-------- ...uto_parallel_data_parallel_optimization.py | 17 +++++++++-------- .../passes/auto_parallel_grad_clip.py | 19 ++++++++++--------- python/paddle/distribution/normal.py | 5 +---- python/paddle/distribution/uniform.py | 7 +------ .../dygraph_to_static/bert_dygraph_model.py | 2 +- .../dygraph_to_static/simnet_dygraph_model.py | 3 --- .../dygraph_to_static/test_convert_call.py | 10 ++++------ .../unittests/dygraph_to_static/test_lac.py | 4 +--- .../unittests/dygraph_to_static/test_len.py | 1 + .../dygraph_to_static/test_logical.py | 5 +---- .../unittests/dygraph_to_static/test_loop.py | 9 +++++---- .../unittests/dygraph_to_static/test_mnist.py | 2 +- .../dygraph_to_static/test_mobile_net.py | 11 ++--------- .../test_reinforcement_learning.py | 2 +- .../dygraph_to_static/test_resnet.py | 6 ++---- .../test_save_inference_model.py | 7 +++---- .../dygraph_to_static/test_save_load.py | 8 ++++---- .../dygraph_to_static/test_se_resnet.py | 4 ++-- .../dygraph_to_static/test_sentiment.py | 3 +-- .../dygraph_to_static/test_seq2seq.py | 7 +++---- .../unittests/dygraph_to_static/test_tsm.py | 7 +++---- .../dygraph_to_static/test_word2vec.py | 7 +++---- .../transformer_dygraph_model.py | 9 ++------- .../test_add_position_encoding_op.py | 3 ++- .../tests/unittests/test_affine_channel_op.py | 1 + .../tests/unittests/test_data_norm_op.py | 8 +++++--- .../fluid/tests/unittests/test_detach.py | 2 +- .../unittests/test_dygraph_mnist_fp16.py | 2 +- .../unittests/test_dygraph_multi_forward.py | 2 +- ..._executor_return_tensor_not_overwriting.py | 3 ++- .../unittests/test_fill_zeros_like2_op.py | 1 + .../test_fuse_elewise_add_act_pass.py | 3 ++- .../tests/unittests/test_group_norm_op.py | 7 ++++--- .../tests/unittests/test_imperative_basic.py | 2 +- .../test_imperative_container_sequential.py | 2 +- .../tests/unittests/test_imperative_deepcf.py | 2 +- .../unittests/test_imperative_framework.py | 1 - .../tests/unittests/test_imperative_gan.py | 2 +- .../test_imperative_layer_trainable.py | 2 +- .../test_imperative_load_static_param.py | 4 ---- .../tests/unittests/test_imperative_mnist.py | 5 ++--- .../test_imperative_ocr_attention_model.py | 9 ++------- .../unittests/test_imperative_optimizer.py | 7 +++---- .../unittests/test_imperative_optimizer_v2.py | 7 +++---- .../test_imperative_partitial_backward.py | 2 +- .../test_imperative_reinforcement.py | 2 +- .../tests/unittests/test_imperative_resnet.py | 3 +-- .../unittests/test_imperative_se_resnext.py | 1 - ...imperative_trace_non_persistable_inputs.py | 2 +- ..._imperative_transformer_sorted_gradient.py | 11 ++++------- .../fluid/tests/unittests/test_isfinite_op.py | 1 + .../tests/unittests/test_jit_save_load.py | 2 +- .../unittests/test_learning_rate_scheduler.py | 8 +++++--- .../test_multiprocess_dataloader_dynamic.py | 5 ++--- ...ess_dataloader_iterable_dataset_dynamic.py | 6 ++---- .../fluid/tests/unittests/test_ones_op.py | 1 - .../tests/unittests/test_paddle_save_load.py | 13 +++++++------ .../fluid/tests/unittests/test_pool3d_op.py | 3 ++- .../fluid/tests/unittests/test_roi_pool_op.py | 9 +++++---- ...st_sigmoid_cross_entropy_with_logits_op.py | 11 ++++++----- .../unittests/test_similarity_focus_op.py | 1 + .../fluid/tests/unittests/test_size_op.py | 5 +++-- .../tests/unittests/test_static_save_load.py | 14 ++++++++------ 
python/paddle/fluid/tests/unittests/utils.py | 5 +++-- .../xpu/test_affine_channel_op_xpu.py | 2 ++ python/paddle/hapi/progressbar.py | 3 ++- .../incubate/multiprocessing/reductions.py | 8 ++++---- python/paddle/inference/wrapper.py | 18 ++++++++++++------ python/paddle/static/nn/common.py | 14 ++++++-------- python/paddle/vision/datasets/folder.py | 1 + 71 files changed, 178 insertions(+), 212 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2c53986c539caf..2530f0936b03da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,13 +19,6 @@ extend_skip_glob = [ "python/paddle/fluid/tests/unittests/mlu/**", # These files will be fixed in the future - "cmake/**", - "paddle/**", - "r/**", - "tools/**", - "python/paddle/[!f]**", - "python/paddle/fluid/tests/unittests/[t-z]**", "python/paddle/fluid/tests/unittests/dygraph_to_static/test_error.py", - "python/paddle/fluid/tests/unittests/dygraph_to_static/**", - "python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py", + "python/paddle/jit/**", ] diff --git a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py index 47759484a66ee2..66f80ee9950491 100644 --- a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py +++ b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py @@ -15,24 +15,25 @@ from collections import OrderedDict import paddle -from paddle.fluid import unique_name -from paddle.fluid.framework import default_main_program -from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole -from .pass_base import PassBase, PassType, register_pass from paddle.distributed.auto_parallel.operators.common import ( - is_data_parallel_scale_op, is_data_parallel_reduce_op, + is_data_parallel_scale_op, ) from paddle.distributed.auto_parallel.utils import ( find_higher_order_backward_op, + get_var_numel, + insert_dependencies_for_two_vars, + is_forward_op, is_loss_grad_op, is_optimize_op, - is_forward_op, ring_id_to_process_group, - get_var_numel, use_standalone_executor, - insert_dependencies_for_two_vars, ) +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole +from paddle.fluid import unique_name +from paddle.fluid.framework import default_main_program + +from .pass_base import PassBase, PassType, register_pass # add new optimizers supporting rescale_grad here __rescale_grad_supported_opts__ = [ diff --git a/python/paddle/distributed/passes/auto_parallel_grad_clip.py b/python/paddle/distributed/passes/auto_parallel_grad_clip.py index a475f8e0ac317e..af5259680e4a59 100644 --- a/python/paddle/distributed/passes/auto_parallel_grad_clip.py +++ b/python/paddle/distributed/passes/auto_parallel_grad_clip.py @@ -12,27 +12,28 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np from functools import reduce +import numpy as np + import paddle -from .pass_base import PassBase, register_pass -from ..auto_parallel.reshard import Resharder +from ..auto_parallel.dist_attribute import ( + OperatorDistributedAttribute, + TensorDistributedAttribute, +) from ..auto_parallel.process_group import get_world_process_group +from ..auto_parallel.reshard import Resharder from ..auto_parallel.utils import ( - is_gradient_clip_op, - is_optimize_op, OP_ROLE_KEY, OpRole, _get_comm_group, insert_dependencies_for_two_vars, + is_gradient_clip_op, + is_optimize_op, use_standalone_executor, ) -from ..auto_parallel.dist_attribute import ( - TensorDistributedAttribute, - OperatorDistributedAttribute, -) +from .pass_base import PassBase, register_pass def _get_params_grads(block): diff --git a/python/paddle/distribution/normal.py b/python/paddle/distribution/normal.py index b3877af277fbb9..3eb3fd2d59009c 100644 --- a/python/paddle/distribution/normal.py +++ b/python/paddle/distribution/normal.py @@ -21,10 +21,7 @@ from paddle.distribution import distribution from paddle.fluid.data_feeder import check_type, convert_dtype from paddle.fluid.framework import _non_static_mode -from paddle.fluid.layers import ( - nn, - tensor, -) +from paddle.fluid.layers import nn, tensor class Normal(distribution.Distribution): diff --git a/python/paddle/distribution/uniform.py b/python/paddle/distribution/uniform.py index b9566d3c8dbc27..1b1ef5906d9643 100644 --- a/python/paddle/distribution/uniform.py +++ b/python/paddle/distribution/uniform.py @@ -23,12 +23,7 @@ _non_static_mode, in_dygraph_mode, ) -from paddle.fluid.layers import ( - nn, - tensor, -) - -import paddle +from paddle.fluid.layers import nn, tensor from paddle.tensor import random diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py index 8eb757d87ac4f9..3e372a6d9408f6 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py @@ -17,8 +17,8 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import Embedding, Layer -from paddle.nn import Linear from paddle.jit.api import declarative +from paddle.nn import Linear class PositionwiseFeedForwardLayer(Layer): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py index f236faccdc2df8..075900b939fdf1 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py @@ -17,9 +17,6 @@ import paddle import paddle.fluid as fluid import paddle.fluid.param_attr as attr - -from functools import reduce - from paddle.fluid.dygraph import Embedding, Layer from paddle.jit.api import declarative from paddle.static import Variable diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py index 302045ed4038f2..743fff189cb9f8 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py @@ -12,19 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import logging import unittest -import logging import numpy as np +from test_program_translator import get_source_code import paddle import paddle.fluid as fluid -from paddle.jit import ProgramTranslator -from paddle.jit.dy2static.convert_call_func import ( - CONVERSION_OPTIONS, -) -from test_program_translator import get_source_code import paddle.jit.dy2static as _jst +from paddle.jit import ProgramTranslator +from paddle.jit.dy2static.convert_call_func import CONVERSION_OPTIONS program_translator = ProgramTranslator() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py index dd4e7e6746d1ce..54b97c9280a404 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py @@ -24,10 +24,8 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph import to_variable -from paddle.fluid.dygraph import Embedding, GRUUnit - from paddle import _legacy_C_ops +from paddle.fluid.dygraph import Embedding, GRUUnit, to_variable from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.fluid.framework import _non_static_mode from paddle.jit import ProgramTranslator diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py index 6ab1fbc9f20657..3acd56b6f6fc73 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py @@ -15,6 +15,7 @@ import unittest import numpy as np + import paddle import paddle.fluid as fluid from paddle.jit.api import declarative diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py index 97454a8cefb5a6..fd335c41ba24ef 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logical.py @@ -22,10 +22,7 @@ import paddle import paddle.fluid as fluid from paddle.jit import ProgramTranslator -from paddle.jit.dy2static.logical_transformer import ( - cmpop_node_to_str, -) -from paddle.jit import ProgramTranslator +from paddle.jit.dy2static.logical_transformer import cmpop_node_to_str from paddle.utils import gast program_translator = ProgramTranslator() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_loop.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_loop.py index c6c8a392274f2f..06b17978d6687f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_loop.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_loop.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from paddle.utils import gast import inspect +import unittest + import numpy as np + import paddle import paddle.fluid as fluid -import unittest - -from paddle.jit.dy2static.loop_transformer import NameVisitor from paddle.jit.api import declarative +from paddle.jit.dy2static.loop_transformer import NameVisitor +from paddle.utils import gast SEED = 2020 np.random.seed(SEED) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py index fe7e463e1db2f3..43947a505e6b75 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py @@ -23,11 +23,11 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import to_variable -from paddle.nn import Linear from paddle.fluid.dygraph.base import switch_to_static_graph from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import AdamOptimizer +from paddle.nn import Linear SEED = 2020 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index b40eb92753dacc..2c458b006daaf6 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -26,16 +26,9 @@ from paddle.fluid.dygraph.nn import BatchNorm, Linear from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.dygraph.nn import BatchNorm -from paddle.nn import Linear -from paddle.jit.api import declarative from paddle.jit import ProgramTranslator - -from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX - -import unittest - -from predictor_utils import PredictorTools +from paddle.jit.api import declarative +from paddle.nn import Linear # Note: Set True to eliminate randomness. # 1. 
For one operation, cuDNN has several algorithms, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py index 15e6827766a311..b98d9c304dc9a3 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py @@ -21,7 +21,7 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph import to_variable, Layer +from paddle.fluid.dygraph import Layer, to_variable from paddle.jit import ProgramTranslator from paddle.jit.api import declarative diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py index fec0109168b50d..00d6f24da345db 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py @@ -17,16 +17,14 @@ import tempfile import time import unittest -import paddle + import numpy as np from predictor_utils import PredictorTools import paddle import paddle.fluid as fluid - -from paddle.fluid.dygraph.nn import BatchNorm -from paddle.jit import ProgramTranslator from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX +from paddle.fluid.dygraph.nn import BatchNorm from paddle.jit import ProgramTranslator SEED = 2020 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py index c22c78fefe3d2f..461e9791d23ad3 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_inference_model.py @@ -15,16 +15,15 @@ import os import tempfile import unittest + import numpy as np import paddle import paddle.fluid as fluid +from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.jit import ProgramTranslator from paddle.jit.api import declarative -from paddle.jit.dy2static.partial_program import ( - partial_program_from, -) -from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX +from paddle.jit.dy2static.partial_program import partial_program_from SEED = 2020 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_load.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_load.py index b64bd35a0b0666..d9b0feeaeeca71 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_load.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_load.py @@ -12,16 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import os import tempfile +import unittest import numpy as np -import paddle.fluid as fluid +from test_fetch_feed import Linear -from paddle.jit import ProgramTranslator +import paddle.fluid as fluid from paddle.fluid.optimizer import AdamOptimizer -from test_fetch_feed import Linear +from paddle.jit import ProgramTranslator np.random.seed(2020) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py index 4b1aad178d0208..269f50a8dd9c5f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py @@ -25,11 +25,11 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import BatchNorm -from paddle.nn import Linear from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX +from paddle.fluid.dygraph.nn import BatchNorm from paddle.jit import ProgramTranslator from paddle.jit.api import declarative +from paddle.nn import Linear SEED = 2020 np.random.seed(SEED) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py index b02f6f418b3afa..ef9436864a83f4 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py @@ -19,12 +19,11 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph.nn import Embedding -from paddle.nn import Linear from paddle.fluid.dygraph import to_variable from paddle.fluid.dygraph.nn import Embedding, Linear from paddle.jit import ProgramTranslator from paddle.jit.api import declarative +from paddle.nn import Linear SEED = 2020 program_translator = ProgramTranslator() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py index c7bef3d7b1ca3a..4a83495987cac7 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py @@ -18,14 +18,13 @@ import unittest import numpy as np +from seq2seq_dygraph_model import AttentionModel, BaseModel +from seq2seq_utils import Seq2SeqModelHyperParams, get_data_iter + import paddle.fluid as fluid from paddle.fluid.clip import GradientClipByGlobalNorm from paddle.jit import ProgramTranslator -from seq2seq_dygraph_model import BaseModel, AttentionModel -from seq2seq_utils import Seq2SeqModelHyperParams -from seq2seq_utils import get_data_iter - place = ( fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py index 0919e4bced39b0..9c52786842dfd5 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py @@ -23,12 +23,11 @@ import paddle import paddle.fluid as fluid +from paddle.fluid.dygraph import to_variable from paddle.fluid.dygraph.nn import BatchNorm -from paddle.nn import Linear -from paddle.jit.api import declarative from paddle.jit import ProgramTranslator -from paddle.fluid.dygraph import to_variable -from tsm_config_utils import merge_configs, parse_config, 
print_configs +from paddle.jit.api import declarative +from paddle.nn import Linear random.seed(0) np.random.seed(0) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py index fb7027e88be601..44dd23a4c3abe1 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py @@ -14,13 +14,12 @@ import math import random -import paddle -import numpy as np -import paddle -import paddle.fluid as fluid import unittest +import numpy as np + import paddle +import paddle.fluid as fluid from paddle.fluid.dygraph.nn import Embedding from paddle.jit import ProgramTranslator from paddle.jit.api import declarative diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index fb00473f3d4521..209127104bd4fd 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -18,15 +18,10 @@ import paddle.fluid as fluid import paddle.fluid.layers as layers import paddle.nn.functional as F -from paddle.fluid.dygraph import ( - Embedding, - Layer, - LayerNorm, - to_variable, -) -from paddle.nn import Linear +from paddle.fluid.dygraph import Embedding, Layer, LayerNorm, to_variable from paddle.fluid.layers.utils import map_structure from paddle.jit.api import dygraph_to_static_func +from paddle.nn import Linear def position_encoding_init(n_position, d_pos_vec): diff --git a/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py b/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py index c908bfb5a4fc33..89c52dc203bca8 100644 --- a/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py +++ b/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py @@ -11,9 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import math import unittest + import numpy as np -import math from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_affine_channel_op.py b/python/paddle/fluid/tests/unittests/test_affine_channel_op.py index 5221fd9d69465c..1153d92a06fddf 100644 --- a/python/paddle/fluid/tests/unittests/test_affine_channel_op.py +++ b/python/paddle/fluid/tests/unittests/test_affine_channel_op.py @@ -16,6 +16,7 @@ """ import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_data_norm_op.py b/python/paddle/fluid/tests/unittests/test_data_norm_op.py index b5a2e76fe87bf9..2b84f2b5685af7 100644 --- a/python/paddle/fluid/tests/unittests/test_data_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_data_norm_op.py @@ -14,13 +14,15 @@ """This is unit test of Test data_norm Op.""" import unittest + import numpy as np +from op_test import OpTest + import paddle -import paddle.fluid.core as core -from paddle.fluid.op import Operator import paddle.fluid as fluid -from op_test import OpTest +import paddle.fluid.core as core from paddle.fluid import Program, program_guard +from paddle.fluid.op import Operator def _reference_testing(x, batch_size, batch_sum, batch_square_sum, slot_dim=-1): diff --git a/python/paddle/fluid/tests/unittests/test_detach.py b/python/paddle/fluid/tests/unittests/test_detach.py index cf7214b858889b..58190b150e6c78 100644 --- a/python/paddle/fluid/tests/unittests/test_detach.py +++ b/python/paddle/fluid/tests/unittests/test_detach.py @@ -18,8 +18,8 @@ import paddle import paddle.fluid as fluid -from paddle.nn import Linear from paddle.fluid.dygraph.base import to_variable +from paddle.nn import Linear class Test_Detach(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py b/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py index b9a130be6bfbb8..477db13a701b70 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py @@ -18,8 +18,8 @@ import paddle import paddle.fluid as fluid -from paddle.nn import Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear class SimpleImgConvPool(fluid.dygraph.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py index d8b8c2ac4f0dff..fd7f97063b646c 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py @@ -20,9 +20,9 @@ import paddle import paddle.fluid as fluid from paddle.fluid import core -from paddle.nn import Linear from paddle.fluid.dygraph.base import to_variable from paddle.fluid.optimizer import SGDOptimizer +from paddle.nn import Linear SEED = 123123111 diff --git a/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py b/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py index 9671448e3ff912..954d47287bfe13 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py +++ b/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py @@ -13,10 +13,11 @@ # limitations under the License. 
import unittest -import paddle + import numpy as np from op_test import OpTest, skip_check_grad_ci +import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py b/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py index 5cf61d53b30eef..a610e52037be67 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py @@ -16,6 +16,7 @@ import numpy as np from op_test import OpTest + from paddle.fluid.framework import convert_np_dtype_to_dtype_ diff --git a/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py index 6f3bc21e4bb86d..b840fa205dc95f 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py @@ -14,10 +14,11 @@ import os import unittest -import numpy +import numpy from parallel_executor_test_base import DeviceType, TestParallelExecutorBase from simple_nets import fc_with_batchnorm, init_data, simple_fc_net + import paddle import paddle.fluid as fluid import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_group_norm_op.py b/python/paddle/fluid/tests/unittests/test_group_norm_op.py index 2b74636939e993..24f0bc2a0b1f00 100644 --- a/python/paddle/fluid/tests/unittests/test_group_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_group_norm_op.py @@ -13,14 +13,15 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest, skip_check_grad_ci +from testsuite import create_op import paddle -import paddle.fluid.core as core import paddle.fluid as fluid -from op_test import OpTest, skip_check_grad_ci +import paddle.fluid.core as core from paddle.fluid.framework import _test_eager_guard -from testsuite import create_op def group_norm_naive(x, scale, bias, epsilon, groups, data_layout): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index f92a248168793d..4441098c941413 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -19,8 +19,8 @@ import paddle import paddle.fluid as fluid -from paddle.fluid import core import paddle.fluid.dygraph_utils as dygraph_utils +from paddle.fluid import core from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard from paddle.fluid.layer_helper import LayerHelper diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py index eca6e5d81d0109..1049c08c64d40a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py @@ -15,10 +15,10 @@ import unittest import numpy as np -from paddle.nn import Linear import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear class TestImperativeContainerSequential(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py index 532dddd87c5ce3..c4e280ea46fd0b 100644 --- 
a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py @@ -24,8 +24,8 @@ import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.dygraph.base import to_variable -from paddle.nn import Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear class DMF(fluid.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_framework.py b/python/paddle/fluid/tests/unittests/test_imperative_framework.py index 4aec8b308a6c8d..abdc8edd89edfa 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_framework.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_framework.py @@ -20,7 +20,6 @@ import paddle import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard -import paddle class MLP(fluid.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gan.py b/python/paddle/fluid/tests/unittests/test_imperative_gan.py index 6b0e4fb66f5748..845f47434e59d7 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gan.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gan.py @@ -20,10 +20,10 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.nn import Linear from paddle.fluid.dygraph.base import to_variable from paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import SGDOptimizer +from paddle.nn import Linear class Discriminator(fluid.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py b/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py index 335db28d70c2d0..73f101ed5ee280 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_layer_trainable.py @@ -16,10 +16,10 @@ import numpy as np +import paddle import paddle.fluid as fluid import paddle.fluid.dygraph as dygraph from paddle.fluid.framework import _test_eager_guard -import paddle class TestImperativeLayerTrainable(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py index 8e9b6c7f2ff8ef..528ddac3ff4e57 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -27,13 +27,9 @@ Embedding, GroupNorm, LayerNorm, - NCE, PRelu, ) from paddle.nn import Linear -import numpy as np -import os -import tempfile class TestDygraphLoadStatic(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py index 69796f69c6b23c..66d7eb19fb4031 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py @@ -15,16 +15,15 @@ import unittest import numpy as np +from test_imperative_base import new_program_scope from utils import DyGraphProgramDescTracerTestHelper import paddle import paddle.fluid as fluid from paddle.fluid import core +from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard from paddle.fluid.optimizer import SGDOptimizer from paddle.nn import Linear -from test_imperative_base import new_program_scope -from utils import DyGraphProgramDescTracerTestHelper -from paddle.fluid.framework import _test_eager_guard, 
_in_legacy_dygraph class SimpleImgConvPool(fluid.dygraph.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index 81eb24f98f8247..657774b7298a36 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -21,14 +21,9 @@ import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.nn import ( - BatchNorm, - Embedding, - GRUUnit, - Linear, -) -from paddle.nn import Linear +from paddle.fluid.dygraph.nn import BatchNorm, Embedding, GRUUnit, Linear from paddle.fluid.framework import _test_eager_guard +from paddle.nn import Linear class Config: diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py index 1650532e49d7a9..71eb99c229369c 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py @@ -16,10 +16,13 @@ import unittest import numpy as np +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid +from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer from paddle.fluid import core +from paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import ( AdadeltaOptimizer, AdagradOptimizer, @@ -39,10 +42,6 @@ RMSPropOptimizer, SGDOptimizer, ) -from test_imperative_base import new_program_scope -from paddle.fluid.framework import _test_eager_guard - -from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer # Note(wangzhongpu) # In dygraph, don't support ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer. diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py index 48ee814b4ddc91..27c50d2e8af4ea 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py @@ -16,10 +16,13 @@ import unittest import numpy as np +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid +from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer from paddle.fluid import core +from paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import ( AdadeltaOptimizer, AdagradOptimizer, @@ -36,10 +39,6 @@ RecomputeOptimizer, RMSPropOptimizer, ) -from test_imperative_base import new_program_scope -from paddle.fluid.framework import _test_eager_guard - -from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer # Note(wangzhongpu) # In dygraph, don't support ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer. 
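
[Editor's note: illustrative aside, not part of the patch series.] The import hunks above and below all converge on the same layout: standard-library imports first, then third-party packages and local test helpers, then paddle modules, with each group separated by a blank line and sorted alphabetically — the grouping a formatter such as isort would produce. A minimal sketch of that target layout for a hypothetical test module (op_test and test_imperative_base are helpers that only resolve inside Paddle's unittests directory):

    # Group 1: standard library
    import os
    import unittest

    # Group 2: third-party packages and local test helpers
    import numpy as np
    from op_test import OpTest
    from test_imperative_base import new_program_scope

    # Group 3: paddle framework imports
    import paddle
    import paddle.fluid as fluid
    from paddle.fluid.framework import _test_eager_guard
    from paddle.nn import Linear
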
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py b/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py index 29d42076e2c739..67f5a7d26b5085 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py @@ -16,9 +16,9 @@ import numpy as np +import paddle import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard -import paddle class TestImperativePartitialBackward(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py index 2a2d2ef9053317..2cc85fbc30f0d8 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py @@ -15,11 +15,11 @@ import unittest import numpy as np +from test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid from paddle.fluid import core -from test_imperative_base import new_program_scope from paddle.fluid.framework import _test_eager_guard from paddle.fluid.optimizer import SGDOptimizer diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index 6c3fdf77a2d1c7..53e8b1d93c4b7a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -20,8 +20,7 @@ import paddle import paddle.fluid as fluid -from paddle.fluid import core -from paddle.fluid import BatchNorm +from paddle.fluid import BatchNorm, core from paddle.fluid.dygraph.base import to_variable from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard from paddle.fluid.layer_helper import LayerHelper diff --git a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py index 265f3720680088..c09f6a1449faa8 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py @@ -21,7 +21,6 @@ import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.dygraph.nn import BatchNorm -from test_imperative_base import new_program_scope from paddle.fluid.framework import _test_eager_guard from paddle.fluid.layer_helper import LayerHelper diff --git a/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py b/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py index 959ed8bbfec38c..341aa800c96262 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py @@ -16,7 +16,7 @@ import unittest import numpy as np -import os + import paddle import paddle.fluid as fluid diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py index f5737956c3b600..654ebf198b7d09 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py @@ -19,15 +19,12 @@ import paddle import paddle.fluid as fluid -from paddle.fluid import Embedding, 
LayerNorm, Layer -from paddle.nn import Linear -from paddle.fluid.dygraph import to_variable, guard -from test_imperative_base import new_program_scope -from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard -from paddle.fluid import core -import numpy as np import paddle.nn.functional as F +from paddle.fluid import Embedding, Layer, LayerNorm, core +from paddle.fluid.dygraph import guard, to_variable +from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard from paddle.jit import TracedLayer +from paddle.nn import Linear np.set_printoptions(suppress=True) diff --git a/python/paddle/fluid/tests/unittests/test_isfinite_op.py b/python/paddle/fluid/tests/unittests/test_isfinite_op.py index 1ec8063bf57282..053e9bbc08f571 100644 --- a/python/paddle/fluid/tests/unittests/test_isfinite_op.py +++ b/python/paddle/fluid/tests/unittests/test_isfinite_op.py @@ -16,6 +16,7 @@ import numpy as np from op_test import OpTest + import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py index eac87065048954..acb8d0859835f5 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py @@ -23,11 +23,11 @@ import paddle import paddle.fluid as fluid -from paddle.nn import Linear from paddle.fluid import unique_name from paddle.fluid.dygraph.io import INFER_PARAMS_INFO_SUFFIX from paddle.fluid.layers.utils import flatten from paddle.jit.api import declarative +from paddle.nn import Linear from paddle.static import InputSpec BATCH_SIZE = 32 diff --git a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py index dd37c2bff66a7a..a31809c0974bb2 100644 --- a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py @@ -14,13 +14,15 @@ import copy import math -import numpy as np import unittest + +import numpy as np + import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers -import paddle.fluid.framework as framework import paddle.fluid.core as core +import paddle.fluid.framework as framework +import paddle.fluid.layers as layers def exponential_decay( diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py index 2ea1b4faf8d24a..8bcde4489e4254 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py @@ -15,9 +15,6 @@ import sys import time import unittest -import numpy as np -import paddle -from paddle.nn import Linear import numpy as np from test_multiprocess_dataloader_static import ( @@ -31,8 +28,10 @@ prepare_places, ) +import paddle import paddle.fluid as fluid from paddle.io import DataLoader +from paddle.nn import Linear class SimpleFCNet(fluid.dygraph.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py index 1f15241b26c6a0..e739c0c2cb755d 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py 
@@ -15,10 +15,6 @@ import sys import time import unittest -import numpy as np - -import paddle -from paddle.nn import Linear import numpy as np from test_multiprocess_dataloader_iterable_dataset_static import ( @@ -32,8 +28,10 @@ prepare_places, ) +import paddle import paddle.fluid as fluid from paddle.io import DataLoader +from paddle.nn import Linear class SimpleFCNet(fluid.dygraph.Layer): diff --git a/python/paddle/fluid/tests/unittests/test_ones_op.py b/python/paddle/fluid/tests/unittests/test_ones_op.py index 6b2bbd3786522f..02e349e0c30203 100644 --- a/python/paddle/fluid/tests/unittests/test_ones_op.py +++ b/python/paddle/fluid/tests/unittests/test_ones_op.py @@ -17,7 +17,6 @@ import numpy as np import paddle -import numpy as np class ApiOnesTest(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_paddle_save_load.py b/python/paddle/fluid/tests/unittests/test_paddle_save_load.py index ed9ffcaab9da99..c78fa2ed847b41 100644 --- a/python/paddle/fluid/tests/unittests/test_paddle_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_paddle_save_load.py @@ -12,19 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest -import numpy as np import os -from io import BytesIO import tempfile +import unittest +from io import BytesIO + +import numpy as np +from test_imperative_base import new_program_scope import paddle +import paddle.fluid as fluid +import paddle.fluid.framework as framework import paddle.nn as nn import paddle.optimizer as opt -import paddle.fluid as fluid from paddle.fluid.optimizer import Adam -import paddle.fluid.framework as framework -from test_imperative_base import new_program_scope from paddle.optimizer.lr import LRScheduler BATCH_SIZE = 16 diff --git a/python/paddle/fluid/tests/unittests/test_pool3d_op.py b/python/paddle/fluid/tests/unittests/test_pool3d_op.py index 0c62aeb257ad36..7204c0c49389ae 100644 --- a/python/paddle/fluid/tests/unittests/test_pool3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pool3d_op.py @@ -13,11 +13,12 @@ # limitations under the License. import unittest + import numpy as np +from op_test import OpTest import paddle import paddle.fluid.core as core -from op_test import OpTest def adaptive_start_index(index, input_size, output_size): diff --git a/python/paddle/fluid/tests/unittests/test_roi_pool_op.py b/python/paddle/fluid/tests/unittests/test_roi_pool_op.py index b84cca41bbd294..6ae113fa8a1ca7 100644 --- a/python/paddle/fluid/tests/unittests/test_roi_pool_op.py +++ b/python/paddle/fluid/tests/unittests/test_roi_pool_op.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle -import unittest -import numpy as np import math import sys +import unittest +from decimal import ROUND_HALF_UP, Decimal + +import numpy as np from op_test import OpTest -from decimal import Decimal, ROUND_HALF_UP +import paddle def _round(x): diff --git a/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py b/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py index aa0e0e36ff6134..ce18a4647966d3 100644 --- a/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py +++ b/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest + import numpy as np from op_test import OpTest -from scipy.special import logit -from scipy.special import expit -import unittest -from paddle.fluid import Program, program_guard -import paddle.fluid as fluid +from scipy.special import expit, logit + import paddle +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard class TestSigmoidCrossEntropyWithLogitsOp1(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py b/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py index 6b92a3253b51b3..1227a489493414 100755 --- a/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py +++ b/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py @@ -13,6 +13,7 @@ # limitations under the License. import unittest + import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_size_op.py b/python/paddle/fluid/tests/unittests/test_size_op.py index ac070c099e63c0..b3ae19b8ef20eb 100644 --- a/python/paddle/fluid/tests/unittests/test_size_op.py +++ b/python/paddle/fluid/tests/unittests/test_size_op.py @@ -13,11 +13,12 @@ # limitations under the License. import unittest + import numpy as np -import paddle -import paddle.fluid as fluid from op_test import OpTest + import paddle +import paddle.fluid as fluid class TestSizeOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index c0472b16515a01..8871966d350aa7 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -13,18 +13,20 @@ # limitations under the License. +import errno +import os +import pickle +import tempfile import unittest + +import numpy as np +from test_imperative_base import new_program_scope + import paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.framework as framework from paddle.fluid.optimizer import Adam -from test_imperative_base import new_program_scope -import numpy as np -import pickle -import os -import errno -import tempfile paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/utils.py b/python/paddle/fluid/tests/unittests/utils.py index 04c3085c3dffc1..a9cbc845c2dfb3 100644 --- a/python/paddle/fluid/tests/unittests/utils.py +++ b/python/paddle/fluid/tests/unittests/utils.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from paddle.fluid.framework import _dygraph_guard -import paddle.fluid as fluid import numpy as np +import paddle.fluid as fluid +from paddle.fluid.framework import _dygraph_guard + __all__ = ['DyGraphProgramDescTracerTestHelper', 'is_equal_program'] diff --git a/python/paddle/fluid/tests/unittests/xpu/test_affine_channel_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_affine_channel_op_xpu.py index fc3c9612ead016..13255906925655 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_affine_channel_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_affine_channel_op_xpu.py @@ -20,8 +20,10 @@ sys.path.append("..") import unittest + import numpy as np from op_test_xpu import XPUOpTest + import paddle import paddle.fluid.core as core diff --git a/python/paddle/hapi/progressbar.py b/python/paddle/hapi/progressbar.py index 0f6f375b2a38be..6c33d4962e79cb 100644 --- a/python/paddle/hapi/progressbar.py +++ b/python/paddle/hapi/progressbar.py @@ -13,10 +13,11 @@ # limitations under the License. import os +import struct import sys import time + import numpy as np -import struct __all__ = [] diff --git a/python/paddle/incubate/multiprocessing/reductions.py b/python/paddle/incubate/multiprocessing/reductions.py index 04fe123fdc1930..5bc1e211862121 100644 --- a/python/paddle/incubate/multiprocessing/reductions.py +++ b/python/paddle/incubate/multiprocessing/reductions.py @@ -12,18 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle +import copy # TODO: check the hooks of tensor # TODO: check serializing named tensor # TODO: check influence on autograd import sys -import copy import threading -from multiprocessing.util import register_after_fork +from collections import OrderedDict from multiprocessing.reduction import ForkingPickler +from multiprocessing.util import register_after_fork -from collections import OrderedDict +import paddle def _supported_check(): diff --git a/python/paddle/inference/wrapper.py b/python/paddle/inference/wrapper.py index c69cfa06f3982e..ab532dc32266dc 100644 --- a/python/paddle/inference/wrapper.py +++ b/python/paddle/inference/wrapper.py @@ -12,15 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid.core as core -from paddle.fluid.core import AnalysisConfig, PaddleDType, PaddlePlace -from paddle.fluid.core import PaddleInferPredictor, PaddleInferTensor -from paddle.fluid.core import convert_to_mixed_precision_bind - import os -import numpy as np from typing import Set +import numpy as np + +import paddle.fluid.core as core +from paddle.fluid.core import ( + AnalysisConfig, + PaddleDType, + PaddleInferPredictor, + PaddleInferTensor, + PaddlePlace, + convert_to_mixed_precision_bind, +) + DataType = PaddleDType PlaceType = PaddlePlace PrecisionType = AnalysisConfig.Precision diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index d70d958016ab4e..9fc2bbd975d5fb 100755 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -13,19 +13,17 @@ # limitations under the License. 
import paddle -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal, Constant -from paddle.fluid.framework import static_only, Variable, _non_static_mode -from paddle.fluid.layers.layer_function_generator import templatedoc - -from paddle.fluid.data_feeder import check_dtype - from paddle.common_ops_import import ( + LayerHelper, check_type, check_variable_and_dtype, utils, - LayerHelper, ) +from paddle.fluid.data_feeder import check_dtype +from paddle.fluid.framework import Variable, _non_static_mode, static_only +from paddle.fluid.initializer import Constant, Normal +from paddle.fluid.layers.layer_function_generator import templatedoc +from paddle.fluid.param_attr import ParamAttr __all__ = [] diff --git a/python/paddle/vision/datasets/folder.py b/python/paddle/vision/datasets/folder.py index 96a5b9b200eea9..650114cd19da3e 100644 --- a/python/paddle/vision/datasets/folder.py +++ b/python/paddle/vision/datasets/folder.py @@ -13,6 +13,7 @@ # limitations under the License. import os + from PIL import Image import paddle From 4f834cb275a95b49d009ae98ddb74b543494c261 Mon Sep 17 00:00:00 2001 From: zhangyikun02 <48021248+zhangyk0314@users.noreply.github.com> Date: Thu, 1 Dec 2022 10:11:43 +0800 Subject: [PATCH 074/154] change d2d copy to api copy in xpu kernel, test=kunlun (#48505) --- .../phi/kernels/xpu/elementwise_add_grad_kernel.cc | 12 ++++++++++-- paddle/phi/kernels/xpu/warpctc_kernel.cc | 9 +++++++++ .../fluid/tests/unittests/xpu/test_warpctc_op_xpu.py | 7 +++++-- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc index 829747932b3ecc..a25cd0cd61303f 100644 --- a/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc @@ -45,7 +45,11 @@ void AddGradKernel(const Context& dev_ctx, T* dx_data = dev_ctx.template Alloc(dx); if (dx->dims() == dz_dims) { if (dx_data != dz_data) { - Copy(dev_ctx, *dz, dev_ctx.GetPlace(), false, dx); + int ret = xpu::copy(dev_ctx.x_context(), + reinterpret_cast(dz_data), + reinterpret_cast(dx->data()), + dx->numel()); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy"); } } else { // For inplace strategy, dx will be stored in addr of dz, which makes @@ -73,7 +77,11 @@ void AddGradKernel(const Context& dev_ctx, T* dy_data = dy->mutable_data(dev_ctx.GetPlace()); if (dy->dims() == dz_dims) { if (dy_data != dz_data) { - Copy(dev_ctx, *dz, dev_ctx.GetPlace(), false, dy); + int ret = xpu::copy(dev_ctx.x_context(), + reinterpret_cast(dz_data), + reinterpret_cast(dy->data()), + dy->numel()); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy"); } } else { std::vector reduce_dims = diff --git a/paddle/phi/kernels/xpu/warpctc_kernel.cc b/paddle/phi/kernels/xpu/warpctc_kernel.cc index 833ff81daa2088..0bbb0f8e59b256 100644 --- a/paddle/phi/kernels/xpu/warpctc_kernel.cc +++ b/paddle/phi/kernels/xpu/warpctc_kernel.cc @@ -68,6 +68,15 @@ void WarpctcKernel(const Context& dev_ctx, "but received %d. 
", sequence_width)); + int lm_workspace = (max_sequence_length + 1) * + (2 * max_target_seq_length + sequence_width + 1) * + sizeof(T) + + (7 * max_target_seq_length + 3) * sizeof(int); + PADDLE_ENFORCE_LE(lm_workspace, + 6144, + phi::errors::InvalidArgument( + "Input size is too large for xpu in warpctc kernel")); + loss->Resize(phi::make_ddim({num_sequences, 1})); dev_ctx.template Alloc(loss); diff --git a/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py index a6c8ae8656ca82..b6d11a88930c6f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py @@ -239,7 +239,6 @@ def setUp(self): logits = np.random.uniform( 0.1, 1.0, [sum(self.logits_length), self.num_classes] ).astype(self.dtype) - print("logits.shape = ", logits.shape) softmax = np.apply_along_axis(stable_softmax, 1, logits) # labels should not be blank labels = np.random.randint( @@ -416,7 +415,11 @@ def test_dygraph_with_lod(): labels = paddle.to_tensor(labels) paddle.nn.functional.ctc_loss( - log_probs=softmax, labels=labels, reduction='none' + log_probs=softmax, + labels=labels, + input_lengths=None, + label_lengths=None, + reduction='none', ) paddle.disable_static() From 89db045a9142a6d3167a8fd3ae264807982b58c1 Mon Sep 17 00:00:00 2001 From: risemeup1 <62429225+risemeup1@users.noreply.github.com> Date: Thu, 1 Dec 2022 10:37:11 +0800 Subject: [PATCH 075/154] protobuf update (#48495) --- python/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/requirements.txt b/python/requirements.txt index fcdfddc9e2eb69..a4550d3ad906d0 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,6 +1,6 @@ requests>=2.20.0 numpy>=1.13 -protobuf>=3.1.0, <=3.20.0 +protobuf>=3.1.0, <=3.20.2 Pillow decorator astor From da0022e40393783c5ee48627e62be04f21bb9e1b Mon Sep 17 00:00:00 2001 From: Matsumoto Ruko <38883252+gsq7474741@users.noreply.github.com> Date: Thu, 1 Dec 2022 10:59:09 +0800 Subject: [PATCH 076/154] [CodeStyle][py36] remove sys.version branch for py36 (#48540) --- .../fleet/data_generator/data_generator.py | 4 ++-- python/paddle/distributed/metric/metrics.py | 7 ++----- tools/dockerfile/build_scripts/ssl-check.py | 16 ++++------------ 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/python/paddle/distributed/fleet/data_generator/data_generator.py b/python/paddle/distributed/fleet/data_generator/data_generator.py index 297a2cf003ecb4..64f8e377b94741 100644 --- a/python/paddle/distributed/fleet/data_generator/data_generator.py +++ b/python/paddle/distributed/fleet/data_generator/data_generator.py @@ -262,7 +262,7 @@ def _gen_str(self, line): Returns: Return a string data that can be read directly by the MultiSlotDataFeed. ''' - if sys.version > '3' and isinstance(line, zip): + if isinstance(line, zip): line = list(line) if not isinstance(line, list) and not isinstance(line, tuple): @@ -311,7 +311,7 @@ def _gen_str(self, line): Returns: Return a string data that can be read directly by the MultiSlotDataFeed. 
''' - if sys.version > '3' and isinstance(line, zip): + if isinstance(line, zip): line = list(line) if not isinstance(line, list) and not isinstance(line, tuple): diff --git a/python/paddle/distributed/metric/metrics.py b/python/paddle/distributed/metric/metrics.py index 306590b92dae1e..5aea93bc8b54dd 100644 --- a/python/paddle/distributed/metric/metrics.py +++ b/python/paddle/distributed/metric/metrics.py @@ -13,7 +13,6 @@ # limitations under the License. import logging -import sys import yaml @@ -36,10 +35,8 @@ def init_metric( bucket_size=1000000, ): yaml_fobj = open(metric_yaml_path) - if sys.version.startswith('2.7.13'): - content = yaml.load(yaml_fobj) - else: - content = yaml.load(yaml_fobj, Loader=yaml.FullLoader) + + content = yaml.load(yaml_fobj, Loader=yaml.FullLoader) print("yaml metric config: \n") print(content) diff --git a/tools/dockerfile/build_scripts/ssl-check.py b/tools/dockerfile/build_scripts/ssl-check.py index 6c0f07330e9354..aaf21c70f09be6 100644 --- a/tools/dockerfile/build_scripts/ssl-check.py +++ b/tools/dockerfile/build_scripts/ssl-check.py @@ -14,22 +14,14 @@ # cf. https://github.com/pypa/manylinux/issues/53 +import sys +from urllib.request import urlopen + GOOD_SSL = "https://google.com" BAD_SSL = "https://self-signed.badssl.com" -import sys - print("Testing SSL certificate checking for Python:", sys.version) -if sys.version_info[0] >= 3: - from urllib.request import urlopen - - EXC = OSError -else: - from urllib import urlopen - - EXC = IOError - print("Connecting to %s should work" % (GOOD_SSL,)) urlopen(GOOD_SSL) print("...it did, yay.") @@ -40,5 +32,5 @@ # If we get here then we failed: print("...it DIDN'T!!!!!11!!1one!") sys.exit(1) -except EXC: +except OSError: print("...it did, yay.") From 47e7b7a52bdb5086684ad03a0de09020a323bd03 Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Thu, 1 Dec 2022 11:49:55 +0800 Subject: [PATCH 077/154] [Fix Type] Fix typo error (#48391) * fix typo error * pass CI-coverage --- paddle/fluid/eager/grad_node_info.h | 2 +- paddle/fluid/framework/operator.cc | 4 +- paddle/fluid/imperative/dygraph_grad_maker.h | 4 +- paddle/fluid/operators/ops_extra_info.h | 2 +- paddle/fluid/pybind/eager_functions.cc | 8 ++-- python/paddle/fluid/tests/custom_op/utils.py | 2 +- .../utils/cpp_extension/cpp_extension.py | 37 +++++++++---------- .../utils/cpp_extension/extension_utils.py | 4 +- 8 files changed, 31 insertions(+), 32 deletions(-) diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h index 650446401468f5..f20bad71c517ab 100644 --- a/paddle/fluid/eager/grad_node_info.h +++ b/paddle/fluid/eager/grad_node_info.h @@ -173,7 +173,7 @@ class GradNodeBase { virtual ~GradNodeBase() { VLOG(7) << "Destruct GradNodeBase"; } /** - * operator() designed to contian the real backward execution logic, it should + * operator() designed to contain the real backward execution logic, it should * be overrided by derived class defined for each operator. 
It accepts a * vector of Tensor which contains grads input of current operator * diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 0784c8330686a3..b294779837fc2c 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -3262,7 +3262,7 @@ void OperatorWithKernel::BuildPhiKernelContext( for (const auto& attr_iter : runtime_attrs) { auto& attr_name = attr_iter.first; auto& attr = attr_iter.second; - auto attr_propertys = paddle::operators::GetExtraAttrPropertys(attr_name); + auto attr_propertys = paddle::operators::GetExtraAttrProperties(attr_name); SetDnnAttrIntoDeviceContext(dev_ctx, attr, attr_name, attr_propertys); } // TODO(chenweihang): Since the pass will still `SetAttr` in the OpDesc, @@ -3277,7 +3277,7 @@ void OperatorWithKernel::BuildPhiKernelContext( for (const auto& attr_iter : attrs) { auto& attr_name = attr_iter.first; auto& attr = attr_iter.second; - auto attr_propertys = paddle::operators::GetExtraAttrPropertys(attr_name); + auto attr_propertys = paddle::operators::GetExtraAttrProperties(attr_name); SetDnnAttrIntoDeviceContext(dev_ctx, attr, attr_name, attr_propertys); } VLOG(4) << "Done runtime attributes"; diff --git a/paddle/fluid/imperative/dygraph_grad_maker.h b/paddle/fluid/imperative/dygraph_grad_maker.h index e0c943f18ce20a..0683bdc16a80d3 100644 --- a/paddle/fluid/imperative/dygraph_grad_maker.h +++ b/paddle/fluid/imperative/dygraph_grad_maker.h @@ -237,7 +237,7 @@ class TracedGradOp { if (kRole == TracedVarRole::kBackward) { for (auto& var : vars) { - VLOG(6) << "SetIutput var name: " << var->Name(); + VLOG(6) << "SetInput var name: " << var->Name(); if (var && !var->OverridedStopGradient()) { var->SetGraphIsFreed(false); auto dirty_grad_node = var->GradNode(); @@ -351,7 +351,7 @@ class TracedGradOp { // Get a snapshot of VariableWrapper at a certain inplace version. // The inplace version number of VariableWrapper is used for inplace - // detection in gradient compution. + // detection in gradient computation. 
static const std::shared_ptr SnapshotVarWrapper( const std::shared_ptr& var_wrapper) { // NOTE(liym27): diff --git a/paddle/fluid/operators/ops_extra_info.h b/paddle/fluid/operators/ops_extra_info.h index 12df9f96d6d58d..94f0fa2a606c36 100644 --- a/paddle/fluid/operators/ops_extra_info.h +++ b/paddle/fluid/operators/ops_extra_info.h @@ -139,7 +139,7 @@ const std::unordered_map ExtraAttrPropertySet(ExtraAttrProperty::GPUDNN)}, }; -inline ExtraAttrPropertySet GetExtraAttrPropertys( +inline ExtraAttrPropertySet GetExtraAttrProperties( const std::string& attr_name) { auto iter = extra_attr_properties.find(attr_name); if (iter != extra_attr_properties.end()) { diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 14368a9d99cad4..9c0f189e63050b 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -421,7 +421,7 @@ static void ConstructFwdAndBwdMap( } } -static std::vector CastAttrsToTragetType( +static std::vector CastAttrsToTargetType( const std::vector& src, const std::vector& attrs_names) { std::vector res; @@ -488,7 +488,7 @@ static PyObject* eager_api_jit_function_call(PyObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } -static PyObject* eager_api_run_costum_op(PyObject* self, +static PyObject* eager_api_run_custom_op(PyObject* self, PyObject* args, PyObject* kwargs) { EAGER_TRY @@ -511,7 +511,7 @@ static PyObject* eager_api_run_costum_op(PyObject* self, op_type)); VLOG(7) << "Run Kernel of Custom Op: " << op_type; std::vector res_attrs = - CastAttrsToTragetType(ctx.Attrs(), + CastAttrsToTargetType(ctx.Attrs(), paddle::framework::OpMetaInfoHelper::GetAttrs( meta_info_map.at(op_type)[0])); ctx.EmplaceBackAttrs(res_attrs); @@ -1087,7 +1087,7 @@ PyMethodDef variable_functions[] = { METH_VARARGS | METH_KEYWORDS, NULL}, {"_run_custom_op", - (PyCFunction)(void (*)(void))eager_api_run_costum_op, + (PyCFunction)(void (*)(void))eager_api_run_custom_op, METH_VARARGS | METH_KEYWORDS, NULL}, {"tensor_copy", diff --git a/python/paddle/fluid/tests/custom_op/utils.py b/python/paddle/fluid/tests/custom_op/utils.py index ab15f742ecedc7..671d412e80851b 100644 --- a/python/paddle/fluid/tests/custom_op/utils.py +++ b/python/paddle/fluid/tests/custom_op/utils.py @@ -23,7 +23,7 @@ site_packages_path = get_python_lib() # Note(Aurelius84): We use `add_test` in Cmake to config how to run unittest in CI. # `PYTHONPATH` will be set as `build/python/paddle` that will make no way to find -# paddle include directory. Because the following path is generated after insalling +# paddle include directory. Because the following path is generated after installing # PaddlePaddle whl. So here we specific `include_dirs` to avoid errors in CI. paddle_includes = [ os.path.join(site_packages_path, 'paddle', 'include'), diff --git a/python/paddle/utils/cpp_extension/cpp_extension.py b/python/paddle/utils/cpp_extension/cpp_extension.py index c31b934c55ec86..91ce1203867b5a 100644 --- a/python/paddle/utils/cpp_extension/cpp_extension.py +++ b/python/paddle/utils/cpp_extension/cpp_extension.py @@ -56,7 +56,6 @@ IS_WINDOWS, OS_NAME, MSVC_COMPILE_FLAGS, - MSVC_COMPILE_FLAGS, ) from .extension_utils import CLANG_COMPILE_FLAGS, CLANG_LINK_FLAGS @@ -85,7 +84,7 @@ def setup(**attr): ``import`` statement. It encapsulates the python built-in ``setuptools.setup`` function and keeps arguments - and usage same as the native interface. Meanwhile, it hiddens Paddle inner framework + and usage same as the native interface. 
Meanwhile, it hides Paddle inner framework concepts, such as necessary compiling flags, included paths of head files, and linking flags. It also will automatically search and valid local environment and versions of ``cc(Linux)`` , ``cl.exe(Windows)`` and ``nvcc`` , then compiles customized operators @@ -104,8 +103,8 @@ def setup(**attr): Note: - 1. Currently we support Linux, MacOS and Windows platfrom. - 2. On Linux platform, we recommend to use GCC 8.2 as soft linking condidate of ``/usr/bin/cc`` . + 1. Currently we support Linux, MacOS and Windows platform. + 2. On Linux platform, we recommend to use GCC 8.2 as soft linking candidate of ``/usr/bin/cc`` . Then, Use ``which cc`` to ensure location of ``cc`` and using ``cc --version`` to ensure linking GCC version. 3. On Windows platform, we recommend to install `` Visual Studio`` (>=2017). @@ -164,7 +163,7 @@ def setup(**attr): ``site-package/paddle/include`` . Please add the corresponding directory path if including third-party head files. Default is None. extra_compile_args(list[str] | dict, optional): Specify the extra compiling flags such as ``-O3`` . If set ``list[str]`` , all these flags - will be applied for ``cc`` and ``nvcc`` compiler. It support specify flags only applied ``cc`` or ``nvcc`` + will be applied for ``cc`` and ``nvcc`` compiler. It supports specify flags only applied ``cc`` or ``nvcc`` compiler using dict type with ``{'cxx': [...], 'nvcc': [...]}`` . Default is None. **attr(dict, optional): Specify other arguments same as ``setuptools.setup`` . @@ -211,7 +210,7 @@ def setup(**attr): ), "Required only one Extension, but received {}. If you want to compile multi operators, you can include all necessary source files in one Extension.".format( len(ext_modules) ) - # replace Extension.name with attr['name] to keep consistant with Package name. + # replace Extension.name with attr['name] to keep consistent with Package name. for ext_module in ext_modules: ext_module.name = attr['name'] @@ -263,7 +262,7 @@ def CppExtension(sources, *args, **kwargs): Note: - It is mainly used in ``setup`` and the nama of built shared library keeps same + It is mainly used in ``setup`` and the name of built shared library keeps same as ``name`` argument specified in ``setup`` interface. @@ -277,7 +276,7 @@ def CppExtension(sources, *args, **kwargs): """ kwargs = normalize_extension_kwargs(kwargs, use_cuda=False) # Note(Aurelius84): While using `setup` and `jit`, the Extension `name` will - # be replaced as `setup.name` to keep consistant with package. Because we allow + # be replaced as `setup.name` to keep consistent with package. Because we allow # users can not specific name in Extension. # See `paddle.utils.cpp_extension.setup` for details. name = kwargs.get('name', None) @@ -315,7 +314,7 @@ def CUDAExtension(sources, *args, **kwargs): Note: - It is mainly used in ``setup`` and the nama of built shared library keeps same + It is mainly used in ``setup`` and the name of built shared library keeps same as ``name`` argument specified in ``setup`` interface. @@ -329,7 +328,7 @@ def CUDAExtension(sources, *args, **kwargs): """ kwargs = normalize_extension_kwargs(kwargs, use_cuda=True) # Note(Aurelius84): While using `setup` and `jit`, the Extension `name` will - # be replaced as `setup.name` to keep consistant with package. Because we allow + # be replaced as `setup.name` to keep consistent with package. Because we allow # users can not specific name in Extension. # See `paddle.utils.cpp_extension.setup` for details. 
name = kwargs.get('name', None) @@ -376,7 +375,7 @@ def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs): """ - Attributes is initialized with following oreder: + Attributes is initialized with following order: 1. super().__init__() 2. initialize_options(self) @@ -430,9 +429,9 @@ def unix_custom_single_compiler( obj, src, ext, cc_args, extra_postargs, pp_opts ): """ - Monkey patch machanism to replace inner compiler to custom complie process on Unix platform. + Monkey patch mechanism to replace inner compiler to custom complie process on Unix platform. """ - # use abspath to ensure no warning and don't remove deecopy because modify params + # use abspath to ensure no warning and don't remove deepcopy because modify params # with dict type is dangerous. src = os.path.abspath(src) cflags = copy.deepcopy(extra_postargs) @@ -593,7 +592,7 @@ def win_custom_spawn(cmd): def object_filenames_with_cuda(origina_func, build_directory): """ - Decorated the function to add customized naming machanism. + Decorated the function to add customized naming mechanism. Originally, both .cc/.cu will have .o object output that will bring file override problem. Use .cu.o as CUDA object suffix. """ @@ -645,7 +644,7 @@ def wrapper(source_filenames, strip_dir=0, output_dir=''): _reset_so_rpath(so_path) def get_ext_filename(self, fullname): - # for example: custommed_extension.cpython-37m-x86_64-linux-gnu.so + # for example: customized_extension.cpython-37m-x86_64-linux-gnu.so ext_name = super().get_ext_filename(fullname) split_str = '.' name_items = ext_name.split(split_str) @@ -658,7 +657,7 @@ def get_ext_filename(self, fullname): name_items.pop(-2) ext_name = split_str.join(name_items) - # custommed_extension.dylib + # customized_extension.dylib if OS_NAME.startswith('darwin'): name_items[-1] = 'dylib' ext_name = split_str.join(name_items) @@ -728,7 +727,7 @@ def _record_op_info(self): class EasyInstallCommand(easy_install): """ - Extend easy_intall Command to control the behavior of naming shared library + Extend easy_install Command to control the behavior of naming shared library file. NOTE(Aurelius84): This is a hook subclass inherited Command used to rename shared @@ -838,8 +837,8 @@ def load( Note: - 1. Currently we support Linux, MacOS and Windows platfrom. - 2. On Linux platform, we recommend to use GCC 8.2 as soft linking condidate of ``/usr/bin/cc`` . + 1. Currently we support Linux, MacOS and Windows platform. + 2. On Linux platform, we recommend to use GCC 8.2 as soft linking candidate of ``/usr/bin/cc`` . Then, Use ``which cc`` to ensure location of ``cc`` and using ``cc --version`` to ensure linking GCC version. 3. On Windows platform, we recommend to install `` Visual Studio`` (>=2017). diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py index 46046b5166029f..09b5492e54180a 100644 --- a/python/paddle/utils/cpp_extension/extension_utils.py +++ b/python/paddle/utils/cpp_extension/extension_utils.py @@ -197,7 +197,7 @@ def __bootstrap__(): """ ).lstrip() - # Parse registerring op information + # Parse registering op information _, op_info = CustomOpInfo.instance().last() so_path = op_info.so_path @@ -250,7 +250,7 @@ def add(self, op_name, so_name, so_path=None): def last(self): """ - Return the lastest insert custom op info. + Return the last inserted custom op info. 
""" assert len(self.op_info_map) > 0 return next(reversed(self.op_info_map.items())) From 310f4320b705458131a2ef6f98a4b9acec57447b Mon Sep 17 00:00:00 2001 From: 201716010711 <87008376+201716010711@users.noreply.github.com> Date: Thu, 1 Dec 2022 14:33:45 +0800 Subject: [PATCH 078/154] clean fluid task: delete sum api (#48438) --- python/paddle/fluid/layers/nn.py | 73 ------------ python/paddle/fluid/optimizer.py | 4 +- .../tests/unittests/ipu/test_sum_op_ipu.py | 4 +- .../unittests/ir/test_ir_fusion_group_pass.py | 4 +- .../fluid/tests/unittests/test_layers.py | 2 +- .../tests/unittests/test_optimizer_grad.py | 2 +- .../test_paddle_fluid_modelaverage.py | 109 ++++++++++++++++++ .../fluid/tests/unittests/test_sum_op.py | 6 +- .../tests/unittests/xpu/test_sum_op_xpu.py | 6 +- python/paddle/hapi/model.py | 4 +- .../paddle/incubate/optimizer/modelaverage.py | 4 +- 11 files changed, 127 insertions(+), 91 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_paddle_fluid_modelaverage.py diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 15eada61cf0d5a..03059afb191b5f 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -104,7 +104,6 @@ 'elementwise_mul', 'gaussian_random', 'sampling_id', - 'sum', 'shape', 'clip', 'clip_by_norm', @@ -5439,78 +5438,6 @@ def sampling_id(x, min=0.0, max=1.0, seed=0, dtype='float32'): return out -@templatedoc() -def sum(x): - """ - ${comment} - - Case 1: - :: - Input: - Input. Shape = [2, 3] - Input = [[1, 2, 3], - [4, 5, 6]] - - Output: - The output. Shape = [2, 3] - Output = [[1, 2, 3], - [4, 5, 6]] - - Case 2: - :: - Input: - First input: - Input1. Shape = [2, 3] - Input1 = [[1, 2, 3], - [4, 5, 6]] - - The second input: - Input2. Shape = [2, 3] - Input2 = [[7, 8, 9], - [10, 11, 12]] - - Output: - The output. Shape = [2, 3] - Output = [[8, 10, 12], - [14, 16, 18]] - - Args: - x (Variable|list(Variable)): ${x_comment} - - Returns: - Variable: ${out_comment} - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - - input0 = fluid.layers.fill_constant(shape=[2, 3], dtype='int64', value=5) - input1 = fluid.layers.fill_constant(shape=[2, 3], dtype='int64', value=3) - sum = fluid.layers.sum([input0, input1]) - - # You can print out 'sum' via executor. - out = fluid.layers.Print(sum, message="the sum of input0 and input1: ") - exe = fluid.Executor(fluid.CPUPlace()) - exe.run(fluid.default_main_program()) - - # The printed result is: - # 1570701754 the sum of input0 and input1: The place is:CPUPlace - # Tensor[sum_0.tmp_0] - # shape: [2,3,] - # dtype: l - # data: 8,8,8,8,8,8, - - # the sum of input0 and input1 is 2-D Tensor with shape [2,3]. - # dtype is the corresponding C++ data type, which may vary in different environments. - # Eg: if the data type of tensor is int64, then the corresponding C++ data type is int64_t, - # so the dtype value is typeid(int64_t).Name(), which is 'x' on MacOS, 'l' on Linux, - # and '__int64' on Windows. They both represent 64-bit integer variables. 
- """ - - return paddle.add_n(x) - - def shape(input): """ :alias_main: paddle.shape diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index c7a817e1d7594e..8c9a940d846be9 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -3980,8 +3980,8 @@ def _add_average_apply_op(self, block, param_grad): # backup param value to grad layers.assign(input=param, output=grad) # param = (sum_1 + sum_2 + sum_3) / (num_accumulates + old_num_accumulates) - tmp = layers.sum(x=[num_accumulates, old_num_accumulates]) - sum = layers.sum(x=[sum_1, sum_2, sum_3]) + tmp = paddle.add_n([num_accumulates, old_num_accumulates]) + sum = paddle.add_n([sum_1, sum_2, sum_3]) tmp = layers.cast( x=tmp, dtype='float32' if self._dtype is None else self._dtype ) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_sum_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_sum_op_ipu.py index c0bc022f0579fa..91c7596a8f90a2 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_sum_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_sum_op_ipu.py @@ -51,7 +51,7 @@ def build_model(self): y = paddle.static.data( name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32' ) - out = paddle.fluid.layers.sum([x, y], **self.attrs) + out = paddle.add_n([x, y], **self.attrs) self.fetch_list = [out.name] def run_model(self, exec_mode): @@ -92,7 +92,7 @@ def build_model(self): z = paddle.static.data( name=self.feed_list[2], shape=self.feed_shape[2], dtype='float32' ) - out = paddle.fluid.layers.sum([x, y, z], **self.attrs) + out = paddle.add_n([x, y, z], **self.attrs) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_fusion_group_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_fusion_group_pass.py index 19754fd6f2da05..1538bac16ff916 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_fusion_group_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_fusion_group_pass.py @@ -165,13 +165,13 @@ def build_program(self, dtype): ) # subgraph with 2 op nodes - tmp_0 = layers.sum( + tmp_0 = paddle.add_n( [self.feed_vars[0], self.feed_vars[1], self.feed_vars[2]] ) tmp_1 = paddle.sqrt(tmp_0) tmp_2 = layers.mul(tmp_0, self.feed_vars[3]) # subgraph with 2 op nodes - tmp_3 = paddle.square(layers.sum([tmp_1, tmp_2])) + tmp_3 = paddle.square(paddle.add_n([tmp_1, tmp_2])) self.append_gradients(tmp_3) diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index eaf7acaba59963..7750f6b613b974 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -3534,7 +3534,7 @@ def make_sum(self): name="input", shape=[13, 11], dtype='float32' ) - out = layers.sum(input) + out = paddle.add_n(input) return out def make_slice(self): diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py index d5e5a7a200c93a..acdc43659d81a4 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py @@ -122,7 +122,7 @@ def cond_false(): cond_i = fluid.layers.assign(np.array([cond_i], dtype='float32')) sum_cond = fluid.layers.cond(cond_i > 1.0, cond_true, cond_false) - sum_all = fluid.layers.sum([sum_xy, sub_yz, sum_cond]) + sum_all = paddle.add_n([sum_xy, sub_yz, sum_cond]) mean_out = paddle.mean(sum_all) if use_bf16: import paddle.static.amp as amp diff 
--git a/python/paddle/fluid/tests/unittests/test_paddle_fluid_modelaverage.py b/python/paddle/fluid/tests/unittests/test_paddle_fluid_modelaverage.py new file mode 100644 index 00000000000000..7be33d31fd483a --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_paddle_fluid_modelaverage.py @@ -0,0 +1,109 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +import paddle.fluid as fluid +import paddle + + +class TestModelAverage(unittest.TestCase): + def test_model_average_static(self): + paddle.enable_static() + place = fluid.CPUPlace() + shape = [2, 3, 8, 8] + exe = fluid.Executor(place) + train_program = fluid.Program() + startup = fluid.Program() + test_program = fluid.Program() + with fluid.program_guard(train_program, startup): + with fluid.unique_name.guard(): + data = fluid.data(name='X', shape=[None, 1], dtype='float32') + hidden = fluid.layers.fc(input=data, size=10) + loss = paddle.mean(hidden) + test_program = train_program.clone() + optimizer = paddle.optimizer.Momentum( + learning_rate=0.2, momentum=0.1 + ) + + optimizer.minimize(loss) + # build ModelAverage optimizer + model_average = paddle.fluid.optimizer.ModelAverage( + 0.15, min_average_window=2, max_average_window=10 + ) + + exe.run(startup) + for i in range(10): + x = np.random.random(size=(10, 1)).astype('float32') + ( + latest_b, + sum_1, + sum_2, + sum_3, + num_accumulates, + old_num_accumulates, + num_updates, + ) = exe.run( + program=train_program, + feed={'X': x}, + fetch_list=[ + 'fc_0.b_0', + 'fc_0.b_0_sum_1_0', + 'fc_0.b_0_sum_2_0', + 'fc_0.b_0_sum_3_0', + 'fc_0.b_0_num_accumulates_0', + 'fc_0.b_0_old_num_accumulates_0', + 'fc_0.b_0_num_updates_0', + ], + ) + self.assertTrue( + np.equal(sum_1, np.zeros(shape=[10], dtype='float32')).all() + ) + self.assertTrue( + np.equal(sum_2, np.zeros(shape=[10], dtype='float32')).all() + ) + self.assertTrue( + np.equal(num_accumulates, np.array([0], dtype='int64')).all() + ) + self.assertTrue( + np.equal(old_num_accumulates, np.array([2], dtype='int64')).all() + ) + self.assertTrue( + np.equal(num_updates, np.array([10], dtype='int64')).all() + ) + + average_b = (sum_1 + sum_2 + sum_3) / ( + num_accumulates + old_num_accumulates + ) + # apply ModelAverage + with model_average.apply(exe): + x = np.random.random(size=(10, 1)).astype('float32') + outs, b = exe.run( + program=test_program, + feed={'X': x}, + fetch_list=[loss.name, 'fc_0.b_0'], + ) + self.assertAlmostEqual(np.mean(average_b), np.mean(b)) + + x = np.random.random(size=(10, 1)).astype('float32') + outs, b = exe.run( + program=test_program, + feed={'X': x}, + fetch_list=[loss.name, 'fc_0.b_0'], + ) + self.assertAlmostEqual(np.mean(latest_b), np.mean(b)) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_sum_op.py b/python/paddle/fluid/tests/unittests/test_sum_op.py index 68fdfcb9908e80..9626a872e2ffc2 100644 --- 
a/python/paddle/fluid/tests/unittests/test_sum_op.py +++ b/python/paddle/fluid/tests/unittests/test_sum_op.py @@ -426,20 +426,20 @@ def test_add_n_and_add_and_grad(self): class TestRaiseSumError(unittest.TestCase): def test_errors(self): def test_type(): - fluid.layers.sum([11, 22]) + paddle.add_n([11, 22]) self.assertRaises(TypeError, test_type) def test_dtype(): data1 = fluid.data(name="input1", shape=[10], dtype="int8") data2 = fluid.data(name="input2", shape=[10], dtype="int8") - fluid.layers.sum([data1, data2]) + paddle.add_n([data1, data2]) self.assertRaises(TypeError, test_dtype) def test_dtype1(): data1 = fluid.data(name="input1", shape=[10], dtype="int8") - fluid.layers.sum(data1) + paddle.add_n(data1) self.assertRaises(TypeError, test_dtype1) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py index ec615324bcc635..b400bd12d3f493 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py @@ -130,20 +130,20 @@ def test_api(self): class TestRaiseSumError(unittest.TestCase): def test_errors(self): def test_type(): - fluid.layers.sum([11, 22]) + paddle.add_n([11, 22]) self.assertRaises(TypeError, test_type) def test_dtype(): data1 = fluid.data(name="input1", shape=[10], dtype="int8") data2 = fluid.data(name="input2", shape=[10], dtype="int8") - fluid.layers.sum([data1, data2]) + paddle.add_n([data1, data2]) self.assertRaises(TypeError, test_dtype) def test_dtype1(): data1 = fluid.data(name="input1", shape=[10], dtype="int8") - fluid.layers.sum(data1) + paddle.add_n(data1) self.assertRaises(TypeError, test_dtype1) diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py index 025abd9acc9dcd..116f433c8f0297 100644 --- a/python/paddle/hapi/model.py +++ b/python/paddle/hapi/model.py @@ -637,7 +637,7 @@ def _make_program(self, mode): metrics.append(to_list(metric.compute(*(outputs + labels)))) if mode == 'train' and self.model._optimizer: - self._loss_endpoint = fluid.layers.sum(losses) + self._loss_endpoint = paddle.add_n(losses) if self._nranks > 1: role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) @@ -795,7 +795,7 @@ def train_batch(self, inputs, labels=None, update=True): losses = self.model._loss(*(to_list(outputs) + labels)) losses = to_list(losses) - final_loss = fluid.layers.sum(losses) + final_loss = paddle.add_n(losses) if self._amp_level != "O0": scaled = self.model._scaler.scale(final_loss) diff --git a/python/paddle/incubate/optimizer/modelaverage.py b/python/paddle/incubate/optimizer/modelaverage.py index 21b176573407aa..52bf1ac4f34e12 100644 --- a/python/paddle/incubate/optimizer/modelaverage.py +++ b/python/paddle/incubate/optimizer/modelaverage.py @@ -548,8 +548,8 @@ def _add_average_apply_op(self, block, param): # backup param value to grad layers.assign(input=param, output=grad) # param = (sum_1 + sum_2 + sum_3) / (num_accumulates + old_num_accumulates) - tmp = layers.sum(x=[num_accumulates, old_num_accumulates]) - sum = layers.sum(x=[sum_1, sum_2, sum_3]) + tmp = paddle.add_n([num_accumulates, old_num_accumulates]) + sum = paddle.add_n([sum_1, sum_2, sum_3]) tmp = layers.cast( x=tmp, dtype='float32' if self._dtype is None else self._dtype ) From a365024c8190fc3f6199a9b9c6b26032a36f8efa Mon Sep 17 00:00:00 2001 From: minghaoBD <79566150+minghaoBD@users.noreply.github.com> Date: Thu, 1 Dec 2022 15:19:25 +0800 Subject: [PATCH 079/154] fuse-mt passes compatible with structured 
pruning (#48585) * fuse-mt passes compatible with structured pruning --- .../fused_multi_transformer_encoder_pass.cc | 72 ++++++++++--------- .../fused/fused_multi_transformer_op.cc | 21 ------ 2 files changed, 40 insertions(+), 53 deletions(-) diff --git a/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc b/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc index 3635613f8c54b3..6f0ef5b755ca9a 100644 --- a/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc +++ b/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc @@ -1325,17 +1325,6 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, Node* ffn_eltadd0_b, Node* ffn_eltadd1_b, Node* ffn_output) { - auto reshape_desc = reshape2_0->Op(); - int num_head = - PADDLE_GET_CONST(std::vector, reshape_desc->GetAttr("shape")) - .at(2); - int dim_head = - PADDLE_GET_CONST(std::vector, reshape_desc->GetAttr("shape")) - .at(3); - auto* layer_norm_bias_tensor = - scope->FindVar(layer_norm_bias->Name())->GetMutable(); - int dim_embed = layer_norm_bias_tensor->dims()[0]; - auto* matmul0_op = matmul0->Op(); auto* matmul_linear_op = matmul_linear->Op(); auto* ffn_matmul_0_op = ffn_matmul0->Op(); @@ -1364,6 +1353,20 @@ int FusedMultiTransformerEncoderPass::BuildFusion(Graph* graph, auto* bv_tensor = scope->FindVar(eltadd2_b->Name())->GetMutable(); + // NOTE(minghaoBD): to make it compatible with strucutured pruning on + // num_head dimension: + // 1. get dim_head from reshape.shape[3], dim_embed from + // layer_norm_bias.shape[0] + // 2. calculate num_head according to wq_tensor.shape[1] and dim_head + auto reshape_desc = reshape2_0->Op(); + int dim_head = + PADDLE_GET_CONST(std::vector, reshape_desc->GetAttr("shape")) + .at(3); + auto* layer_norm_bias_tensor = + scope->FindVar(layer_norm_bias->Name())->GetMutable(); + int dim_embed = layer_norm_bias_tensor->dims()[0]; + int num_head = wq_tensor->dims()[1] / dim_head; + QKVWeightsBiasProcess(wq_tensor, wk_tensor, wv_tensor, @@ -2195,18 +2198,6 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion( Node* ffn_eltadd0_b, Node* ffn_eltadd1_b, Node* ffn_output) { - auto reshape_desc = reshape2_0->Op(); - int num_head = - PADDLE_GET_CONST(std::vector, reshape_desc->GetAttr("shape")) - .at(2); - int dim_head = - PADDLE_GET_CONST(std::vector, reshape_desc->GetAttr("shape")) - .at(3) / - 3; // 3 for qkv - auto* layer_norm_bias_tensor = - scope->FindVar(layer_norm_bias->Name())->GetMutable(); - int dim_embed = layer_norm_bias_tensor->dims()[0]; - auto* matmul0_op = matmul0->Op(); auto* matmul_linear_op = matmul_linear->Op(); auto* ffn_matmul_0_op = ffn_matmul0->Op(); @@ -2226,6 +2217,21 @@ int FusedMultiTransformerEncoderFuseQKVPass::BuildFusion( auto* qkv_b_tensor = scope->FindVar(eltadd0_b->Name())->GetMutable(); + // NOTE(minghaoBD): to make it compatible with strucutured pruning on + // num_head dimension: + // 1. get dim_head from reshape.shape[3], dim_embed from + // layer_norm_bias.shape[0] + // 2. 
calculate num_head according to wqkv_tensor.shape[1]/3 and dim_head + auto reshape_desc = reshape2_0->Op(); + int dim_head = + PADDLE_GET_CONST(std::vector, reshape_desc->GetAttr("shape")) + .at(3) / + 3; // 3 for qkv + auto* layer_norm_bias_tensor = + scope->FindVar(layer_norm_bias->Name())->GetMutable(); + int dim_embed = layer_norm_bias_tensor->dims()[0]; + int num_head = qkv_w_tensor->dims()[1] / 3 / dim_head; + QKVWeightsBiasProcessFuseQKV( qkv_w_tensor, qkv_b_tensor, num_head, dim_head, dim_embed); @@ -2995,15 +3001,6 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion( Node* ffn_eltadd0_b, Node* ffn_eltadd1_b, Node* ffn_output) { - auto reshape_desc = reshape2_0->Op(); - int num_head = - PADDLE_GET_CONST(std::vector, reshape_desc->GetAttr("shape")) - .at(2); - int dim_head = - PADDLE_GET_CONST(std::vector, reshape_desc->GetAttr("shape")) - .at(3) / - 3; // 3 for qkv - auto* matmul0_op = matmul0->Op(); auto* matmul_linear_op = matmul_linear->Op(); auto* ffn_matmul_0_op = ffn_matmul0->Op(); @@ -3023,9 +3020,20 @@ int MultiDevicesFusedMultiTransformerEncoderFuseQKVPass::BuildFusion( auto* qkv_b_tensor = scope->FindVar(eltadd0_b->Name())->GetMutable(); + // NOTE(minghaoBD): to make it compatible with strucutured pruning on + // num_head dimension: + // 1. get dim_head from reshape.shape[3], dim_embed from + // layer_norm_bias.shape[0] + // 2. calculate num_head according to wqkv_tensor.shape[1]/3 and dim_head auto* layer_norm_bias_tensor = scope->FindVar(layer_norm_bias->Name())->GetMutable(); int dim_embed = layer_norm_bias_tensor->dims()[0]; + auto reshape_desc = reshape2_0->Op(); + int dim_head = + PADDLE_GET_CONST(std::vector, reshape_desc->GetAttr("shape")) + .at(3) / + 3; // 3 for qkv + int num_head = qkv_w_tensor->dims()[1] / 3 / dim_head; QKVWeightsBiasProcessFuseQKV( qkv_w_tensor, qkv_b_tensor, num_head, dim_head, dim_embed); diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc index 6a4c3890e5bc96..94a89338a6205f 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc @@ -93,27 +93,6 @@ class FusedMultiTransformerOp : public framework::OperatorWithKernel { x_dim, y_dim)); - if (ctx->Attrs().Get("ring_id") == -1) { - if (trans_qkvw) { - PADDLE_ENFORCE_EQ(y_dim[1] * y_dim[2], - y_dim[3], - platform::errors::InvalidArgument( - "The dimensions of qkv_weight must be 4" - "(3, num_head, dim_head, dim_embed)," - "and must satisfy the limitations: " - "(num_head * dim_head == dim_embed)")); - - } else { - PADDLE_ENFORCE_EQ(y_dim[2] * y_dim[3], - y_dim[0], - platform::errors::InvalidArgument( - "The dimensions of qkv_weight must be 4" - "(dim_embed, 3, num_head, dim_head)," - "and must satisfy the limitations: " - "(num_head * dim_head == dim_embed)")); - } - } - if (ctx->HasInputs("CacheKV")) { // [2, batch_size, num_head, max_seq_len, head_size] const auto &c_dims = ctx->GetInputsDim("CacheKV"); From b9421dc13903d3f15487cf01ec5fdae73e1bb991 Mon Sep 17 00:00:00 2001 From: heyanru <81976792+heyanru01@users.noreply.github.com> Date: Thu, 1 Dec 2022 16:12:27 +0800 Subject: [PATCH 080/154] [Fluid Clean] remove pixel_shuffle, fsp_matrix, where, sign, unique, unique_with_counts (#48441) --- python/paddle/fluid/layers/nn.py | 307 ------------------ .../unittests/mlu/test_where_index_op_mlu.py | 4 +- .../unittests/npu/test_where_index_npu.py | 4 +- .../fluid/tests/unittests/test_fsp_op.py | 27 -- 
.../fluid/tests/unittests/test_layers.py | 11 +- .../fluid/tests/unittests/test_sign_op.py | 8 +- .../fluid/tests/unittests/test_unique.py | 4 +- .../unittests/test_unique_with_counts.py | 5 +- .../fluid/tests/unittests/test_var_base.py | 2 +- .../fluid/tests/unittests/test_where_index.py | 123 ------- .../unittests/xpu/test_where_index_xpu.py | 4 +- python/paddle/fluid/variable_index.py | 6 +- 12 files changed, 19 insertions(+), 486 deletions(-) delete mode 100644 python/paddle/fluid/tests/unittests/test_where_index.py diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 03059afb191b5f..5ab7f3fbdcddcb 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -96,8 +96,6 @@ 'resize_trilinear', 'resize_nearest', 'relu', - 'unique', - 'unique_with_counts', 'elementwise_add', 'elementwise_div', 'elementwise_sub', @@ -117,11 +115,7 @@ 'get_tensor_from_selected_rows', 'temporal_shift', 'py_func', - 'pixel_shuffle', - 'fsp_matrix', 'continuous_value_model', - 'where', - 'sign', 'unfold', 'deformable_roi_pooling', 'shard_index', @@ -7020,121 +7014,6 @@ def py_func_demo(): py_func.registered_func_num = PyFuncRegistry.registered_func_num -def pixel_shuffle(x, upscale_factor): - """ - - This op rearranges elements in a tensor of shape [N, C, H, W] - to a tensor of shape [N, C/r**2, H*r, W*r]. - This is useful for implementing efficient sub-pixel convolution - with a stride of 1/r. - Please refer to the paper: `Real-Time Single Image and Video Super-Resolution - Using an Efficient Sub-Pixel Convolutional Neural Network `_ . - by Shi et. al (2016) for more details. - - Parameters: - - x(Variable): 4-D tensor, the data type should be float32 or float64. - upscale_factor(int): factor to increase spatial resolution. - - Returns: - Out(Variable): Reshaped tensor according to the new dimension. - - Raises: - ValueError: If the square of upscale_factor cannot divide the channels of input. - - Examples: - .. code-block:: python - - # declarative mode - import paddle.fluid as fluid - import numpy as np - input = fluid.data(name="input", shape=[2,9,4,4]) - output = fluid.layers.pixel_shuffle(x=input, upscale_factor=3) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - input_data = np.random.rand(2,9,4,4).astype("float32") - output_data = exe.run(fluid.default_main_program(), - feed={"input":input_data}, - fetch_list=[output], - return_numpy=True) - - # print(output.shape) - # (2L, 1L, 12L, 12L) - - """ - - check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'pixel_shuffle') - helper = LayerHelper("pixel_shuffle", **locals()) - - out = helper.create_variable_for_type_inference(dtype=x.dtype) - - if not isinstance(upscale_factor, int): - raise TypeError("upscale factor must be int type") - - helper.append_op( - type="pixel_shuffle", - inputs={"X": x}, - outputs={"Out": out}, - attrs={"upscale_factor": upscale_factor}, - ) - return out - - -def fsp_matrix(x, y): - """ - - **FSP matrix op** - - This op is used to calculate the flow of solution procedure (FSP) matrix of two 4-D Tensor feature maps. - Given feature map x with shape [x_channel, h, w] and feature map y with shape - [y_channel, h, w], we can get the fsp matrix of x and y in two steps: - - 1. reshape x into matrix with shape [x_channel, h * w] and reshape and - transpose y into matrix with shape [h * w, y_channel]. - 2. multiply x and y to get fsp matrix with shape [x_channel, y_channel]. - - The output is a batch of fsp matrices. 
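Since this wrapper is being removed without a drop-in replacement, the two steps described above can be reproduced with public reshape/transpose/matmul ops. A rough sketch under made-up shapes (illustrative only; the in-tree fsp op may additionally average over h*w, so verify the scale against your reference before relying on it):

    import paddle

    x = paddle.rand([4, 16, 8, 8])   # [batch, x_channel, h, w]
    y = paddle.rand([4, 32, 8, 8])   # [batch, y_channel, h, w]

    b, cx, h, w = x.shape
    cy = y.shape[1]
    x_flat = paddle.reshape(x, [b, cx, h * w])                               # step 1: [b, cx, h*w]
    y_flat = paddle.transpose(paddle.reshape(y, [b, cy, h * w]), [0, 2, 1])  # step 1: [b, h*w, cy]
    fsp = paddle.matmul(x_flat, y_flat)                                      # step 2: [b, cx, cy]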
- - Args: - - x (Variable): A 4-D Tensor feature map with shape [batch_size, x_channel, height, width]. - A Tensor with type float32, float64. - y (Variable): A 4-D Tensor feature map with shape [batch_size, y_channel, height, width]. - The y_channel can be different with the x_channel of Input(X) - while the other dimensions must be the same with Input(X)'s. A Tensor with - type float32, float64. - - Returns: - - fsp matrix (Variable): The output of FSP op with shape [batch_size, x_channel, y_channel]. - The x_channel is the channel of x and the y_channel is the channel of y. A Tensor with - type float32, float64. - - Examples: - - .. code-block:: python - - import paddle.fluid as fluid - data = fluid.data(name='data', shape=[None, 3, 32, 32]) - feature_map_0 = fluid.layers.conv2d(data, num_filters=2, - filter_size=3) - feature_map_1 = fluid.layers.conv2d(feature_map_0, num_filters=2, - filter_size=1) - loss = fluid.layers.fsp_matrix(feature_map_0, feature_map_1) - - """ - check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'fsp_matrix') - check_variable_and_dtype(y, 'y', ['float32', 'float64'], 'fsp_matrix') - helper = LayerHelper('fsp_matrix', **locals()) - out = helper.create_variable_for_type_inference( - dtype=helper.input_dtype(input_param_name='x') - ) - helper.append_op(type='fsp', inputs={'X': x, 'Y': y}, outputs={'Out': out}) - return out - - def continuous_value_model(input, cvm, use_cvm=True): r""" @@ -7192,192 +7071,6 @@ def continuous_value_model(input, cvm, use_cvm=True): return out -def where(condition): - """ - Return an int64 tensor with rank 2, specifying the coordinate of true element in `condition`. - - Args: - condition(Variable): A bool tensor with rank at least 1, the data type is bool. - - Returns: - Variable, the output data type is int64. : The tensor variable storing a 2-D tensor, which involves all coordinate. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import paddle.fluid.layers as layers - import numpy as np - - # condition is a tensor [True, False, True] - condition = layers.assign(np.array([1, 0, 1], dtype='int32')) - condition = layers.cast(condition, 'bool') - out = layers.where(condition) # [[0], [2]] - - # condition is a tensor [[True, False], [False, True]] - condition = layers.assign(np.array([[1, 0], [0, 1]], dtype='int32')) - condition = layers.cast(condition, 'bool') - out = layers.where(condition) # [[0, 0], [1, 1]] - - # condition is a tensor [False, False, False] - condition = layers.assign(np.array([0, 0, 0], dtype='int32')) - condition = layers.cast(condition, 'bool') - out = layers.where(condition) # [[]] - - """ - - if in_dygraph_mode(): - return _C_ops.nonzero(condition) - if _in_legacy_dygraph(): - return _legacy_C_ops.where_index(condition) - - helper = LayerHelper("where_index", **locals()) - - out = helper.create_variable_for_type_inference( - dtype=core.VarDesc.VarType.INT64 - ) - - helper.append_op( - type='where_index', - inputs={'Condition': condition}, - outputs={'Out': [out]}, - ) - return out - - -@deprecated(since="2.0.0", update_to="paddle.sign") -def sign(x): - r""" - This OP returns sign of every element in `x`: 1 for positive, -1 for negative and 0 for zero. - - Args: - x(Variable|numpy.ndarray): The input variable could be N-D tensor or N-D numpy array, \ - the input data type is float32 or float64. - - Returns: - Variable, the output data type is the same as input data type. : The output sign tensor with identical shape to input :attr:`x`. - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - import numpy as np - - # [1.0, 0.0, -1.0] - data = fluid.layers.sign(np.array([3.0, 0.0, -2.0], dtype='float32')) - """ - - helper = LayerHelper("sign", **locals()) - check_type(x, 'x', (Variable, np.ndarray), 'sign') - if isinstance(x, np.ndarray): - x = assign(x) - check_dtype(x.dtype, 'x', ['float16', 'float32', 'float64'], 'sign') - out = helper.create_variable_for_type_inference(dtype=x.dtype) - - helper.append_op(type='sign', inputs={'X': [x]}, outputs={'Out': [out]}) - - return out - - -def unique(x, dtype='int32'): - r""" - Return a unique tensor for `x` and an index tensor pointing to this unique tensor. - - Args: - x(Tensor): A 1-D input tensor, it's data type should be float32, float64, int32, int64. - dtype(np.dtype|str, optional): The type of index tensor: int32, int64. Default: int32. - - Returns: - tuple: (out, index). `out` is the unique tensor for `x`, with identical dtype to `x`, and \ - `index` is an index tensor pointing to `out`, by which user can recover the original `x` tensor. - - Examples: - .. code-block:: python - - import numpy as np - import paddle.fluid as fluid - x = fluid.layers.assign(np.array([2, 3, 3, 1, 5, 3], dtype='int32')) - out, index = fluid.layers.unique(x) # out is [2, 3, 1, 5]; index is [0, 1, 1, 2, 3, 1] - """ - - check_variable_and_dtype( - x, "x", ['float32', 'float64', 'int32', 'int64'], "unique" - ) - helper = LayerHelper("unique", **locals()) - - out = helper.create_variable_for_type_inference(dtype=x.dtype) - - index = helper.create_variable_for_type_inference(dtype) - - helper.append_op( - type='unique', - inputs={'X': x}, - attrs={'dtype': convert_np_dtype_to_dtype_(dtype)}, - outputs={'Out': [out], 'Index': [index]}, - ) - - return out, index - - -def unique_with_counts(x, dtype='int32'): - r""" - This OP return a unique tensor for `x` , and count tensor that the count of unique result in raw input, \ - and an index tensor pointing to this unique tensor. - - **NOTICE**: This op support the variable type of Tensor only. - - Args: - x(Variable): A 1-D input tensor with input shape of :math:`[N]` , the input data type is float32, float64, int32, int64. - dtype(np.dtype|core.VarDesc.VarType|str): The type of count and index tensor, it could be int32, int64. Default value is int32. - - Returns: - tuple, the variable type in tuple is Tensor, the output :attr:`out` data type is the same as input :attr:`x`, \ - and data type of output :attr:`index` and :attr:`count` will be int32 or int64.: The :attr:`out` is unique tensor for input :attr:`x`,\ - the data shape is :math:`[K]`, the `K` may be different to the `N` in shape of :attr:`x`. :attr:`index` is an index tensor pointing\ - to :attr:`out`, the data shape is :math:`[N]` , the data shape is the same as input :attr:`x`. :attr:`count` is count of unique element in\ - the :attr:`x`, the data shape is :math:`[K]`, the data shape is the same as output :attr:`out`. - - Examples: - .. 
code-block:: python - - import numpy as np - import paddle.fluid as fluid - x = fluid.layers.assign(np.array([2, 3, 3, 1, 5, 3], dtype='int32')) - out, index, count = fluid.layers.unique_with_counts(x) # out is [2, 3, 1, 5]; index is [0, 1, 1, 2, 3, 1] - # count is [1, 3, 1, 1] - # x.shape=(6,) out.shape=(4,), index.shape=(6,), count.shape=(4,) - """ - check_variable_and_dtype( - x, "x", ['float32', 'float64', 'int32', 'int64'], "unique_with_counts" - ) - if not (dtype == 'int32' or dtype == 'int64'): - raise TypeError( - "Op unique_with_counts, index dtype must be int32 or int64" - ) - - if x is None or len(x.shape) != 1: - raise ValueError( - "Op unique_with_counts, x must not be null and size of dim must be 1" - ) - - helper = LayerHelper("unique_with_counts", **locals()) - - out = helper.create_variable_for_type_inference(dtype=x.dtype) - - index = helper.create_variable_for_type_inference(dtype) - - count = helper.create_variable_for_type_inference(dtype) - - helper.append_op( - type='unique_with_counts', - inputs={'X': x}, - attrs={'dtype': convert_np_dtype_to_dtype_(dtype)}, - outputs={'Out': [out], 'Index': [index], 'Count': [count]}, - ) - - return out, index, count - - def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None): r""" diff --git a/python/paddle/fluid/tests/unittests/mlu/test_where_index_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_where_index_op_mlu.py index 163d47e33c2fb7..877207eab7d2e6 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_where_index_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_where_index_op_mlu.py @@ -108,7 +108,7 @@ class TestWhereOpError(unittest.TestCase): def test_api(self): with program_guard(Program(), Program()): cond = fluid.layers.data(name='cond', shape=[4], dtype='bool') - result = fluid.layers.where(cond) + result = paddle.nonzero(cond) exe = fluid.Executor(paddle.device.MLUPlace(0)) exe.run(fluid.default_startup_program()) @@ -119,7 +119,7 @@ def test_api(self): class TestWhereRaiseError(unittest.TestCase): def test_errors(self): def test_type(): - fluid.layers.where([10]) + paddle.nonzero([10]) self.assertRaises(TypeError, test_type) diff --git a/python/paddle/fluid/tests/unittests/npu/test_where_index_npu.py b/python/paddle/fluid/tests/unittests/npu/test_where_index_npu.py index 370a1934bff960..315f475a5c5bfd 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_where_index_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_where_index_npu.py @@ -98,7 +98,7 @@ class TestWhereOpError(unittest.TestCase): def test_api(self): with program_guard(Program(), Program()): cond = fluid.layers.data(name='cond', shape=[4], dtype='bool') - result = fluid.layers.where(cond) + result = paddle.nonzero(cond) exe = fluid.Executor(paddle.NPUPlace(0)) exe.run(fluid.default_startup_program()) @@ -109,7 +109,7 @@ def test_api(self): class TestWhereRaiseError(unittest.TestCase): def test_errors(self): def test_type(): - fluid.layers.where([10]) + paddle.nonzero([10]) self.assertRaises(TypeError, test_type) diff --git a/python/paddle/fluid/tests/unittests/test_fsp_op.py b/python/paddle/fluid/tests/unittests/test_fsp_op.py index 3e325d189148bd..abeaae9f24d3db 100644 --- a/python/paddle/fluid/tests/unittests/test_fsp_op.py +++ b/python/paddle/fluid/tests/unittests/test_fsp_op.py @@ -17,8 +17,6 @@ import numpy as np from op_test import OpTest -import paddle.fluid as fluid - def fsp_matrix(a, b): batch = a.shape[0] @@ -62,30 +60,5 @@ def test_check_grad_normal(self): self.check_grad(['X', 'Y'], 
'Out') -class BadInputTest(unittest.TestCase): - def test_error(self): - with fluid.program_guard(fluid.Program()): - - def test_bad_x(): - data = fluid.layers.data(name='data', shape=[3, 32, 32]) - feature_map_0 = [1, 2, 3] - feature_map_1 = fluid.layers.conv2d( - data, num_filters=2, filter_size=3 - ) - loss = fluid.layers.fsp_matrix(feature_map_0, feature_map_1) - - self.assertRaises(TypeError, test_bad_x) - - def test_bad_y(): - data = fluid.layers.data(name='data', shape=[3, 32, 32]) - feature_map_0 = fluid.layers.conv2d( - data, num_filters=2, filter_size=3 - ) - feature_map_1 = [1, 2, 3] - loss = fluid.layers.fsp_matrix(feature_map_0, feature_map_1) - - self.assertRaises(TypeError, test_bad_y) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 7750f6b613b974..912382f49ac447 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -3677,21 +3677,12 @@ def make_temporal_shift(self): out = layers.temporal_shift(x, seg_num=2, shift_ratio=0.2) return out - def make_fsp_matrix(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data(name="X", shape=[16, 4, 4], dtype="float32") - y = self._get_data(name="Y", shape=[8, 4, 4], dtype="float32") - out = layers.fsp_matrix(x, y) - return out - def make_pixel_shuffle(self): with program_guard( fluid.default_main_program(), fluid.default_startup_program() ): x = self._get_data(name="X", shape=[9, 4, 4], dtype="float32") - out = layers.pixel_shuffle(x, upscale_factor=3) + out = paddle.nn.functional.pixel_shuffle(x, upscale_factor=3) return out def make_mse_loss(self): diff --git a/python/paddle/fluid/tests/unittests/test_sign_op.py b/python/paddle/fluid/tests/unittests/test_sign_op.py index c48de7d58663fb..33e4ca795bf7a9 100644 --- a/python/paddle/fluid/tests/unittests/test_sign_op.py +++ b/python/paddle/fluid/tests/unittests/test_sign_op.py @@ -46,7 +46,7 @@ def test_errors(self): with program_guard(Program(), Program()): # The input type of sign_op must be Variable or numpy.ndarray. input1 = 12 - self.assertRaises(TypeError, fluid.layers.sign, input1) + self.assertRaises(TypeError, paddle.sign, input1) # The input dtype of sign_op must be float16, float32, float64. 
input2 = fluid.layers.data( name='input2', shape=[12, 10], dtype="int32" @@ -54,12 +54,12 @@ def test_errors(self): input3 = fluid.layers.data( name='input3', shape=[12, 10], dtype="int64" ) - self.assertRaises(TypeError, fluid.layers.sign, input2) - self.assertRaises(TypeError, fluid.layers.sign, input3) + self.assertRaises(TypeError, paddle.sign, input2) + self.assertRaises(TypeError, paddle.sign, input3) input4 = fluid.layers.data( name='input4', shape=[4], dtype="float16" ) - fluid.layers.sign(input4) + paddle.sign(input4) class TestSignAPI(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_unique.py b/python/paddle/fluid/tests/unittests/test_unique.py index c56ec313a395c4..d454d96a446cbe 100644 --- a/python/paddle/fluid/tests/unittests/test_unique.py +++ b/python/paddle/fluid/tests/unittests/test_unique.py @@ -74,13 +74,13 @@ def init_config(self): class TestUniqueRaiseError(unittest.TestCase): def test_errors(self): def test_type(): - fluid.layers.unique([10]) + paddle.unique([10]) self.assertRaises(TypeError, test_type) def test_dtype(): data = fluid.data(shape=[10], dtype="float16", name="input") - fluid.layers.unique(data) + paddle.unique(data) self.assertRaises(TypeError, test_dtype) diff --git a/python/paddle/fluid/tests/unittests/test_unique_with_counts.py b/python/paddle/fluid/tests/unittests/test_unique_with_counts.py index cc72b9e11d4d9c..ccbf56c66473bd 100644 --- a/python/paddle/fluid/tests/unittests/test_unique_with_counts.py +++ b/python/paddle/fluid/tests/unittests/test_unique_with_counts.py @@ -17,6 +17,7 @@ import numpy as np from op_test import OpTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core @@ -82,13 +83,13 @@ def init_config(self): class TestUniqueWithCountsRaiseError(unittest.TestCase): def test_errors(self): def test_type(): - fluid.layers.unique_with_counts([10]) + paddle.unique([10]) self.assertRaises(TypeError, test_type) def test_dtype(): data = fluid.data(shape=[10], dtype="float16", name="input") - fluid.layers.unique_with_counts(data) + paddle.unique(data) self.assertRaises(TypeError, test_dtype) diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index 439ff3d2b82226..62b66be09367dd 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -1289,7 +1289,7 @@ def test_tensor_str_scaler(self): def func_test_tensor_str_shape_with_zero(self): paddle.disable_static(paddle.CPUPlace()) x = paddle.ones((10, 10)) - y = paddle.fluid.layers.where(x == 0) + y = paddle.nonzero(x == 0) a_str = str(y) expected = '''Tensor(shape=[0, 2], dtype=int64, place=Place(cpu), stop_gradient=True, diff --git a/python/paddle/fluid/tests/unittests/test_where_index.py b/python/paddle/fluid/tests/unittests/test_where_index.py deleted file mode 100644 index d6960621d763ce..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_where_index.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from op_test import OpTest - -import paddle.fluid as fluid -import paddle.fluid.core as core -from paddle.fluid import Program, program_guard -from paddle.fluid.op import Operator - - -class TestWhereIndexOp(OpTest): - def setUp(self): - self.op_type = "where_index" - self.init_config() - - def test_check_output(self): - self.check_output() - - def init_config(self): - self.inputs = { - 'Condition': np.array([True, False, True]), - } - - self.outputs = {'Out': np.array([[0], [2]], dtype='int64')} - - -class TestAllFalse(unittest.TestCase): - def setUp(self): - self.op_type = "where_index" - self.init_config() - - def check_with_place(self, place): - scope = core.Scope() - condition = scope.var('Condition').get_tensor() - condition.set(self.cond_data, place) - - out = scope.var("Out").get_tensor() - out.set(np.full(self.shape, 0).astype('int64'), place) - - op = Operator("where_index", Condition="Condition", Out="Out") - op.run(scope, place) - - out_array = np.array(out) - self.assertTrue((out_array == self.out_data).all()) - - def init_config(self): - self.cond_data = np.array([False, False, False]) - self.shape = (3, 1) - self.out_data = np.array([], dtype='int64') - - def test_all_false(self): - self.check_with_place(core.CPUPlace()) - - if core.is_compiled_with_cuda(): - self.check_with_place(core.CUDAPlace(0)) - - -class TestRank2(TestWhereIndexOp): - def init_config(self): - self.inputs = { - 'Condition': np.array([[True, False], [False, True]]), - } - - self.outputs = {'Out': np.array([[0, 0], [1, 1]], dtype='int64')} - - -class TestRank3(TestWhereIndexOp): - def init_config(self): - self.inputs = { - 'Condition': np.array( - [ - [[True, False], [False, True]], - [[False, True], [True, False]], - [[False, False], [False, True]], - ] - ), - } - - self.outputs = { - 'Out': np.array( - [[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0], [2, 1, 1]], - dtype='int64', - ) - } - - -class TestWhereOpError(unittest.TestCase): - def test_api(self): - with program_guard(Program(), Program()): - cond = fluid.layers.data(name='cond', shape=[4], dtype='bool') - result = fluid.layers.where(cond) - - exe = fluid.Executor(fluid.CPUPlace()) - exe.run(fluid.default_startup_program()) - cond_i = np.array([True, False, False, False]).astype("bool") - out = exe.run(fluid.default_main_program(), feed={'cond': cond_i}) - - -class TestWhereRaiseError(unittest.TestCase): - def test_errors(self): - def test_type(): - fluid.layers.where([10]) - - self.assertRaises(TypeError, test_type) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py index 939a5868643382..1c74bd715a34b7 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py @@ -102,7 +102,7 @@ class TestWhereOpError(unittest.TestCase): def test_api(self): with program_guard(Program(), Program()): cond = fluid.layers.data(name='cond', shape=[4], dtype='bool') - result = fluid.layers.where(cond) + result = paddle.nonzero(cond) exe = fluid.Executor(paddle.XPUPlace(0)) exe.run(fluid.default_startup_program()) @@ -113,7 +113,7 @@ def test_api(self): class TestWhereRaiseError(unittest.TestCase): def test_errors(self): def test_type(): - fluid.layers.where([10]) + paddle.nonzero([10]) 
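The replacement applied throughout these tests is paddle.nonzero, which returns the same rank-2 int64 coordinate tensor that the deleted fluid.layers.where documented. A small sketch of the migrated call, reusing the example from the removed docstring (illustrative only):

    import paddle

    cond = paddle.to_tensor([True, False, True])
    out = paddle.nonzero(cond)  # [[0], [2]], int64 coordinates of the True elements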
self.assertRaises(TypeError, test_type) diff --git a/python/paddle/fluid/variable_index.py b/python/paddle/fluid/variable_index.py index 552fb7a9aa93a1..cf298501a29f8b 100644 --- a/python/paddle/fluid/variable_index.py +++ b/python/paddle/fluid/variable_index.py @@ -331,10 +331,9 @@ def get_value_for_bool_tensor(var, item): ) def idx_not_empty(var, item): - from .layers.nn import where from ..tensor import gather_nd - bool_2_idx = where(item == True) + bool_2_idx = paddle.nonzero(item == True) return gather_nd(var, bool_2_idx) def idx_empty(var): @@ -864,13 +863,12 @@ def set_value_for_bool_tensor(var, item, value): def idx_not_empty(var, item, value): from .framework import Variable from .layers import assign - from .layers.nn import where from ..tensor import gather_nd, scatter_nd_add if not isinstance(value, Variable): value = assign(value).cast(var.dtype) - idx = where(item) + idx = paddle.nonzero(item) gather_val = gather_nd(var, idx) gather_val_new = value - gather_val out = scatter_nd_add(var, idx, gather_val_new) From 9ffc760f99840cd3392e814ffb1ff81abc841ebe Mon Sep 17 00:00:00 2001 From: heyanru <81976792+heyanru01@users.noreply.github.com> Date: Thu, 1 Dec 2022 16:25:28 +0800 Subject: [PATCH 081/154] [Fluid Clean] remove paddle.fluid.layers.nn.reduce_mean (#48196) --- .../contrib/slim/quantization/adaround.py | 2 +- .../test_moving_average_abs_max_scale_op.py | 2 +- .../fluid/contrib/tests/test_correlation.py | 2 +- python/paddle/fluid/layers/nn.py | 58 ---- .../tests/unittests/auto_checkpoint_utils.py | 3 +- .../auto_parallel/test_dist_embedding.py | 2 +- .../dygraph_to_static/ifelse_simple_func.py | 2 +- .../seq2seq_dygraph_model.py | 6 +- .../dygraph_to_static/simnet_dygraph_model.py | 2 +- .../unittests/dygraph_to_static/test_bmn.py | 4 +- .../dygraph_to_static/test_cycle_gan.py | 16 +- .../dygraph_to_static/test_ptb_lm.py | 2 +- .../dygraph_to_static/test_word2vec.py | 2 +- .../unittests/dygraph_to_static/yolov3.py | 2 +- .../unittests/ipu/test_reduce_x_op_ipu.py | 2 +- .../unittests/ir/inference/CMakeLists.txt | 1 - .../ir/inference/test_trt_reduce_mean_op.py | 278 ------------------ .../tests/unittests/mlu/test_adam_op_mlu.py | 2 +- .../tests/unittests/mlu/test_adamw_op_mlu.py | 2 +- .../mlu/test_elementwise_max_op_mlu.py | 2 +- .../mlu/test_elementwise_min_op_mlu.py | 2 +- .../tests/unittests/mlu/test_gelu_op_mlu.py | 2 +- .../unittests/mlu/test_leaky_relu_op_mlu.py | 2 +- .../tests/unittests/mlu/test_relu6_op_mlu.py | 2 +- .../tests/unittests/mlu/test_relu_op_mlu.py | 2 +- .../test_softmax_with_cross_entropy_op_mlu.py | 2 +- .../tests/unittests/mlu/test_tanh_op_mlu.py | 2 +- .../tests/unittests/npu/test_adam_op_npu.py | 4 +- .../tests/unittests/npu/test_adamw_op_npu.py | 2 +- .../tests/unittests/npu/test_cos_op_npu.py | 2 +- .../npu/test_elementwise_div_op_npu.py | 2 +- .../npu/test_elementwise_max_op_npu.py | 2 +- .../npu/test_elementwise_min_op_npu.py | 2 +- .../npu/test_elementwise_pow_op_npu.py | 2 +- .../npu/test_elementwise_sub_op_npu.py | 2 +- .../tests/unittests/npu/test_gather_op_npu.py | 2 +- .../tests/unittests/npu/test_gelu_op_npu.py | 2 +- .../unittests/npu/test_leaky_relu_op_npu.py | 2 +- .../tests/unittests/npu/test_log_op_npu.py | 2 +- .../tests/unittests/npu/test_mul_op_npu.py | 8 +- .../tests/unittests/npu/test_pow_op_npu.py | 2 +- .../unittests/npu/test_reduce_sum_op_npu.py | 2 +- .../tests/unittests/npu/test_relu6_op_npu.py | 2 +- .../tests/unittests/npu/test_relu_op_npu.py | 2 +- .../unittests/npu/test_rmsprop_op_npu.py | 4 +- 
.../tests/unittests/npu/test_sgd_op_npu.py | 2 +- .../test_softmax_with_cross_entropy_op_npu.py | 2 +- .../tests/unittests/npu/test_sqrt_op_npu.py | 2 +- .../tests/unittests/npu/test_square_op_npu.py | 2 +- .../tests/unittests/npu/test_tanh_op_npu.py | 2 +- .../parallel_dygraph_sparse_embedding.py | 2 +- .../fluid/tests/unittests/seresnext_net.py | 4 +- .../fluid/tests/unittests/test_adam_op.py | 6 +- .../unittests/test_dataloader_early_reset.py | 9 +- .../unittests/test_dataloader_keep_order.py | 3 +- .../unittests/test_dataloader_unkeep_order.py | 3 +- .../unittests/test_dist_sparse_load_ps0.py | 2 +- .../test_dist_sparse_tensor_load_sgd.py | 2 +- .../tests/unittests/test_dist_transpiler.py | 2 +- .../test_dynamic_rnn_stop_gradient.py | 2 +- .../test_eager_deletion_padding_rnn.py | 2 +- .../tests/unittests/test_gradient_clip.py | 6 +- .../fluid/tests/unittests/test_hsigmoid_op.py | 2 +- .../unittests/test_imperative_auto_prune.py | 16 +- ...test_imperative_container_parameterlist.py | 4 +- .../test_imperative_container_sequential.py | 9 +- .../unittests/test_imperative_double_grad.py | 16 +- .../tests/unittests/test_imperative_gan.py | 18 +- ..._imperative_lod_tensor_to_selected_rows.py | 2 +- .../unittests/test_imperative_optimizer.py | 14 +- .../unittests/test_imperative_optimizer_v2.py | 14 +- .../test_imperative_partitial_backward.py | 2 +- .../unittests/test_imperative_ptb_rnn.py | 2 +- .../unittests/test_imperative_save_load.py | 2 +- .../unittests/test_imperative_save_load_v2.py | 2 +- ..._imperative_selected_rows_to_lod_tensor.py | 2 +- ...perative_star_gan_with_gradient_penalty.py | 4 +- ...imperative_trace_non_persistable_inputs.py | 2 +- .../unittests/test_learning_rate_scheduler.py | 2 +- .../fluid/tests/unittests/test_mean_op.py | 4 +- .../test_memory_reuse_exclude_feed_var.py | 3 +- .../test_multiprocess_dataloader_dynamic.py | 5 +- ...ess_dataloader_iterable_dataset_dynamic.py | 4 +- ...cess_dataloader_iterable_dataset_static.py | 3 +- .../test_multiprocess_dataloader_static.py | 2 +- .../fluid/tests/unittests/test_nn_grad.py | 2 +- .../fluid/tests/unittests/test_optimizer.py | 4 +- .../test_paddle_imperative_double_grad.py | 12 +- .../tests/unittests/test_paddle_save_load.py | 8 +- .../unittests/test_paddle_save_load_binary.py | 2 +- ...st_parallel_executor_fetch_isolated_var.py | 2 +- ...el_executor_inference_feed_partial_data.py | 3 +- .../tests/unittests/test_rnn_cell_api.py | 2 +- .../tests/unittests/test_rnn_decode_api.py | 4 +- .../tests/unittests/test_static_save_load.py | 2 +- .../unittests/test_traced_layer_err_msg.py | 2 +- python/paddle/nn/layer/loss.py | 14 +- 97 files changed, 176 insertions(+), 520 deletions(-) delete mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py diff --git a/python/paddle/fluid/contrib/slim/quantization/adaround.py b/python/paddle/fluid/contrib/slim/quantization/adaround.py index 25e0d902e67a80..2003380fa1a7d4 100644 --- a/python/paddle/fluid/contrib/slim/quantization/adaround.py +++ b/python/paddle/fluid/contrib/slim/quantization/adaround.py @@ -64,7 +64,7 @@ def compute_recon_loss(self, ada_quantized_output, orig_output): square_cost = fluid.layers.square_error_cost( ada_quantized_output, orig_output ) - recon_loss = fluid.layers.reduce_mean(paddle.sum(square_cost, axis=-1)) + recon_loss = paddle.mean(paddle.sum(square_cost, axis=-1)) return recon_loss def compute_round_loss(self, alpha_v, warm_start, beta): diff --git 
a/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py index 201aa9c1b4ba24..8ddca1b354c709 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py +++ b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py @@ -53,7 +53,7 @@ def check_backward(self, use_cuda): cross_entropy = fluid.layers.softmax_with_cross_entropy( fc_tmp, label ) - loss = fluid.layers.reduce_mean(cross_entropy) + loss = paddle.mean(cross_entropy) sgd = fluid.optimizer.SGD(learning_rate=1e-3) sgd.minimize(loss) diff --git a/python/paddle/fluid/contrib/tests/test_correlation.py b/python/paddle/fluid/contrib/tests/test_correlation.py index 46886cebd1f8ae..4e9ef9b0fe8f55 100644 --- a/python/paddle/fluid/contrib/tests/test_correlation.py +++ b/python/paddle/fluid/contrib/tests/test_correlation.py @@ -122,7 +122,7 @@ def test_check_output(self): stride2=1, ) - loss = fluid.layers.reduce_mean(out) + loss = paddle.mean(out) optimizer = fluid.optimizer.Momentum(0.0001, 0.9) optimizer.minimize(loss) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 5ab7f3fbdcddcb..c2599454c1c2f1 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -71,7 +71,6 @@ 'softmax', 'pool2d', 'batch_norm', - 'reduce_mean', 'reduce_all', 'reduce_any', 'dropout', @@ -2506,63 +2505,6 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None): return out -@deprecated(since="2.0.0", update_to="paddle.mean") -def reduce_mean(input, dim=None, keep_dim=False, name=None): - """ - Computes the mean of the input tensor's elements along the given dimension. - - Args: - input (Variable): The input variable which is a Tensor, the data type is float32, - float64, int32, int64. - dim (list|int, optional): The dimension along which the mean is computed. If - `None`, compute the mean over all elements of :attr:`input` - and return a variable with a single element, otherwise it - must be in the range :math:`[-rank(input), rank(input))`. If - :math:`dim[i] < 0`, the dimension to reduce is - :math:`rank(input) + dim[i]`. - keep_dim (bool, optional): Whether to reserve the reduced dimension in the - output Tensor. The result tensor will have one fewer dimension - than the :attr:`input` unless :attr:`keep_dim` is true, default - value is False. - name(str, optional): The default value is None. Normally there is no need for - user to set this property. For more information, please refer to :ref:`api_guide_Name` - - Returns: - Variable: Tensor, results of average on the specified dim of input tensor, - it's data type is the same as input's Tensor. - - Raises: - TypeError, if out data type is different with the input data type. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - - # x is a Tensor variable with following elements: - # [[0.2, 0.3, 0.5, 0.9] - # [0.1, 0.2, 0.6, 0.7]] - # Each example is followed by the corresponding output tensor. 
- x = fluid.data(name='x', shape=[2, 4], dtype='float32') - fluid.layers.reduce_mean(x) # [0.4375] - fluid.layers.reduce_mean(x, dim=0) # [0.15, 0.25, 0.55, 0.8] - fluid.layers.reduce_mean(x, dim=-1) # [0.475, 0.4] - fluid.layers.reduce_mean(x, dim=1, keep_dim=True) # [[0.475], [0.4]] - - # y is a Tensor variable with shape [2, 2, 2] and elements as below: - # [[[1.0, 2.0], [3.0, 4.0]], - # [[5.0, 6.0], [7.0, 8.0]]] - # Each example is followed by the corresponding output tensor. - y = fluid.data(name='y', shape=[2, 2, 2], dtype='float32') - fluid.layers.reduce_mean(y, dim=[1, 2]) # [2.5, 6.5] - fluid.layers.reduce_mean(y, dim=[0, 1]) # [4.0, 5.0] - """ - - return paddle.mean(x=input, axis=dim, keepdim=keep_dim, name=name) - - def reduce_all(input, dim=None, keep_dim=False, name=None): """ diff --git a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py index d43fda45444333..15d62544d217a3 100644 --- a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py +++ b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py @@ -17,6 +17,7 @@ import numpy as np +import paddle import paddle.fluid as fluid import paddle.fluid.incubate.checkpoint.auto_checkpoint as acp from paddle.fluid import unique_name @@ -71,7 +72,7 @@ def simple_net(): cross_entropy = fluid.layers.softmax_with_cross_entropy( fc_tmp, label ) - loss = fluid.layers.reduce_mean(cross_entropy) + loss = paddle.mean(cross_entropy) sgd = fluid.optimizer.SGD(learning_rate=1e-3) if minimize: sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_embedding.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_embedding.py index a75207c5718df1..b998fea4bae195 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_embedding.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_embedding.py @@ -39,7 +39,7 @@ def make_program_lookup_table_v1_mp_dp(): dtype="float32", is_sparse=False, ) - loss = paddle.fluid.layers.reduce_mean(emb_out) + loss = paddle.mean(emb_out) auto.shard_tensor( src_ids, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py index fd084e06649080..2fa012559cc77f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py @@ -57,7 +57,7 @@ def dyfunc_with_if_else2(x, col=100): # `x` is Tensor, `col` is not Tensor, and `col` is the return value of `true_fn` after transformed. 
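Every call-site update in this patch applies the mapping that the deleted body made explicit: dim becomes axis and keep_dim becomes keepdim. A minimal sketch reusing the values from the removed docstring (illustrative only):

    import paddle

    x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9],
                          [0.1, 0.2, 0.6, 0.7]])
    paddle.mean(x)                        # 0.4375
    paddle.mean(x, axis=0)                # [0.15, 0.25, 0.55, 0.80]
    paddle.mean(x, axis=-1)               # [0.475, 0.40]
    paddle.mean(x, axis=1, keepdim=True)  # [[0.475], [0.40]]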
# col = -1 col = fluid.layers.fill_constant(shape=[1], value=-1, dtype="int64") - if fluid.layers.reduce_mean(x).numpy()[0] > x.numpy()[row][col]: + if paddle.mean(x).numpy()[0] > x.numpy()[row][col]: y = fluid.layers.relu(x) else: x_pow = paddle.pow(x, 2) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index adc1909c64cd8a..c9a58b9c78f48a 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -303,7 +303,7 @@ def forward(self, inputs): tar_sequence_length, maxlen=max_tar_seq_len, dtype='float32' ) loss = loss * tar_mask - loss = fluid.layers.reduce_mean(loss, dim=[0]) + loss = paddle.mean(loss, axis=[0]) loss = paddle.sum(loss) return loss @@ -837,7 +837,7 @@ def forward(self, inputs): tar_sequence_length, maxlen=max_tar_seq_len, dtype='float32' ) loss = loss * tar_mask - loss = fluid.layers.reduce_mean(loss, dim=[0]) - loss = paddle.sum(loss) + loss = paddle.mean(loss, axis=[0]) + loss = fluid.layers.reduce_sum(loss) return loss diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py index 075900b939fdf1..8c3d62feacc62c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py @@ -114,7 +114,7 @@ def ops(self, input): """ operation """ - mean = fluid.layers.reduce_mean(input) + mean = paddle.mean(input) return mean diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py index 3773187b2596c1..f8e657499a4cd7 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py @@ -331,11 +331,11 @@ def bi_loss(pred_score, gt_label): epsilon = 0.000001 # temp = paddle.log(pred_score + epsilon) loss_pos = paddle.multiply(paddle.log(pred_score + epsilon), pmask) - loss_pos = coef_1 * fluid.layers.reduce_mean(loss_pos) + loss_pos = coef_1 * paddle.mean(loss_pos) loss_neg = paddle.multiply( paddle.log(1.0 - pred_score + epsilon), (1.0 - pmask) ) - loss_neg = coef_0 * fluid.layers.reduce_mean(loss_neg) + loss_neg = coef_0 * paddle.mean(loss_neg) loss = -1 * (loss_pos + loss_neg) return loss diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py index 312d716af70624..a8d6595c5bd0b1 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py @@ -93,8 +93,8 @@ def forward(self, input_A, input_B): diff_A = paddle.abs(paddle.subtract(x=input_A, y=cyc_A)) diff_B = paddle.abs(paddle.subtract(x=input_B, y=cyc_B)) - cyc_A_loss = fluid.layers.reduce_mean(diff_A) * lambda_A - cyc_B_loss = fluid.layers.reduce_mean(diff_B) * lambda_B + cyc_A_loss = paddle.mean(diff_A) * lambda_A + cyc_B_loss = paddle.mean(diff_B) * lambda_B cyc_loss = cyc_A_loss + cyc_B_loss fake_rec_A = self.build_gen_discriminator_a(fake_B) @@ -105,8 +105,8 @@ def forward(self, input_A, input_B): G = g_A_loss + g_B_loss idt_A = self.build_generator_resnet_9blocks_a(input_B) idt_loss_A = ( - 
fluid.layers.reduce_mean( - paddle.abs(paddle.subtract(x=input_B, y=idt_A)) + paddle.mean( + paddle.abs(fluid.layers.elementwise_sub(x=input_B, y=idt_A)) ) * lambda_B * lambda_identity @@ -114,8 +114,8 @@ def forward(self, input_A, input_B): idt_B = self.build_generator_resnet_9blocks_b(input_A) idt_loss_B = ( - fluid.layers.reduce_mean( - paddle.abs(paddle.subtract(x=input_A, y=idt_B)) + paddle.mean( + paddle.abs(fluid.layers.elementwise_sub(x=input_A, y=idt_B)) ) * lambda_A * lambda_identity @@ -648,7 +648,7 @@ def train(args, to_static): d_loss_A = ( paddle.square(fake_pool_rec_B) + paddle.square(rec_B - 1) ) / 2.0 - d_loss_A = fluid.layers.reduce_mean(d_loss_A) + d_loss_A = paddle.mean(d_loss_A) d_loss_A.backward() optimizer2.minimize(d_loss_A) @@ -661,7 +661,7 @@ def train(args, to_static): d_loss_B = ( paddle.square(fake_pool_rec_A) + paddle.square(rec_A - 1) ) / 2.0 - d_loss_B = fluid.layers.reduce_mean(d_loss_B) + d_loss_B = paddle.mean(d_loss_B) d_loss_B.backward() optimizer3.minimize(d_loss_B) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py index fa062464d5aa96..c7135a8ff781cc 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py @@ -220,7 +220,7 @@ def forward(self, input, label, init_hidden, init_cell): logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) - loss = fluid.layers.reduce_mean(loss, dim=[0]) + loss = paddle.mean(loss, axis=[0]) loss = paddle.sum(loss) return loss, last_hidden, last_cell diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py index 44dd23a4c3abe1..22a62d64cbbb5a 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py @@ -265,7 +265,7 @@ def forward(self, center_words, target_words, label): loss = paddle.nn.functional.binary_cross_entropy_with_logits( word_sim, label ) - loss = fluid.layers.reduce_mean(loss) + loss = paddle.mean(loss) return pred, loss diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py index 5af50594e18bff..5f894744700f0b 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py @@ -325,7 +325,7 @@ def forward( downsample_ratio=self.downsample, use_label_smooth=cfg.label_smooth, ) - self.losses.append(fluid.layers.reduce_mean(loss)) + self.losses.append(paddle.mean(loss)) else: mask_anchors = [] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_reduce_x_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_reduce_x_op_ipu.py index deb29605cb44e8..fe373e91038dea 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_reduce_x_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_reduce_x_op_ipu.py @@ -28,7 +28,7 @@ def setUp(self): self.set_test_op() def set_test_op(self): - self.op = paddle.fluid.layers.reduce_mean + self.op = paddle.mean def set_feed_attr(self): self.feed_shape = [x.shape for x in self.feed_fp32.values()] diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt index 
045dee09c74601..006814a56fc4f3 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt @@ -144,7 +144,6 @@ if(WITH_GPU AND TENSORRT_FOUND) test_trt_pool3d_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 45) endif() - set_tests_properties(test_trt_reduce_mean_op PROPERTIES TIMEOUT 60) set_tests_properties(test_trt_tile_op PROPERTIES TIMEOUT 60) set_tests_properties(test_trt_fc_fuse_quant_dequant_pass PROPERTIES TIMEOUT 100) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py deleted file mode 100644 index 235b0518c44bc1..00000000000000 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from inference_pass_test import InferencePassTest - -import paddle.fluid as fluid -import paddle.fluid.core as core -from paddle.fluid.core import AnalysisConfig, PassVersionChecker - - -class TRTReduceMeanTest(InferencePassTest): - def setUp(self): - with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, -1, -1], dtype="float32" - ) - reduce_mean = fluid.layers.reduce_mean( - data, dim=[2, -1], keep_dim=True - ) - out = fluid.layers.batch_norm(reduce_mean, is_test=True) - - self.feeds = { - "data": np.random.random([3, 3, 56, 56]).astype("float32"), - } - self.enable_trt = True - self.trt_parameters = TRTReduceMeanTest.TensorRTParam( - 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False - ) - self.fetch_list = [out] - self.dynamic_shape_params = TRTReduceMeanTest.DynamicShapeParam( - {'data': [1, 3, 16, 16]}, - {'data': [3, 3, 56, 56]}, - {'data': [3, 3, 56, 56]}, - False, - ) - - def test_check_output(self): - if core.is_compiled_with_cuda(): - use_gpu = True - self.check_output_with_option(use_gpu, flatten=True) - self.assertTrue( - PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') - ) - - -class TRTReduceMeanAllNoBatchTest(InferencePassTest): - def setUp(self): - with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, -1, -1], dtype="float32" - ) - reduce_mean = fluid.layers.reduce_mean(data, keep_dim=True) - out = fluid.layers.batch_norm(reduce_mean, is_test=True) - - self.feeds = { - "data": np.random.random([3, 3, 56, 56]).astype("float32"), - } - self.enable_trt = True - self.trt_parameters = TRTReduceMeanAllNoBatchTest.TensorRTParam( - 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False - ) - self.fetch_list = [out] - self.dynamic_shape_params = ( - TRTReduceMeanAllNoBatchTest.DynamicShapeParam( - {'data': [1, 3, 16, 16]}, - {'data': [3, 3, 56, 56]}, - {'data': [3, 3, 56, 56]}, - False, - ) - ) - - def 
test_check_output(self): - if core.is_compiled_with_cuda(): - use_gpu = True - self.check_output_with_option(use_gpu, flatten=True) - self.assertTrue( - PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') - ) - - -class TRTReduceMeanTestFP16(InferencePassTest): - def setUp(self): - with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, -1, -1], dtype="float32" - ) - reduce_mean = fluid.layers.reduce_mean( - data, dim=[2, -1], keep_dim=True - ) - out = fluid.layers.batch_norm(reduce_mean, is_test=True) - - self.feeds = { - "data": np.random.random([3, 3, 56, 56]).astype("float32"), - } - self.enable_trt = True - self.trt_parameters = TRTReduceMeanTestFP16.TensorRTParam( - 1 << 30, 32, 1, AnalysisConfig.Precision.Half, False, False - ) - self.fetch_list = [out] - self.dynamic_shape_params = TRTReduceMeanTestFP16.DynamicShapeParam( - {'data': [1, 3, 16, 16]}, - {'data': [3, 3, 56, 56]}, - {'data': [3, 3, 56, 56]}, - False, - ) - - def test_check_output(self): - if core.is_compiled_with_cuda(): - use_gpu = True - self.check_output_with_option(use_gpu, flatten=True) - self.assertTrue( - PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') - ) - - -class TRTReduceMeanAllTest(InferencePassTest): - def setUp(self): - with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 3, 56, 56], dtype="float32" - ) - reduce_mean = fluid.layers.reduce_mean(data, keep_dim=True) - out = fluid.layers.batch_norm(reduce_mean, is_test=True) - - self.feeds = { - "data": np.random.random([3, 3, 56, 56]).astype("float32"), - } - self.enable_trt = True - self.trt_parameters = TRTReduceMeanAllTest.TensorRTParam( - 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False - ) - self.fetch_list = [out] - self.dynamic_shape_params = TRTReduceMeanAllTest.DynamicShapeParam( - {'data': [1, 3, 56, 56]}, - {'data': [3, 3, 56, 56]}, - {'data': [3, 3, 56, 56]}, - False, - ) - - def test_check_output(self): - if core.is_compiled_with_cuda(): - use_gpu = True - self.check_output_with_option(use_gpu, flatten=True) - self.assertTrue( - PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') - ) - - -class TRTReduceMeanTestStatic(InferencePassTest): - def setUp(self): - with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[3, 3, 56, 56], dtype="float32" - ) - reduce_mean = fluid.layers.reduce_mean( - data, dim=[2, -1], keep_dim=True - ) - out = fluid.layers.batch_norm(reduce_mean, is_test=True) - - self.feeds = { - "data": np.random.random([3, 3, 56, 56]).astype("float32"), - } - self.enable_trt = True - self.trt_parameters = TRTReduceMeanTestStatic.TensorRTParam( - 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False - ) - self.fetch_list = [out] - - def test_check_output(self): - if core.is_compiled_with_cuda(): - use_gpu = True - self.check_output_with_option(use_gpu, flatten=True) - self.assertTrue( - PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') - ) - - -class TRTReduceMeanStaticAllTest(InferencePassTest): - def setUp(self): - with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[4, 3, 56, 56], dtype="float32" - ) - reduce_mean = fluid.layers.reduce_mean(data, keep_dim=True) - out = fluid.layers.batch_norm(reduce_mean, is_test=True) - - self.feeds = { - "data": np.random.random([4, 3, 56, 56]).astype("float32"), - } - self.enable_trt = True - self.trt_parameters = 
TRTReduceMeanStaticAllTest.TensorRTParam( - 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False - ) - self.fetch_list = [out] - - def test_check_output(self): - if core.is_compiled_with_cuda(): - use_gpu = True - self.check_output_with_option(use_gpu, flatten=True) - self.assertTrue( - PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') - ) - - -class TRTReduceMeanStaticFP16(InferencePassTest): - def setUp(self): - with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[4, 3, 56, 56], dtype="float32" - ) - reduce_mean = fluid.layers.reduce_mean(data, keep_dim=True) - out = fluid.layers.batch_norm(reduce_mean, is_test=True) - - self.feeds = { - "data": np.random.random([4, 3, 56, 56]).astype("float32"), - } - self.enable_trt = True - self.trt_parameters = TRTReduceMeanStaticFP16.TensorRTParam( - 1 << 30, 32, 1, AnalysisConfig.Precision.Half, False, False - ) - self.fetch_list = [out] - - def test_check_output(self): - if core.is_compiled_with_cuda(): - use_gpu = True - self.check_output_with_option( - use_gpu, flatten=True, atol=1e-3, rtol=1e-3 - ) - self.assertTrue( - PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') - ) - - -class TRTReduceMeanFP16Static(InferencePassTest): - def setUp(self): - with fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[4, 3, 56, 56], dtype="float32" - ) - reduce_mean = fluid.layers.reduce_mean(data, keep_dim=True) - out = fluid.layers.batch_norm(reduce_mean, is_test=True) - - self.feeds = { - "data": np.random.random([4, 3, 56, 56]).astype("float32"), - } - self.enable_trt = True - self.trt_parameters = TRTReduceMeanFP16Static.TensorRTParam( - 1 << 30, 32, 1, AnalysisConfig.Precision.Half, True, False - ) - self.fetch_list = [out] - - def test_check_output(self): - if core.is_compiled_with_cuda(): - use_gpu = True - self.check_output_with_option( - use_gpu, flatten=True, atol=1e-3, rtol=1e-3 - ) - self.assertTrue( - PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/mlu/test_adam_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_adam_op_mlu.py index 70a3a2671b6b8f..7b33c46a933d7a 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_adam_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_adam_op_mlu.py @@ -264,7 +264,7 @@ def _test(self, run_mlu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) adam = fluid.optimizer.Adam(learning_rate=0.01) adam.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_adamw_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_adamw_op_mlu.py index c9b3b527e72dba..e38402a6009406 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_adamw_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_adamw_op_mlu.py @@ -215,7 +215,7 @@ def _test(self, run_mlu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) adam = paddle.optimizer.AdamW(learning_rate=0.01, weight_decay=0.02) adam.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_max_op_mlu.py 
b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_max_op_mlu.py index cdc354acdcba51..310664806931af 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_max_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_max_op_mlu.py @@ -344,7 +344,7 @@ def _test(self, run_mlu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_min_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_min_op_mlu.py index f1546b5ac63e32..8f9c9224b1daaa 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_min_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_min_op_mlu.py @@ -190,7 +190,7 @@ def _test(self, run_mlu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_gelu_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_gelu_op_mlu.py index d5b0913b861124..7b6f2b2862f199 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_gelu_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_gelu_op_mlu.py @@ -113,7 +113,7 @@ def _test(self, run_mlu=True): prediction = fluid.layers.fc(input=fc_1_gelu, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_leaky_relu_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_leaky_relu_op_mlu.py index 05d78e7f31a1c3..0e4168dbe3e5ac 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_leaky_relu_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_leaky_relu_op_mlu.py @@ -107,7 +107,7 @@ def _test(self, run_mlu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_relu6_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_relu6_op_mlu.py index 4e8ebacf0eb692..e1841ce5d3ff34 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_relu6_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_relu6_op_mlu.py @@ -126,7 +126,7 @@ def _test(self, run_mlu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_relu_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_relu_op_mlu.py index db62e592ac2dd7..192a9e2adc6d17 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_relu_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_relu_op_mlu.py @@ -127,7 +127,7 @@ def _test(self, run_mlu=True): prediction = fluid.layers.fc(input=fc_1, 
size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py index bf77d52532926c..f210ea0b633b21 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py @@ -127,7 +127,7 @@ def _test(self, run_mlu=True): prediction = fluid.layers.fc(input=fc_1, size=2) cost = fluid.layers.softmax_with_cross_entropy(prediction, label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_tanh_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_tanh_op_mlu.py index c346dd0867a5bd..5eacb39a9286ca 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_tanh_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_tanh_op_mlu.py @@ -108,7 +108,7 @@ def _test(self, run_mlu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py index 49589d5d9dc80e..331de1e0c26e90 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py @@ -264,7 +264,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) adam = fluid.optimizer.Adam(learning_rate=0.01) adam.minimize(loss) @@ -349,7 +349,7 @@ def _test( ) cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) beta1_init = 0.9 beta2_init = 0.999 epsilon_init = 1e-8 diff --git a/python/paddle/fluid/tests/unittests/npu/test_adamw_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_adamw_op_npu.py index 0211eb196d58eb..b4976db23894a7 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_adamw_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_adamw_op_npu.py @@ -215,7 +215,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) adam = paddle.optimizer.AdamW(learning_rate=0.01, weight_decay=0.02) adam.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py index 1ab4edef710803..89c1e344724f66 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_cos_op_npu.py @@ -105,7 +105,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = 
fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py index 42460f46a1ec77..1971da51d9cbe1 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_div_op_npu.py @@ -139,7 +139,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_max_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_max_op_npu.py index fe3d5847929441..57d2518225b425 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_max_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_max_op_npu.py @@ -303,7 +303,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_min_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_min_op_npu.py index 8cd51765bd8292..551269a9609291 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_min_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_min_op_npu.py @@ -190,7 +190,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_pow_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_pow_op_npu.py index b872c5bf83edf0..d19431da020953 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_pow_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_pow_op_npu.py @@ -314,7 +314,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_elementwise_sub_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_elementwise_sub_op_npu.py index 8542ed6bdc3969..01b3f5bdab7807 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_elementwise_sub_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_elementwise_sub_op_npu.py @@ -195,7 +195,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_gather_op_npu.py 
b/python/paddle/fluid/tests/unittests/npu/test_gather_op_npu.py index 1d27eadbc12f38..a6fa001076c2a5 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_gather_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_gather_op_npu.py @@ -134,7 +134,7 @@ def _test(self, run_npu=True): a.stop_gradient = False b = paddle.gather(a, index) - loss = fluid.layers.reduce_mean(b) + loss = paddle.mean(b) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py index 90e3f8dd2b2069..20af178483acce 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_gelu_op_npu.py @@ -113,7 +113,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1_gelu, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_leaky_relu_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_leaky_relu_op_npu.py index e91a65faeec163..550b02e85da8b3 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_leaky_relu_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_leaky_relu_op_npu.py @@ -107,7 +107,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py index 8745a66b45a4b4..bb60f9d4e32500 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_log_op_npu.py @@ -105,7 +105,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_mul_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_mul_op_npu.py index 8f8abea53964a9..3e4dc2de9708dc 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_mul_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_mul_op_npu.py @@ -248,7 +248,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) @@ -325,7 +325,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) @@ -405,7 +405,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = 
fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) @@ -486,7 +486,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=result, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_pow_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_pow_op_npu.py index 09f2d0fc055c13..73dfae2d13bf5c 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_pow_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_pow_op_npu.py @@ -105,7 +105,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py index 09ba63b726c05a..bb81f8039abffd 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py @@ -113,7 +113,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=z_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py index 3988b4a6a93958..ac83c1fac0b92c 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_relu6_op_npu.py @@ -126,7 +126,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_relu_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_relu_op_npu.py index a52e4d39cfd77b..b333a11dcd7cb1 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_relu_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_relu_op_npu.py @@ -119,7 +119,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_rmsprop_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_rmsprop_op_npu.py index 9274d8daa1d423..ed712cb3e73923 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_rmsprop_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_rmsprop_op_npu.py @@ -53,7 +53,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) rmsprop = fluid.optimizer.RMSProp(learning_rate=0.01) rmsprop.minimize(loss) @@ -116,7 +116,7 @@ 
def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) rmsprop = fluid.optimizer.RMSProp(learning_rate=0.01, centered=True) rmsprop.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_sgd_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sgd_op_npu.py index 9747953862508a..ba3f9abd081a96 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_sgd_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_sgd_op_npu.py @@ -78,7 +78,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py index f19e892f9a37e9..487ca61320e402 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py @@ -125,7 +125,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2) cost = fluid.layers.softmax_with_cross_entropy(prediction, label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py index d28f67a51e3e55..2674fe59721ad5 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_sqrt_op_npu.py @@ -108,7 +108,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py index f6dbefee32a6f6..8e9a69e4c147d7 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_square_op_npu.py @@ -105,7 +105,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py index 8cbb0d217eb370..a407336c0c18f1 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_tanh_op_npu.py @@ -108,7 +108,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git 
a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py index 824815d48aa2cc..9e0ed71d03598f 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py @@ -72,7 +72,7 @@ def forward(self, input, label): logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) - loss = fluid.layers.reduce_mean(loss, dim=[0]) + loss = paddle.mean(loss, axis=[0]) loss = paddle.sum(loss) return loss diff --git a/python/paddle/fluid/tests/unittests/seresnext_net.py b/python/paddle/fluid/tests/unittests/seresnext_net.py index 86cf960a282db1..7d96ea40ef4c04 100644 --- a/python/paddle/fluid/tests/unittests/seresnext_net.py +++ b/python/paddle/fluid/tests/unittests/seresnext_net.py @@ -51,7 +51,7 @@ def squeeze_excitation(input, num_channels, reduction_ratio): conv = input shape = conv.shape reshape = paddle.reshape(x=conv, shape=[-1, shape[1], shape[2] * shape[3]]) - pool = fluid.layers.reduce_mean(input=reshape, dim=2) + pool = paddle.mean(x=reshape, axis=2) squeeze = fluid.layers.fc( input=pool, size=num_channels // reduction_ratio, act='relu' @@ -162,7 +162,7 @@ def SE_ResNeXt50Small(use_feed): shape = conv.shape reshape = paddle.reshape(x=conv, shape=[-1, shape[1], shape[2] * shape[3]]) - pool = fluid.layers.reduce_mean(input=reshape, dim=2) + pool = paddle.mean(x=reshape, axis=2) dropout = ( pool if remove_dropout diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py index d84366efdcb698..715b5460ed2f14 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_adam_op.py @@ -614,7 +614,7 @@ def test_adam_op(self): with fluid.unique_name.guard(): data = fluid.data(name="data", shape=shape) conv = fluid.layers.conv2d(data, 8, 3) - loss = fluid.layers.reduce_mean(conv) + loss = paddle.mean(conv) beta1 = fluid.layers.create_global_var( shape=[1], value=0.85, dtype='float32', persistable=True @@ -807,7 +807,7 @@ def _test( ) cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) beta1_init = 0.9 beta2_init = 0.999 epsilon_init = 1e-8 @@ -965,7 +965,7 @@ def test_adam_exception(self): prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') cost = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.reduce_mean(cost) + loss = paddle.mean(cost) adam = fluid.optimizer.Adam(use_global_beta_pow=True) adam.minimize(loss) self.assertRaises(Exception, adam._get_global_accumulator, 'tmp') diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py b/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py index 50f412fca0e66b..f55cc3370473bc 100644 --- a/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py +++ b/python/paddle/fluid/tests/unittests/test_dataloader_early_reset.py @@ -12,11 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest - -import numpy as np - +import paddle import paddle.fluid as fluid +import numpy as np +import unittest def infinite_reader(): @@ -33,7 +32,7 @@ def setUp(self): def build_network(self): y = fluid.layers.fc(self.x, size=10) - loss = fluid.layers.reduce_mean(y) + loss = paddle.mean(y) optimizer = fluid.optimizer.SGD(learning_rate=1e-3) optimizer.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py b/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py index 8263edd7469eb7..82aa47d8a6998f 100644 --- a/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py +++ b/python/paddle/fluid/tests/unittests/test_dataloader_keep_order.py @@ -17,6 +17,7 @@ import numpy as np +import paddle import paddle.fluid as fluid @@ -48,7 +49,7 @@ def build_network(self, places): ) fc = fluid.layers.fc(input_data, size=10) - loss = fluid.layers.reduce_mean(fc) + loss = paddle.mean(fc) loader.set_batch_generator( create_reader(self.shape, self.batch_num), diff --git a/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py b/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py index c8cf808526b5d3..8373482772deeb 100644 --- a/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py +++ b/python/paddle/fluid/tests/unittests/test_dataloader_unkeep_order.py @@ -17,6 +17,7 @@ import numpy as np +import paddle import paddle.fluid as fluid from paddle.fluid.reader import keep_data_loader_order @@ -54,7 +55,7 @@ def build_network(self, places): ) fc = fluid.layers.fc(input_data, size=10) - loss = fluid.layers.reduce_mean(fc) + loss = paddle.mean(fc) loader.set_batch_generator( create_reader(self.shape, self.batch_num), diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py index 75f076ae7ce7af..866722b7d00072 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py @@ -55,7 +55,7 @@ def net(self, emb_array, fc_array): ), ), ) - loss = fluid.layers.reduce_mean(fc1) + loss = paddle.mean(fc1) return loss def save_origin_model(self, emb_array, fc_array): diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py index 0c2073e3b72b07..ee9b995031dbc0 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_tensor_load_sgd.py @@ -52,7 +52,7 @@ def net(self): ) fc1 = fluid.layers.fc(input=emb, size=128, act="relu") fc2 = fluid.layers.fc(input=fc1, size=64, act="relu") - loss = fluid.layers.reduce_mean(fc2) + loss = paddle.mean(fc2) return scope, train_program, startup_program, loss diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index 1b5af488460b05..00a47420210eb1 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -437,7 +437,7 @@ def net_conf(self): paddle.sum(true_xent, axis=1), paddle.sum(neg_xent, axis=1), ) - avg_cost = fluid.layers.reduce_mean(cost) + avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD( learning_rate=fluid.layers.exponential_decay( diff --git a/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py 
b/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py index bea1473ac78fe4..a92052c05065f3 100644 --- a/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py @@ -57,7 +57,7 @@ def build_and_run_program(place, batch_size, beam_size, stop_gradient=False): layers.assign(length_cond, cond) out = layers.tensor_array_to_tensor(scores, axis=0, use_stack=True)[0] - loss = layers.reduce_mean(out) + loss = paddle.mean(out) opt = fluid.optimizer.Adam(0.01) opt.minimize(loss) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py index 4e3e204c2d286d..ccdf56e64f4900 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -468,7 +468,7 @@ def encoder_static( ) loss = paddle.reshape(loss, shape=[-1, num_steps]) - loss = layers.reduce_mean(loss, dim=[0]) + loss = paddle.mean(loss, axis=[0]) loss = paddle.sum(loss) loss.persistable = True diff --git a/python/paddle/fluid/tests/unittests/test_gradient_clip.py b/python/paddle/fluid/tests/unittests/test_gradient_clip.py index 71952b73f5bdce..4aa064921fe5cc 100644 --- a/python/paddle/fluid/tests/unittests/test_gradient_clip.py +++ b/python/paddle/fluid/tests/unittests/test_gradient_clip.py @@ -412,7 +412,7 @@ def test_gradient_clip(self): [16, 5], min=-10, max=10 ).astype('float32') out = linear(fluid.dygraph.to_variable(inputs)) - loss = fluid.layers.reduce_mean(out) + loss = paddle.mean(out) loss.backward() sgd_optimizer = fluid.optimizer.SGD( learning_rate=0.0, @@ -557,7 +557,7 @@ def test_gradient_clip(self): ).astype('float32') with paddle.amp.auto_cast(level='O2'): out = model(fluid.dygraph.to_variable(inputs)) - loss = fluid.layers.reduce_mean(out) + loss = paddle.mean(out) scaled = scaler.scale(loss) scaled.backward() scaler.unscale_(sgd_optimizer) @@ -605,7 +605,7 @@ def test_gradient_clip(self): ).astype('float32') linear = paddle.nn.Linear(5, 5) out = linear(fluid.dygraph.to_variable(inputs)) - loss = fluid.layers.reduce_mean(out) + loss = paddle.mean(out) loss.backward() # before clip params_grads = [] diff --git a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py index 180e9abe1b2f9b..68814309791926 100644 --- a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py +++ b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py @@ -321,7 +321,7 @@ def hs_net_conf(self, is_sparse): path_code=path_code, ) - avg_cost = fluid.layers.reduce_mean(cost) + avg_cost = paddle.mean(cost) return avg_cost, data_list diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py index 5fc83145d24f44..522fb24f8fb7a2 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py @@ -45,7 +45,7 @@ def forward(self, x, y): a = self.linear1(x) b = self.linear2(y) c = fluid.layers.mul(a, b) - d = fluid.layers.reduce_mean(c) + d = paddle.mean(c) return d @@ -74,7 +74,7 @@ def forward(self, x, y): b = self.linear2(y) b.stop_gradient = True c = fluid.layers.mul(a, b) - d = fluid.layers.reduce_mean(c) + d = paddle.mean(c) return d @@ -124,15 +124,15 @@ def __init__(self, input_size, vocab_size, 
size, dtype="float32"): def forward(self, x): # this method involves only the linear layers - loss = fluid.layers.reduce_mean(self.linear_0(x) + self.linear_1(x)) + loss = paddle.mean(self.linear_0(x) + self.linear_1(x)) return loss def linear0(self, x): - loss = fluid.layers.reduce_mean(self.linear_0(x)) + loss = paddle.mean(self.linear_0(x)) return loss def embed_linear0(self, x): - loss = fluid.layers.reduce_mean(self.linear_0(self.embed0(x))) + loss = paddle.mean(self.linear_0(self.embed0(x))) return loss @@ -147,18 +147,18 @@ def __init__(self, input_size, vocab_size, size, dtype="float32"): def forward(self, indices): # mind the difference with MyLayer # In this example, the forward method involes all params - loss = fluid.layers.reduce_mean( + loss = paddle.mean( self.linear_0(self.embed0(indices)) + self.linear_1(self.embed1(indices)) ) return loss def linear0(self, x): - loss = fluid.layers.reduce_mean(self.linear_0(x)) + loss = paddle.mean(self.linear_0(x)) return loss def embed_linear0(self, x): - loss = fluid.layers.reduce_mean(self.linear_0(self.embed0(x))) + loss = paddle.mean(self.linear_0(self.embed0(x))) return loss diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py b/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py index 57335a88319e87..92957890e3dbda 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py @@ -50,7 +50,7 @@ def paramter_list(self, use_fluid_api): self.assertEqual(len(model.params), num_stacked_param) res = model(x) self.assertListEqual(res.shape, [5, 2]) - loss = fluid.layers.reduce_mean(res) + loss = paddle.mean(res) loss.backward() model.params[num_stacked_param - 1] = fluid.layers.create_parameter( @@ -64,7 +64,7 @@ def paramter_list(self, use_fluid_api): self.assertEqual(len(model.params), num_stacked_param + 1) res = model(x) self.assertListEqual(res.shape, [5, 4]) - loss = fluid.layers.reduce_mean(res) + loss = paddle.mean(res) loss.backward() def test_paramter_list(self): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py index 1049c08c64d40a..57f624e800998f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py @@ -16,6 +16,7 @@ import numpy as np +import paddle import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard from paddle.nn import Linear @@ -32,7 +33,7 @@ def func_sequential(self): model1[1] = Linear(1, 3) res1 = model1(data) self.assertListEqual(res1.shape, [5, 3]) - loss1 = fluid.layers.reduce_mean(res1) + loss1 = paddle.mean(res1) loss1.backward() l1 = Linear(10, 1) @@ -53,7 +54,7 @@ def func_sequential(self): res2 = model2(data) self.assertListEqual(res2.shape, [5, 4]) - loss2 = fluid.layers.reduce_mean(res2) + loss2 = paddle.mean(res2) loss2.backward() def test_sequential(self): @@ -71,7 +72,7 @@ def func_sequential_list_params(self): model1[1] = Linear(1, 3) res1 = model1(data) self.assertListEqual(res1.shape, [5, 3]) - loss1 = fluid.layers.reduce_mean(res1) + loss1 = paddle.mean(res1) loss1.backward() l1 = Linear(10, 1) @@ -92,7 +93,7 @@ def func_sequential_list_params(self): res2 = model2(data) self.assertListEqual(res2.shape, [5, 4]) - loss2 = fluid.layers.reduce_mean(res2) + loss2 = 
paddle.mean(res2) loss2.backward() def test_sequential_list_params(self): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py index 7f7330eca39b59..39927e0a2da746 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py @@ -322,7 +322,7 @@ def func_example_no_grad_vars(self): z = y1 + y2 w = z * z - w_mean = fluid.layers.reduce_mean(w) + w_mean = paddle.mean(w) del y1, z, w (dx_actual,) = self.grad( @@ -440,7 +440,7 @@ def func_example_with_gradient_accumulation_and_create_graph(self): z = y + 1 w = z * z - w_mean = fluid.layers.reduce_mean(w) + w_mean = paddle.mean(w) del y, z, w (dx_actual,) = self.grad([w_mean], [x], create_graph=True) @@ -454,7 +454,7 @@ def func_example_with_gradient_accumulation_and_create_graph(self): ).astype('float32') np.testing.assert_allclose(dx_actual.numpy(), dx_expected, rtol=1e-05) - loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x) + loss = paddle.mean(dx_actual * dx_actual + x * x) loss.backward(retain_graph=True) x_grad_actual = x.gradient() @@ -494,7 +494,7 @@ def func_example_with_gradient_accumulation_and_no_grad_vars(self): z = y1 + y2 w = z * z - w_mean = fluid.layers.reduce_mean(w) + w_mean = paddle.mean(w) del y1, z, w (dx_actual,) = self.grad( @@ -517,7 +517,7 @@ def func_example_with_gradient_accumulation_and_no_grad_vars(self): ).astype('float32') np.testing.assert_allclose(dx_actual.numpy(), dx_expected, rtol=1e-05) - loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x) + loss = paddle.mean(dx_actual * dx_actual + x * x) loss.backward() x_grad_actual = x.gradient() @@ -544,7 +544,7 @@ def func_example_with_gradient_accumulation_and_not_create_graph(self): z = y + 1 w = z * z - w_mean = fluid.layers.reduce_mean(w) + w_mean = paddle.mean(w) del y, z, w (dx_actual,) = self.grad([w_mean], [x], create_graph=False) @@ -558,7 +558,7 @@ def func_example_with_gradient_accumulation_and_not_create_graph(self): np.testing.assert_allclose(dx_actual.numpy(), dx_expected, rtol=1e-05) - loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x) + loss = paddle.mean(dx_actual * dx_actual + x * x) loss.backward() x_grad_actual = x.gradient() @@ -644,7 +644,7 @@ def raise_no_grad_op(self): outputs=[y], inputs=[x], create_graph=True, retain_graph=True )[0] - loss = fluid.layers.reduce_mean(dx) + loss = paddle.mean(dx) loss.backward() def test_raise(self): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gan.py b/python/paddle/fluid/tests/unittests/test_imperative_gan.py index 845f47434e59d7..6ee8ded8a6bbe9 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gan.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gan.py @@ -79,7 +79,7 @@ def func_test_gan_float32(self): ) d_real = discriminator(img) - d_loss_real = fluid.layers.reduce_mean( + d_loss_real = paddle.mean( paddle.nn.functional.binary_cross_entropy_with_logits( logit=d_real, label=fluid.layers.fill_constant( @@ -89,7 +89,7 @@ def func_test_gan_float32(self): ) d_fake = discriminator(generator(noise)) - d_loss_fake = fluid.layers.reduce_mean( + d_loss_fake = paddle.mean( paddle.nn.functional.binary_cross_entropy_with_logits( logit=d_fake, label=fluid.layers.fill_constant( @@ -112,7 +112,7 @@ def func_test_gan_float32(self): ) d_fake = discriminator(generator(noise)) - g_loss = fluid.layers.reduce_mean( + g_loss = paddle.mean( 
paddle.nn.functional.binary_cross_entropy_with_logits( logit=d_fake, label=fluid.layers.fill_constant( @@ -164,7 +164,7 @@ def func_test_gan_float32(self): ) d_real = discriminator(to_variable(np.ones([2, 1], np.float32))) - d_loss_real = fluid.layers.reduce_mean( + d_loss_real = paddle.mean( paddle.nn.functional.binary_cross_entropy_with_logits( logit=d_real, label=to_variable(np.ones([2, 1], np.float32)) ) @@ -173,7 +173,7 @@ def func_test_gan_float32(self): d_fake = discriminator( generator(to_variable(np.ones([2, 2], np.float32))) ) - d_loss_fake = fluid.layers.reduce_mean( + d_loss_fake = paddle.mean( paddle.nn.functional.binary_cross_entropy_with_logits( logit=d_fake, label=to_variable(np.zeros([2, 1], np.float32)), @@ -189,7 +189,7 @@ def func_test_gan_float32(self): d_fake = discriminator( generator(to_variable(np.ones([2, 2], np.float32))) ) - g_loss = fluid.layers.reduce_mean( + g_loss = paddle.mean( paddle.nn.functional.binary_cross_entropy_with_logits( logit=d_fake, label=to_variable(np.ones([2, 1], np.float32)) ) @@ -219,7 +219,7 @@ def func_test_gan_float32(self): ) d_real2 = discriminator2(to_variable(np.ones([2, 1], np.float32))) - d_loss_real2 = fluid.layers.reduce_mean( + d_loss_real2 = paddle.mean( paddle.nn.functional.binary_cross_entropy_with_logits( logit=d_real2, label=to_variable(np.ones([2, 1], np.float32)), @@ -229,7 +229,7 @@ def func_test_gan_float32(self): d_fake2 = discriminator2( generator2(to_variable(np.ones([2, 2], np.float32))) ) - d_loss_fake2 = fluid.layers.reduce_mean( + d_loss_fake2 = paddle.mean( paddle.nn.functional.binary_cross_entropy_with_logits( logit=d_fake2, label=to_variable(np.zeros([2, 1], np.float32)), @@ -245,7 +245,7 @@ def func_test_gan_float32(self): d_fake2 = discriminator2( generator2(to_variable(np.ones([2, 2], np.float32))) ) - g_loss2 = fluid.layers.reduce_mean( + g_loss2 = paddle.mean( paddle.nn.functional.binary_cross_entropy_with_logits( logit=d_fake2, label=to_variable(np.ones([2, 1], np.float32)), diff --git a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py index b8efe8fbd1c9f2..ed5d93961d1ae7 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py @@ -73,7 +73,7 @@ def forward(self, input, label): logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) - loss = fluid.layers.reduce_mean(loss, dim=[0]) + loss = paddle.mean(loss, axis=[0]) loss = paddle.sum(loss) return loss diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py index 71eb99c229369c..e6958608628476 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py @@ -141,7 +141,7 @@ def _check_mlp(self, place=None): img = paddle.reshape(img, shape=[batch_size, -1]) cost = mlp(img) - avg_loss = fluid.layers.reduce_mean(cost) + avg_loss = paddle.mean(cost) dy_out = avg_loss.numpy() if batch_id == 0: @@ -180,7 +180,7 @@ def _check_mlp(self, place=None): label = fluid.layers.data(name='label', shape=[1], dtype='int64') img = paddle.reshape(img, shape=[batch_size, 784]) cost = mlp(img) - avg_loss = fluid.layers.reduce_mean(cost) + avg_loss = paddle.mean(cost) optimizer.minimize(avg_loss) # initialize 
params and fetch them @@ -478,7 +478,7 @@ def func_test_constant_lr(self): b = linear(a) - loss = fluid.layers.reduce_mean(b) + loss = paddle.mean(b) adam = fluid.optimizer.Adam( 0.001, parameter_list=linear.parameters() @@ -509,7 +509,7 @@ def func_test_lr_decay(self): b = linear(a) - loss = fluid.layers.reduce_mean(b) + loss = paddle.mean(b) bd = [2, 4, 6, 8] value = [0.2, 0.4, 0.6, 0.8, 1.0] @@ -545,7 +545,7 @@ def func_test_lr_decay_natural_exp(self): b = linear(a) - loss = fluid.layers.reduce_mean(b) + loss = paddle.mean(b) base_lr = 1.0 adam = fluid.optimizer.Adam( @@ -584,7 +584,7 @@ def func_test_set_lr(self): b = linear(a) - loss = fluid.layers.reduce_mean(b) + loss = paddle.mean(b) adam = fluid.optimizer.Adam(0.1, parameter_list=linear.parameters()) @@ -965,7 +965,7 @@ def func_test_parameter_list(self): y = linear_1(in_data) y = linear_2(y) - loss = fluid.layers.reduce_mean(y) + loss = paddle.mean(y) loss.backward() sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py index 27c50d2e8af4ea..d712258edf0a16 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py @@ -139,7 +139,7 @@ def _check_mlp(self, place=None): img = paddle.reshape(img, shape=[batch_size, -1]) cost = mlp(img) - avg_loss = fluid.layers.reduce_mean(cost) + avg_loss = paddle.mean(cost) dy_out = avg_loss.numpy() if batch_id == 0: @@ -189,7 +189,7 @@ def _check_mlp(self, place=None): label = fluid.layers.data(name='label', shape=[1], dtype='int64') img = paddle.reshape(img, shape=[batch_size, 784]) cost = mlp(img) - avg_loss = fluid.layers.reduce_mean(cost) + avg_loss = paddle.mean(cost) optimizer.minimize(avg_loss) # initialize params and fetch them @@ -616,7 +616,7 @@ def func_test_constant_lr(self): b = linear(a) - loss = fluid.layers.reduce_mean(b) + loss = paddle.mean(b) adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters()) @@ -645,7 +645,7 @@ def func_test_lr_decay(self): b = linear(a) - loss = fluid.layers.reduce_mean(b) + loss = paddle.mean(b) bd = [2, 4, 6, 8] value = [0.2, 0.4, 0.6, 0.8, 1.0] @@ -677,7 +677,7 @@ def func_test_lr_scheduler_natural_exp(self): a = fluid.dygraph.to_variable(a) b = linear(a) - loss = fluid.layers.reduce_mean(b) + loss = paddle.mean(b) base_lr = 1.0 scheduler = paddle.optimizer.lr.NaturalExpDecay(1.0, gamma=0.5) @@ -709,7 +709,7 @@ def func_test_set_lr(self): b = linear(a) - loss = fluid.layers.reduce_mean(b) + loss = paddle.mean(b) adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters()) @@ -1085,7 +1085,7 @@ def func_test_parameter_list(self): y = linear_1(in_data) y = linear_2(y) - loss = fluid.layers.reduce_mean(y) + loss = paddle.mean(y) loss.backward() sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py b/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py index 67f5a7d26b5085..714e27c6620878 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py @@ -31,7 +31,7 @@ def func_partitial_backward(self): y = linear1(x[:, :2]) z = linear2(x[:, 2:]) - loss = fluid.layers.reduce_mean(y) + loss = paddle.mean(y) loss.backward() for param in linear1.parameters(): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py 
b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index 3765a6676d0bbe..f8f8620338ca32 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -232,7 +232,7 @@ def forward(self, input, label, init_hidden, init_cell): logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) - loss = fluid.layers.reduce_mean(loss, dim=[0]) + loss = paddle.mean(loss, axis=[0]) loss = paddle.sum(loss) return loss, last_hidden, last_cell diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index 6f2645750f0d24..260c3e0b8eb414 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -228,7 +228,7 @@ def forward(self, input, label, init_hidden, init_cell): logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) - loss = fluid.layers.reduce_mean(loss, dim=[0]) + loss = paddle.mean(loss, axis=[0]) loss = paddle.sum(loss) return loss, last_hidden, last_cell diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index 65e389b3596101..ea6804e64e9eb0 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -229,7 +229,7 @@ def forward(self, input, label, init_hidden, init_cell): logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) - loss = fluid.layers.reduce_mean(loss, dim=[0]) + loss = paddle.mean(loss, axis=[0]) loss = paddle.sum(loss) return loss, last_hidden, last_cell diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py index cc31e922b5efa2..e99d099317e81f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py @@ -82,7 +82,7 @@ def forward(self, input, label): logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) - loss = fluid.layers.reduce_mean(loss, dim=[0]) + loss = paddle.mean(loss, axis=[0]) loss = paddle.sum(loss) return loss diff --git a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py index 5e3ecf8b6cc3bf..1a1b22ee71c351 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py @@ -445,9 +445,7 @@ def get_generator_loss( ): fake_img = generator(image_real, label_trg) rec_img = generator(fake_img, label_org) - g_loss_rec = fluid.layers.reduce_mean( - paddle.abs(paddle.subtract(image_real, rec_img)) - ) + g_loss_rec = paddle.mean(paddle.abs(paddle.subtract(image_real, rec_img))) pred_fake, cls_fake = discriminator(fake_img) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py b/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py index 
341aa800c96262..31e7386fa5d7fc 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_trace_non_persistable_inputs.py @@ -72,7 +72,7 @@ def test_main(self): static_out = traced_layer([in_x])[0] np.testing.assert_array_equal(dygraph_out_numpy, static_out) - loss = fluid.layers.reduce_mean(dygraph_out) + loss = paddle.mean(dygraph_out) loss.backward() optimizer.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py index a31809c0974bb2..ff2dc85126b300 100644 --- a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py @@ -151,7 +151,7 @@ def test_LR_state_dict(self): for epoch in range(10): out = linear(input) - loss = fluid.layers.reduce_mean(out) + loss = paddle.mean(out) loss.backward() adam1.minimize(loss) adam2.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/test_mean_op.py b/python/paddle/fluid/tests/unittests/test_mean_op.py index 5999b3ee0362ab..83f07bf747c7ee 100644 --- a/python/paddle/fluid/tests/unittests/test_mean_op.py +++ b/python/paddle/fluid/tests/unittests/test_mean_op.py @@ -383,7 +383,7 @@ def test_case(x, axis=None, keepdim=False): def test_fluid_api(self): with fluid.program_guard(fluid.Program(), fluid.Program()): x = fluid.data("x", shape=[10, 10], dtype="float32") - out = fluid.layers.reduce_mean(input=x, dim=1) + out = paddle.mean(x=x, axis=1) place = fluid.CPUPlace() exe = fluid.Executor(place) x_np = np.random.rand(10, 10).astype(np.float32) @@ -393,7 +393,7 @@ def test_fluid_api(self): with fluid.dygraph.guard(): x_np = np.random.rand(10, 10).astype(np.float32) x = fluid.dygraph.to_variable(x_np) - out = fluid.layers.reduce_mean(input=x, dim=1) + out = paddle.mean(x=x, axis=1) np.testing.assert_allclose( out.numpy(), np.mean(x_np, axis=1), rtol=1e-05 ) diff --git a/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py b/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py index 392559a1b58e2a..232c0f5c4925be 100644 --- a/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py +++ b/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py @@ -16,6 +16,7 @@ import numpy as np +import paddle import paddle.fluid as fluid @@ -29,7 +30,7 @@ def main_impl(self, place): name='image', shape=self.image_shape, dtype='float32' ) relu_image = fluid.layers.relu(image) - loss = fluid.layers.reduce_mean(relu_image) + loss = paddle.mean(relu_image) build_strategy = fluid.BuildStrategy() build_strategy.enable_inplace = True diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py index 8bcde4489e4254..83ce7e5d35519f 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py @@ -16,6 +16,7 @@ import time import unittest +import paddle import numpy as np from test_multiprocess_dataloader_static import ( BATCH_SIZE, @@ -100,7 +101,7 @@ def run_main(self, num_workers, places, persistent_workers): for image, label in dataloader(): out = fc_net(image) loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.reduce_mean(loss) + avg_loss = paddle.mean(loss) 
avg_loss.backward() optimizer.minimize(avg_loss) fc_net.clear_gradients() @@ -170,7 +171,7 @@ def run_main(self, num_workers, places, persistent_workers): for image, label in dataloader(): out = fc_net(image) loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.reduce_mean(loss) + avg_loss = paddle.mean(loss) avg_loss.backward() optimizer.minimize(avg_loss) fc_net.clear_gradients() diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py index e739c0c2cb755d..c4b59ef96eea79 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py @@ -100,7 +100,7 @@ def run_main(self, num_workers, places, persistent_workers): for image, label in dataloader(): out = fc_net(image) loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.reduce_mean(loss) + avg_loss = paddle.mean(loss) avg_loss.backward() optimizer.minimize(avg_loss) fc_net.clear_gradients() @@ -168,7 +168,7 @@ def run_main(self, num_workers, places, persistent_workers): for image, label in dataloader(): out = fc_net(image) loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.reduce_mean(loss) + avg_loss = paddle.mean(loss) avg_loss.backward() optimizer.minimize(avg_loss) fc_net.clear_gradients() diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py index 8808654e03ed5a..f9fcb6f77d8f3e 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py @@ -18,6 +18,7 @@ import numpy as np +import paddle import paddle.fluid as fluid from paddle.io import DataLoader, IterableDataset @@ -78,7 +79,7 @@ def simple_fc_net_static(): param_attr=param_attr, bias_attr=bias_attr, ) - loss = fluid.layers.reduce_mean( + loss = paddle.mean( fluid.layers.cross_entropy(input=predict_label, label=label) ) diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py index 283c68c1a13b86..7321e4d137442a 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py @@ -79,7 +79,7 @@ def simple_fc_net_static(): param_attr=param_attr, bias_attr=bias_attr, ) - loss = fluid.layers.reduce_mean( + loss = paddle.mean( fluid.layers.cross_entropy(input=predict_label, label=label) ) diff --git a/python/paddle/fluid/tests/unittests/test_nn_grad.py b/python/paddle/fluid/tests/unittests/test_nn_grad.py index 657d3f4dfb0829..a4030d8adbda8c 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_nn_grad.py @@ -75,7 +75,7 @@ def func(self, place): x = layers.data('x', shape, False, dtype) x.persistable = True - y = layers.reduce_mean(x, dim=0) + y = paddle.mean(x, axis=0) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) gradient_checker.double_grad_check( diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py index 
b3d6c75d9a12de..50fe0ab67ef48d 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer.py @@ -1169,7 +1169,7 @@ def mlp(input_x, input_y): input=[drop_res], size=2, act='softmax' ) cost = fluid.layers.cross_entropy(input=prediction, label=input_y) - sum_cost = fluid.layers.reduce_mean(cost) + sum_cost = paddle.mean(cost) return drop_res, prediction, sum_cost main_program = Program() @@ -1226,7 +1226,7 @@ def mlp(input_x, input_y): input=[drop_res], size=2, act='softmax' ) cost = fluid.layers.cross_entropy(input=prediction, label=input_y) - sum_cost = fluid.layers.reduce_mean(cost) + sum_cost = paddle.mean(cost) return drop_res, prediction, sum_cost main_program = Program() diff --git a/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py b/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py index e992fe1f34ec54..1547bd673db5f5 100644 --- a/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py +++ b/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py @@ -239,7 +239,7 @@ def func_example_with_gradient_accumulation_and_create_graph(self): z = y + 1 w = z * z - w_mean = fluid.layers.reduce_mean(w) + w_mean = paddle.mean(w) del y, z, w (dx_actual,) = self.grad([w_mean], [x], create_graph=True) @@ -256,7 +256,7 @@ def func_example_with_gradient_accumulation_and_create_graph(self): if not _in_legacy_dygraph(): pass else: - loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x) + loss = paddle.mean(dx_actual * dx_actual + x * x) loss.backward() x_grad_actual = x.gradient() @@ -286,7 +286,7 @@ def func_example_with_gradient_accumulation_and_no_grad_vars(self): z = y1 + y2 w = z * z - w_mean = fluid.layers.reduce_mean(w) + w_mean = paddle.mean(w) del y1, z, w (dx_actual,) = self.grad( @@ -308,7 +308,7 @@ def func_example_with_gradient_accumulation_and_no_grad_vars(self): if not _in_legacy_dygraph(): pass else: - loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x) + loss = paddle.mean(dx_actual * dx_actual + x * x) loss.backward() x_grad_actual = x.gradient() @@ -337,7 +337,7 @@ def func_example_with_gradient_accumulation_and_not_create_graph(self): z = y + 1 w = z * z - w_mean = fluid.layers.reduce_mean(w) + w_mean = paddle.mean(w) del y, z, w (dx_actual,) = self.grad([w_mean], [x], create_graph=False) @@ -354,7 +354,7 @@ def func_example_with_gradient_accumulation_and_not_create_graph(self): if not _in_legacy_dygraph(): pass else: - loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x) + loss = paddle.mean(dx_actual * dx_actual + x * x) loss.backward() x_grad_actual = x.gradient() diff --git a/python/paddle/fluid/tests/unittests/test_paddle_save_load.py b/python/paddle/fluid/tests/unittests/test_paddle_save_load.py index c78fa2ed847b41..193e1047642614 100644 --- a/python/paddle/fluid/tests/unittests/test_paddle_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_paddle_save_load.py @@ -213,7 +213,7 @@ def test_replace_static_save_load(self): ) z = paddle.static.nn.fc(x, 10) z = paddle.static.nn.fc(z, 10, bias_attr=False) - loss = fluid.layers.reduce_mean(z) + loss = paddle.mean(z) opt = Adam(learning_rate=1e-3) opt.minimize(loss) place = paddle.CPUPlace() @@ -382,7 +382,7 @@ def test_single_pickle_var_static(self): name="x", shape=[None, IMAGE_SIZE], dtype='float32' ) z = paddle.static.nn.fc(x, 128) - loss = fluid.layers.reduce_mean(z) + loss = paddle.mean(z) place = ( fluid.CPUPlace() if not 
paddle.fluid.core.is_compiled_with_cuda() @@ -640,7 +640,7 @@ def test_save_load_complex_object_static_save(self): ) z = paddle.static.nn.fc(x, 10, bias_attr=False) z = paddle.static.nn.fc(z, 128, bias_attr=False) - loss = fluid.layers.reduce_mean(z) + loss = paddle.mean(z) place = ( fluid.CPUPlace() if not paddle.fluid.core.is_compiled_with_cuda() @@ -915,7 +915,7 @@ def test_static_save_to_memory(self): ) z = paddle.static.nn.fc(x, 10, bias_attr=False) z = paddle.static.nn.fc(z, 128, bias_attr=False) - loss = fluid.layers.reduce_mean(z) + loss = paddle.mean(z) place = ( fluid.CPUPlace() if not paddle.fluid.core.is_compiled_with_cuda() diff --git a/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py b/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py index 478570100e03ef..4616d8b4b2a472 100644 --- a/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py +++ b/python/paddle/fluid/tests/unittests/test_paddle_save_load_binary.py @@ -79,7 +79,7 @@ def test_replace_save_load_vars(self): ) z = paddle.static.nn.fc(x, 10, bias_attr=False) z = paddle.static.nn.fc(z, 128, bias_attr=False) - loss = fluid.layers.reduce_mean(z) + loss = paddle.mean(z) place = ( fluid.CPUPlace() if not paddle.fluid.core.is_compiled_with_cuda() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py index b18525d727bcfd..7d782fb25bc00e 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_isolated_var.py @@ -31,7 +31,7 @@ def build_network(self, is_training): x = fluid.data(name='x', shape=[-1, 10], dtype='float32') y = fluid.data(name='y', shape=[-1, 10], dtype='float32') fc = fluid.layers.fc(x, size=30, bias_attr=False) - loss = fluid.layers.reduce_mean(fc) + loss = paddle.mean(fc) if is_training: adam = fluid.optimizer.Adam(learning_rate=1e-3) adam.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py index 2704352460d41e..7d3823a07ee2e1 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_inference_feed_partial_data.py @@ -16,6 +16,7 @@ import numpy as np +import paddle import paddle.fluid as fluid @@ -183,7 +184,7 @@ def run_network(self, iterable, use_cuda, drop_last): feed_list=[x], capacity=16, iterable=iterable, drop_last=drop_last ) y = fluid.layers.fc(x, size=10) - loss = fluid.layers.reduce_mean(y) + loss = paddle.mean(y) exe = fluid.Executor(places[0]) exe.run(fluid.default_startup_program()) diff --git a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py index 73995d0ee00db7..0d3ccae5bfcb44 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py @@ -640,7 +640,7 @@ def def_seq2seq_model( target_length, maxlen=max_tar_seq_len, dtype="float32" ) loss = loss * tar_mask - loss = layers.reduce_mean(loss, dim=[0]) + loss = paddle.mean(loss, axis=[0]) loss = paddle.sum(loss) # optimizer diff --git a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py index 
077c8d5e68e75a..b7c98515fee85b 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py @@ -319,7 +319,7 @@ def learn(self, act_prob, action, reward, length=None): cost = ( (paddle.sum(cost) / paddle.sum(length)) if length is not None - else layers.reduce_mean(cost) + else paddle.mean(cost) ) optimizer = fluid.optimizer.Adam(self.lr) optimizer.minimize(cost) @@ -405,7 +405,7 @@ def learn(self, probs, label, weight=None, length=None): max_seq_len = layers.shape(probs)[1] mask = layers.sequence_mask(length, maxlen=max_seq_len, dtype="float32") loss = loss * mask - loss = layers.reduce_mean(loss, dim=[0]) + loss = paddle.mean(loss, axis=[0]) loss = paddle.sum(loss) optimizer = fluid.optimizer.Adam(self.lr) optimizer.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index 8871966d350aa7..d5bb1583651b1a 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -241,7 +241,7 @@ def forward(self, input, label, init_hidden, init_cell): logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) - loss = fluid.layers.reduce_mean(loss, dim=[0]) + loss = paddle.mean(loss, axis=[0]) loss = paddle.sum(loss) return loss, last_hidden, last_cell diff --git a/python/paddle/fluid/tests/unittests/test_traced_layer_err_msg.py b/python/paddle/fluid/tests/unittests/test_traced_layer_err_msg.py index 8beda249844425..68e5bce290c096 100644 --- a/python/paddle/fluid/tests/unittests/test_traced_layer_err_msg.py +++ b/python/paddle/fluid/tests/unittests/test_traced_layer_err_msg.py @@ -223,7 +223,7 @@ def _train_simple_net(self): ).astype('float32') ) dygraph_out = layer(in_x) - loss = fluid.layers.reduce_mean(dygraph_out) + loss = paddle.mean(dygraph_out) loss.backward() optimizer.minimize(loss) return layer diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py index cf9f9762aa6088..e88331676c525c 100644 --- a/python/paddle/nn/layer/loss.py +++ b/python/paddle/nn/layer/loss.py @@ -522,24 +522,16 @@ class MSELoss(Layer): r""" **Mean Square Error Loss** Computes the mean square error (squared L2 norm) of given input and label. - If :attr:`reduction` is set to ``'none'``, loss is calculated as: - .. math:: Out = (input - label)^2 - If :attr:`reduction` is set to ``'mean'``, loss is calculated as: - .. math:: Out = \operatorname{mean}((input - label)^2) - If :attr:`reduction` is set to ``'sum'``, loss is calculated as: - .. math:: Out = \operatorname{sum}((input - label)^2) - where `input` and `label` are `float32` tensors of same shape. - Parameters: reduction (string, optional): The reduction method for the output, could be 'none' | 'mean' | 'sum'. @@ -547,17 +539,13 @@ class MSELoss(Layer): If :attr:`size_average` is ``'sum'``, the reduced sum loss is returned. If :attr:`reduction` is ``'none'``, the unreduced loss is returned. Default is ``'mean'``. - Shape: input (Tensor): Input tensor, the data type is float32 or float64 label (Tensor): Label tensor, the data type is float32 or float64 output (Tensor): output tensor storing the MSE loss of input and label, the data type is same as input. - Examples: .. 
code-block:: python - import paddle - mse_loss = paddle.nn.loss.MSELoss() input = paddle.to_tensor([1.5]) label = paddle.to_tensor([1.7]) @@ -596,7 +584,7 @@ def forward(self, input, label): square_out = paddle.sum(square_out) return square_out - return getattr(fluid.layers, reduce_op)(square_out) + return paddle.mean(square_out) class L1Loss(Layer): From 2bdad6cd2905d3948c478c80f398593ac612faf6 Mon Sep 17 00:00:00 2001 From: Zhang Jun Date: Thu, 1 Dec 2022 17:45:57 +0800 Subject: [PATCH 082/154] [inference][trt] Fp16 support for Generic plugin (#48253) * Support FP16 in generic TensorRT plugin. * Support FP16 for Pad3D. --- .../ir_passes/tensorrt_subgraph_pass.cc | 13 +- .../generic_and_custom_plugin_creater.cc | 5 +- .../convert/multihead_matmul_roformer_op.cc | 2 +- paddle/fluid/inference/tensorrt/helper.h | 10 ++ .../tensorrt/plugin/generic_plugin.cu | 141 ++++++++++++------ .../tensorrt/plugin/generic_plugin.h | 23 ++- 6 files changed, 127 insertions(+), 67 deletions(-) diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc index 9c63c95fd91ba6..44249796ec4055 100644 --- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc @@ -30,9 +30,7 @@ #include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h" #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/engine.h" -#include "paddle/fluid/inference/tensorrt/helper.h" #include "paddle/fluid/inference/tensorrt/op_teller.h" -#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" #include "paddle/fluid/inference/utils/io_utils.h" #include "paddle/phi/common/backend.h" #include "paddle/phi/common/data_type.h" @@ -42,15 +40,6 @@ namespace inference { namespace analysis { namespace { -bool IsFloat(framework::proto::VarType::Type t) { - if (t == framework::proto::VarType::FP16 || - t == framework::proto::VarType::FP32 || - t == framework::proto::VarType::FP64 || - t == framework::proto::VarType::BF16) - return true; - return false; -} - // if in mixed model precision, we should make all tensorrt_engine's output // floats dtype to float32 dtype. void OutputProcess(framework::ir::Graph *graph, @@ -85,7 +74,7 @@ void OutputProcess(framework::ir::Graph *graph, for (auto *var_node : op_node->outputs) { if (!trt_outputs.count(var_node)) continue; if (!var_node->Var()->Persistable() && - IsFloat(var_node->Var()->GetDataType()) && + tensorrt::IsFloatVar(var_node->Var()->GetDataType()) && var_node->Var()->GetDataType() != framework::proto::VarType::FP32) { for (auto *next_op : var_node->outputs) { // if next_op support mixed_precision, we need to add cast op. 
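For context, the FP16 plugin path introduced in this patch is driven from the Python inference API by requesting half precision when the TensorRT engine is enabled; with half precision selected (and plugin FP16 not disabled), generic plugins are created with with_fp16 set. The following is a minimal sketch only, not part of the patch: the model file paths, the 256 MB GPU memory pool size, and the [1, 3, 224, 224] input shape are placeholder assumptions to be replaced by a real exported inference model.

    import numpy as np
    import paddle.inference as paddle_infer

    # Placeholder model files; substitute a real exported inference model.
    config = paddle_infer.Config("./model.pdmodel", "./model.pdiparams")
    config.enable_use_gpu(256, 0)  # assumed 256 MB initial GPU memory pool on device 0

    # Request half precision for the TensorRT subgraphs; ops lowered to the
    # generic plugin can then run their phi kernels in FP16 where supported.
    config.enable_tensorrt_engine(
        workspace_size=1 << 30,
        max_batch_size=1,
        min_subgraph_size=3,
        precision_mode=paddle_infer.PrecisionType.Half,
        use_static=False,
        use_calib_mode=False,
    )

    predictor = paddle_infer.create_predictor(config)
    input_names = predictor.get_input_names()
    input_handle = predictor.get_input_handle(input_names[0])
    # Example input shape; adjust to the actual model.
    input_handle.copy_from_cpu(np.ones([1, 3, 224, 224], dtype="float32"))
    predictor.run()
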
diff --git a/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc b/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc index b5d9a50f06d7df..76e6d3ffcbb9f0 100644 --- a/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc +++ b/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc @@ -182,6 +182,8 @@ class GenericPluginCreater : public OpConverter { phi::DefaultKernelSignatureMap::Instance().Get(op_desc.Type()); } + bool with_fp16 = engine_->WithFp16() && !engine_->disable_trt_plugin_fp16(); + plugin::GenericPlugin::InputOutPutVarInfo in_out_info; for (auto ¶m_name : phi_kernel_signature.input_names) { @@ -218,7 +220,8 @@ class GenericPluginCreater : public OpConverter { in_out_info.outputs_data_type.push_back(var->GetDataType()); } } - plugin::GenericPlugin *plugin = new plugin::GenericPlugin(op, in_out_info); + plugin::GenericPlugin *plugin = + new plugin::GenericPlugin(op, in_out_info, with_fp16); layer = engine_->AddDynamicPlugin(inputs.data(), inputs.size(), plugin); RreplenishLayerAndOutput(layer, op_desc.Type(), output_names, test_mode); diff --git a/paddle/fluid/inference/tensorrt/convert/multihead_matmul_roformer_op.cc b/paddle/fluid/inference/tensorrt/convert/multihead_matmul_roformer_op.cc index 879c8fa9d6a5de..2f19fbdeeda3dc 100644 --- a/paddle/fluid/inference/tensorrt/convert/multihead_matmul_roformer_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/multihead_matmul_roformer_op.cc @@ -60,7 +60,7 @@ class MultiheadMatMulRoformerOpConverter : public OpConverter { weight_data_tmp.data(), weight_data, weight_t->numel() * sizeof(float)); // (hidden_in, 3, hidden_out) - auto weight_dims = weight_t->dims(); + auto& weight_dims = weight_t->dims(); int hidden_in = weight_dims[0]; // channels_in int three = weight_dims[1]; // channels_out diff --git a/paddle/fluid/inference/tensorrt/helper.h b/paddle/fluid/inference/tensorrt/helper.h index 984d06efa5ca2b..0b435c4c1214b2 100644 --- a/paddle/fluid/inference/tensorrt/helper.h +++ b/paddle/fluid/inference/tensorrt/helper.h @@ -22,6 +22,7 @@ #include #include +#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/platform/dynload/tensorrt.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/phi/common/data_type.h" @@ -213,6 +214,15 @@ static inline nvinfer1::DataType PhiType2NvType(phi::DataType type) { } return nv_type; } + +static bool IsFloatVar(framework::proto::VarType::Type t) { + if (t == framework::proto::VarType::FP16 || + t == framework::proto::VarType::FP32 || + t == framework::proto::VarType::FP64 || + t == framework::proto::VarType::BF16) + return true; + return false; +} } // namespace tensorrt } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu index f335c63fa36614..86ecca92901773 100644 --- a/paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu @@ -30,8 +30,11 @@ namespace plugin { void BuildPhiKernelContextAttr(const framework::OpDesc& op_desc, phi::KernelContext* kernel_context, const phi::KernelSignature& signature, - const phi::Kernel& phi_kernel) { - const phi::KernelArgsDef& args_def = phi_kernel.args_def(); + const phi::Kernel* phi_kernel) { + if (!phi_kernel->IsValid()) { + return; + } + const phi::KernelArgsDef& args_def = phi_kernel->args_def(); const auto& attr_names = signature.attr_names; const auto& 
attr_defs = args_def.attribute_defs(); @@ -221,28 +224,34 @@ void BuildPhiKernelContextAttr(const framework::OpDesc& op_desc, GenericPlugin::GenericPlugin( const paddle::framework::proto::OpDesc& proto_op_desc, - const InputOutPutVarInfo& in_out_info) { + const InputOutPutVarInfo& in_out_info, + bool with_fp16) { proto_op_desc_ = proto_op_desc; op_desc_ = std::move(framework::OpDesc(proto_op_desc_, nullptr)); proto_op_desc_.SerializeToString(&op_meta_data_); inputs_data_type_ = in_out_info.inputs_data_type; outputs_data_type_ = in_out_info.outputs_data_type; + with_fp16_ = with_fp16; } GenericPlugin::GenericPlugin( const paddle::framework::proto::OpDesc& proto_op_desc, const std::vector& inputs_data_type, - const std::vector& outputs_data_type) { + const std::vector& outputs_data_type, + bool with_fp16) { proto_op_desc_ = proto_op_desc; op_desc_ = std::move(framework::OpDesc(proto_op_desc_, nullptr)); proto_op_desc_.SerializeToString(&op_meta_data_); inputs_data_type_ = inputs_data_type; outputs_data_type_ = outputs_data_type; + with_fp16_ = with_fp16; } GenericPlugin::GenericPlugin(void const* serial_data, size_t serial_length) { DeserializeValue(&serial_data, &serial_length, &inputs_data_type_); DeserializeValue(&serial_data, &serial_length, &outputs_data_type_); + DeserializeValue(&serial_data, &serial_length, &with_fp16_); + std::string op_meta_data((char*)(serial_data), serial_length); // NOLINT op_meta_data_ = std::move(op_meta_data); proto_op_desc_.ParseFromString(op_meta_data_); @@ -266,8 +275,8 @@ int GenericPlugin::getNbInputs() const TRT_NOEXCEPT { } nvinfer1::IPluginV2DynamicExt* GenericPlugin::clone() const TRT_NOEXCEPT { - nvinfer1::IPluginV2DynamicExt* plugin = - new GenericPlugin(proto_op_desc_, inputs_data_type_, outputs_data_type_); + nvinfer1::IPluginV2DynamicExt* plugin = new GenericPlugin( + proto_op_desc_, inputs_data_type_, outputs_data_type_, with_fp16_); plugin->initialize(); return plugin; } @@ -277,6 +286,8 @@ void GenericPlugin::serialize(void* buffer) const TRT_NOEXCEPT { SerializeValue(&buffer, inputs_data_type_); // outputs_data_type_ SerializeValue(&buffer, outputs_data_type_); + // use fp16 + SerializeValue(&buffer, with_fp16_); // serialize op_meta_data_ std::memcpy(buffer, op_meta_data_.c_str(), op_meta_data_.size()); reinterpret_cast(buffer) += op_meta_data_.size(); @@ -310,6 +321,12 @@ bool GenericPlugin::supportsFormatCombination( if (pos == 3) return (in_out[pos].type == nvinfer1::DataType::kFLOAT) && (in_out[pos].format == nvinfer1::TensorFormat::kLINEAR); + } else if (op_desc_.Type() == "pad3d") { + return (in_out[pos].type == nvinfer1::DataType::kFLOAT || + (isFp16Supported() && + in_out[pos].type == nvinfer1::DataType::kHALF)) && + (in_out[pos].format == nvinfer1::TensorFormat::kLINEAR) && + (in_out[0].type == in_out[pos].type); } else { return (in_out[pos].type == nvinfer1::DataType::kFLOAT) && (in_out[pos].format == nvinfer1::TensorFormat::kLINEAR); @@ -337,34 +354,43 @@ int GenericPlugin::initialize() TRT_NOEXCEPT { phi::DefaultKernelSignatureMap::Instance().Get(op_type); } - phi::KernelKey phi_kernel_key( - phi::Backend::GPU, phi::DataLayout::ANY, phi::DataType::FLOAT32); - PADDLE_ENFORCE_EQ( phi::KernelFactory::Instance().HasCompatiblePhiKernel(op_type), true, platform::errors::Fatal("%s has no compatible phi kernel!", op_type.c_str())); - const phi::Kernel& phi_kernel = phi::KernelFactory::Instance().SelectKernel( - phi_kernel_signature.name, phi_kernel_key); - phi_kernel_ = &phi_kernel; - - PADDLE_ENFORCE_EQ(phi_kernel_->IsValid(), - true, 
- platform::errors::Fatal("%s phi kernel is invalid!.", - phi_kernel_signature.name)); - paddle::platform::DeviceContextPool& pool = paddle::platform::DeviceContextPool::Instance(); platform::CUDAPlace place(platform::GetCurrentDeviceId()); auto* dev_ctx = static_cast(pool.Get(place)); - if (!phi_kernel_context_) { - phi_kernel_context_ = new phi::KernelContext(dev_ctx); - BuildPhiKernelContextAttr( - op_desc_, phi_kernel_context_, phi_kernel_signature, phi_kernel); + std::vector precision_types{phi::DataType::FLOAT32, + phi::DataType::FLOAT16}; + for (auto& precision_type : precision_types) { + phi::KernelKey phi_kernel_key( + phi::Backend::GPU, phi::DataLayout::ANY, precision_type); + + auto nv_dtype = PhiType2NvType(precision_type); + phi_kernels_[nv_dtype].reset( + new phi::Kernel(phi::KernelFactory::Instance().SelectKernel( + phi_kernel_signature.name, phi_kernel_key))); + + if (phi_kernel_contexts_.find(nv_dtype) == phi_kernel_contexts_.end() || + !phi_kernel_contexts_[nv_dtype]) { + phi_kernel_contexts_[nv_dtype].reset(new phi::KernelContext(dev_ctx)); + BuildPhiKernelContextAttr(op_desc_, + phi_kernel_contexts_[nv_dtype].get(), + phi_kernel_signature, + phi_kernels_[nv_dtype].get()); + } } + PADDLE_ENFORCE_EQ(phi_kernels_[nvinfer1::DataType::kFLOAT]->IsValid() || + phi_kernels_[nvinfer1::DataType::kHALF]->IsValid(), + true, + platform::errors::Fatal("%s phi kernel is invalid!.", + phi_kernel_signature.name)); + if (!dense_tensor_inputs_) dense_tensor_inputs_ = new std::vector(getNbInputs()); if (!dense_tensor_outputs_) @@ -396,15 +422,14 @@ void GenericPlugin::configurePlugin( int nb_inputs, const nvinfer1::DynamicPluginTensorDesc* out, int nb_outputs) TRT_NOEXCEPT { - CHECK(phi_kernel_context_); - CHECK(phi_kernel_); + CHECK(phi_kernels_[nvinfer1::DataType::kFLOAT]->IsValid() || + phi_kernels_[nvinfer1::DataType::kHALF]->IsValid()); CHECK(nb_inputs == getNbInputs()); CHECK(nb_outputs == getNbOutputs()); } // Shutdown the layer. 
This is called when the engine is destroyed void GenericPlugin::terminate() TRT_NOEXCEPT { - delete phi_kernel_context_; delete dense_tensor_inputs_; delete dense_tensor_outputs_; } @@ -418,27 +443,42 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc, platform::CUDAPlace place(platform::GetCurrentDeviceId()); // [TODO]now generic plugin do not support FP16 and INT8 precision - auto protoType2PhiType = [](int proto_type) -> std::pair { + auto protoType2PhiType = + [&](int proto_type, + nvinfer1::DataType nv_dtype) -> std::pair { if (proto_type == - static_cast(framework::proto::VarType_Type::VarType_Type_FP32)) - return {phi::DataType::FLOAT32, sizeof(float)}; - else if (proto_type == - static_cast( - framework::proto::VarType_Type::VarType_Type_INT64) || - proto_type == - static_cast( - framework::proto::VarType_Type::VarType_Type_INT32)) + static_cast(framework::proto::VarType_Type::VarType_Type_FP16)) { + return {phi::DataType::FLOAT16, sizeof(half)}; + } else if (proto_type == + static_cast( + framework::proto::VarType_Type::VarType_Type_FP32)) { + if (isFp16Supported() && nv_dtype == nvinfer1::DataType::kHALF) { + return {phi::DataType::FLOAT16, sizeof(half)}; + } else { + return {phi::DataType::FLOAT32, sizeof(float)}; + } + } else if (proto_type == + static_cast( + framework::proto::VarType_Type::VarType_Type_INT64) || + proto_type == + static_cast( + framework::proto::VarType_Type::VarType_Type_INT32)) { return {phi::DataType::INT32, sizeof(int32_t)}; - else if (proto_type == - static_cast( - framework::proto::VarType_Type::VarType_Type_BOOL)) + } else if (proto_type == + static_cast( + framework::proto::VarType_Type::VarType_Type_BOOL)) { return {phi::DataType::BOOL, sizeof(bool)}; - else + } else { CHECK(false) << "precision is not supported"; + } }; // input - phi_kernel_context_->ClearInputOutput(); + auto data_type = input_desc[0].type; + CHECK((data_type == nvinfer1::DataType::kFLOAT) || + (data_type == nvinfer1::DataType::kHALF)); + + phi_kernel_contexts_[data_type]->ClearInputOutput(); for (int i = 0; i < getNbInputs(); i++) { auto const& input_dims = input_desc[i].dims; @@ -450,7 +490,9 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc, int input_numel = 1; for (int k = 0; k < input_shape.size(); k++) input_numel *= input_shape[k]; - auto data_type_and_size = protoType2PhiType(inputs_data_type_[i]); + auto data_type_and_size = + protoType2PhiType(inputs_data_type_[i], data_type); + phi::DenseTensorMeta input_meta(data_type_and_size.first, phi::make_ddim(input_shape)); std::shared_ptr input_alloc( @@ -459,9 +501,9 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc, place)); (*dense_tensor_inputs_)[i] = std::move(phi::DenseTensor(input_alloc, input_meta)); - phi_kernel_context_->EmplaceBackInput(&((*dense_tensor_inputs_)[i])); + phi_kernel_contexts_[data_type]->EmplaceBackInput( + &((*dense_tensor_inputs_)[i])); } - // output for (int i = 0; i < getNbOutputs(); i++) { auto const& output_dims = output_desc[i].dims; @@ -474,23 +516,28 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc, for (int k = 0; k < output_shape.size(); k++) output_numel *= output_shape[k]; - auto data_type_and_size = protoType2PhiType(inputs_data_type_[i]); + auto data_type_and_size = + protoType2PhiType(inputs_data_type_[i], data_type); phi::DenseTensorMeta output_meta(data_type_and_size.first, phi::make_ddim(output_shape)); std::shared_ptr output_alloc( new phi::Allocation(reinterpret_cast(outputs[i]), 
output_numel * data_type_and_size.second, place)); + phi::DenseTensor output_densetonsor(output_alloc, output_meta); + (*dense_tensor_outputs_)[i] = std::move(phi::DenseTensor(output_alloc, output_meta)); - phi_kernel_context_->EmplaceBackOutput(&((*dense_tensor_outputs_)[i])); + + phi_kernel_contexts_[data_type]->EmplaceBackOutput( + &((*dense_tensor_outputs_)[i])); } - CHECK_EQ(phi_kernel_context_->InputsSize(), getNbInputs()); - CHECK_EQ(phi_kernel_context_->OutputsSize(), getNbOutputs()); + CHECK_EQ(phi_kernel_contexts_[data_type]->InputsSize(), getNbInputs()); + CHECK_EQ(phi_kernel_contexts_[data_type]->OutputsSize(), getNbOutputs()); - (*phi_kernel_)(phi_kernel_context_); + (*phi_kernels_[data_type])(phi_kernel_contexts_[data_type].get()); return cudaGetLastError() != cudaSuccess; } diff --git a/paddle/fluid/inference/tensorrt/plugin/generic_plugin.h b/paddle/fluid/inference/tensorrt/plugin/generic_plugin.h index 5705078ffa4412..4b78f0d1ed137e 100644 --- a/paddle/fluid/inference/tensorrt/plugin/generic_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/generic_plugin.h @@ -44,7 +44,7 @@ namespace plugin { void BuildPhiKernelContextAttr(const framework::OpDesc& op_desc, phi::KernelContext* kernel_context, const phi::KernelSignature& signature, - const phi::Kernel& phi_kernel); + const phi::Kernel* phi_kernel); class GenericPlugin : public DynamicPluginTensorRT { public: @@ -57,11 +57,13 @@ class GenericPlugin : public DynamicPluginTensorRT { GenericPlugin() {} GenericPlugin(const paddle::framework::proto::OpDesc& proto_op_desc, - const InputOutPutVarInfo& in_out_info); + const InputOutPutVarInfo& in_out_info, + bool with_fp16_ = false); GenericPlugin(const paddle::framework::proto::OpDesc& proto_op_desc, const std::vector& inputs_data_type, - const std::vector& outputs_data_type); + const std::vector& outputs_data_type, + bool with_fp16_ = false); // It was used for tensorrt deserialization. // It should not be called by users. 
@@ -86,7 +88,7 @@ class GenericPlugin : public DynamicPluginTensorRT { size_t getSerializationSize() const TRT_NOEXCEPT { return op_meta_data_.size() + SerializedSize(inputs_data_type_) + - SerializedSize(outputs_data_type_); + SerializedSize(outputs_data_type_) + SerializedSize(with_fp16_); } void serialize(void* buffer) const TRT_NOEXCEPT; @@ -122,15 +124,24 @@ class GenericPlugin : public DynamicPluginTensorRT { const nvinfer1::DataType* input_types, int nb_inputs) const TRT_NOEXCEPT; + bool isFp16Supported() { + auto half_dtype = nvinfer1::DataType::kHALF; + return with_fp16_ && + !(phi_kernels_.find(half_dtype) == phi_kernels_.end()) && + phi_kernels_[half_dtype]->IsValid(); + } + private: std::string op_meta_data_; framework::proto::OpDesc proto_op_desc_; framework::OpDesc op_desc_; private: - const phi::Kernel* phi_kernel_{nullptr}; + std::unordered_map> + phi_kernels_; + std::unordered_map> + phi_kernel_contexts_; - phi::KernelContext* phi_kernel_context_{nullptr}; std::vector* dense_tensor_inputs_{nullptr}; std::vector* dense_tensor_outputs_{nullptr}; From d3f8ede01820e3e4b1a763df9d460cea0d56b142 Mon Sep 17 00:00:00 2001 From: zhoutianzi666 <39978853+zhoutianzi666@users.noreply.github.com> Date: Thu, 1 Dec 2022 17:48:53 +0800 Subject: [PATCH 083/154] [Paddle Inference] remove conv_act_set from graph_pattern_detector.cc (#48569) * remove conv_act_set from graph_pattern_detector.cc --- .../ir/conv_elementwise_add2_act_fuse_pass.cc | 9 ++++++++- .../ir/conv_elementwise_add_act_fuse_pass.cc | 9 ++++++++- paddle/fluid/framework/ir/graph_pattern_detector.cc | 13 ++++--------- paddle/fluid/framework/ir/graph_pattern_detector.h | 6 ++++-- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc index 6d9611ebd13931..737fa23f73732d 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc @@ -131,8 +131,15 @@ void ConvElementwiseAdd2ActFusePass::ApplyImpl(ir::Graph* graph) const { auto* x = gpd.mutable_pattern()->NewNode("x")->AsInput()->assert_is_op_input( "conv2d", "Input"); +#if CUDNN_VERSION >= 8000 + std::unordered_set cudnn_act_set( + {"identity", "relu", "sigmoid", "tanh"}); +#else + std::unordered_set cudnn_act_set({"identity", "relu"}); +#endif + patterns::ConvElementwiseadd2Act pattern(gpd.mutable_pattern(), pattern_name); - pattern(x); + pattern(x, cudnn_act_set); auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* g) { diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc index 47e2c5e380bcbf..1d309d133795c5 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc @@ -130,8 +130,15 @@ void ConvElementwiseAddActFusePass::ApplyImpl(ir::Graph* graph) const { ->assert_is_op_input("conv2d", "Input") ->AsInput(); +#if CUDNN_VERSION >= 8000 + std::unordered_set cudnn_act_set( + {"identity", "relu", "sigmoid", "tanh"}); +#else + std::unordered_set cudnn_act_set({"identity", "relu"}); +#endif + patterns::ConvElementwiseaddAct pattern(gpd.mutable_pattern(), pattern_name); - pattern(x); + pattern(x, cudnn_act_set); auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* g) { diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc 
b/paddle/fluid/framework/ir/graph_pattern_detector.cc index acbaef67a68fc4..dd5edaaa9c821f 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -2372,14 +2372,8 @@ PDNode *patterns::PriorBox::operator()() { return boxes_var; } -#if CUDNN_VERSION >= 8000 -std::unordered_set conv_act_set( - {"identity", "relu", "sigmoid", "tanh"}); -#else -std::unordered_set conv_act_set({"identity", "relu"}); -#endif - -PDNode *patterns::ConvElementwiseaddAct::operator()(PDNode *conv_in) { +PDNode *patterns::ConvElementwiseaddAct::operator()( + PDNode *conv_in, const std::unordered_set &conv_act_set) { conv_in->AsInput(); auto conv_op = pattern->NewNode(conv_op_repr())->assert_is_op("conv2d"); auto conv_out = pattern->NewNode(conv_out_repr()) @@ -2576,7 +2570,8 @@ PDNode *patterns::VitAttention::operator()(PDNode *in) { return reshape2_out; } -PDNode *patterns::ConvElementwiseadd2Act::operator()(PDNode *conv_in) { +PDNode *patterns::ConvElementwiseadd2Act::operator()( + PDNode *conv_in, const std::unordered_set &conv_act_set) { auto conv_op = pattern->NewNode(conv_op_repr())->assert_is_op("conv2d"); auto conv_filter = pattern->NewNode(conv_filter_repr()) ->assert_is_op_input("conv2d", "Filter") diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index da479c1bf7c9b3..f8f985fa5994ec 100755 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -1476,7 +1476,8 @@ struct ConvElementwiseaddAct : public PatternBase { ConvElementwiseaddAct(PDPattern* pattern, const std::string& name_scope) : PatternBase(pattern, name_scope, "conv_elementwiseadd_act") {} - PDNode* operator()(PDNode* conv_in); + PDNode* operator()(PDNode* conv_in, + const std::unordered_set& conv_act_set); PATTERN_DECL_NODE(conv_op); PATTERN_DECL_NODE(conv_out); @@ -1496,7 +1497,8 @@ struct ConvElementwiseadd2Act : public PatternBase { : PatternBase( pattern, name_scope, "conv_elementwiseadd2_elementwiseadd_act") {} - PDNode* operator()(PDNode* conv_in); + PDNode* operator()(PDNode* conv_in, + const std::unordered_set& conv_act_set); PATTERN_DECL_NODE(conv_op); PATTERN_DECL_NODE(conv_filter); From 529e74e41a56dba4bf14885af9d317e53b75d30a Mon Sep 17 00:00:00 2001 From: Roc <30228238+sljlp@users.noreply.github.com> Date: Thu, 1 Dec 2022 17:52:07 +0800 Subject: [PATCH 084/154] [Clean Fluid] replace accuracy and auc and remove get_places, distributions(#48554) * mv accuracy and auc * rm distributions * rm get_places * replace metric --- .../distributed/fleet/metrics/metric.py | 6 +- .../contrib/slim/tests/test_imperative_ptq.py | 4 +- .../contrib/slim/tests/test_imperative_qat.py | 10 +- .../slim/tests/test_imperative_qat_amp.py | 8 +- .../slim/tests/test_imperative_qat_lsq.py | 6 +- .../tests/test_image_classification_fp16.py | 2 +- .../incubate/fleet/tests/fleet_deep_ctr.py | 4 +- .../fluid/incubate/fleet/utils/fleet_util.py | 6 +- python/paddle/fluid/layers/__init__.py | 8 - python/paddle/fluid/layers/device.py | 43 - python/paddle/fluid/layers/distributions.py | 721 ---------------- python/paddle/fluid/metrics.py | 2 +- .../tests/book/notest_understand_sentiment.py | 4 +- .../tests/book/test_image_classification.py | 2 +- .../fluid/tests/book/test_recognize_digits.py | 2 +- .../tests/unittests/check_nan_inf_base.py | 2 +- .../fleet/dist_mnist_gradient_merge.py | 2 +- .../collective/fleet/pipeline_mnist.py | 2 +- .../fleet/pipeline_mnist_multi_device.py 
| 2 +- .../fleet/pipeline_mnist_one_device.py | 2 +- .../tests/unittests/dist_allreduce_op.py | 2 +- .../paddle/fluid/tests/unittests/dist_ctr.py | 4 +- .../fluid/tests/unittests/dist_fleet_ctr.py | 4 +- .../dist_fleet_raw_program_optimizer.py | 2 +- ...et_raw_program_optimizer_fuse_allreduce.py | 2 +- .../dist_fleet_sparse_embedding_ctr.py | 4 +- .../fluid/tests/unittests/dist_mnist.py | 2 +- .../tests/unittests/dist_mnist_batch_merge.py | 2 +- .../unittests/dist_mnist_fp16_allreduce.py | 2 +- .../fluid/tests/unittests/dist_mnist_lars.py | 2 +- .../fluid/tests/unittests/dist_se_resnext.py | 4 +- .../unittests/dist_text_classification.py | 2 +- .../distribution/test_distribution.py | 162 ---- .../dygraph_to_static/bert_dygraph_model.py | 2 +- .../unittests/dygraph_to_static/test_mnist.py | 2 +- .../dygraph_to_static/test_mobile_net.py | 4 +- .../dygraph_to_static/test_resnet.py | 4 +- .../dygraph_to_static/test_resnet_amp.py | 4 +- .../test_resnet_pure_fp16.py | 4 +- .../dygraph_to_static/test_se_resnet.py | 4 +- .../dygraph_to_static/test_sentiment.py | 8 +- .../unittests/dygraph_to_static/test_tsm.py | 4 +- .../unittests/mlu/test_accuracy_op_mlu.py | 6 +- .../fluid/tests/unittests/test_accuracy_op.py | 6 +- .../test_async_ssa_graph_executor_mnist.py | 2 +- .../fluid/tests/unittests/test_auc_op.py | 2 +- .../fluid/tests/unittests/test_desc_clone.py | 2 +- .../tests/unittests/test_distributions.py | 799 ------------------ .../tests/unittests/test_get_places_op.py | 48 -- .../unittests/test_inference_model_io.py | 4 +- .../fluid/tests/unittests/test_layers.py | 14 +- .../fluid/tests/unittests/test_profiler.py | 2 +- .../unittests/test_program_prune_backward.py | 2 +- python/paddle/static/__init__.py | 5 +- .../metric_op.py => static/nn/metric.py} | 18 +- 55 files changed, 93 insertions(+), 1885 deletions(-) delete mode 100644 python/paddle/fluid/layers/device.py delete mode 100644 python/paddle/fluid/layers/distributions.py delete mode 100644 python/paddle/fluid/tests/unittests/test_get_places_op.py rename python/paddle/{fluid/layers/metric_op.py => static/nn/metric.py} (97%) mode change 100755 => 100644 diff --git a/python/paddle/distributed/fleet/metrics/metric.py b/python/paddle/distributed/fleet/metrics/metric.py index aaf1115af864d5..ba2acd5ee3cd9d 100644 --- a/python/paddle/distributed/fleet/metrics/metric.py +++ b/python/paddle/distributed/fleet/metrics/metric.py @@ -148,8 +148,8 @@ def auc(stat_pos, stat_neg, scope=None, util=None): distributed auc in fleet Args: - stat_pos(numpy.array|Variable|string): stat_pos in output of fluid.layers.auc - stat_neg(numpy.array|Variable|string): stat_neg in output of fluid.layers.auc + stat_pos(numpy.array|Variable|string): stat_pos in output of paddle.static.auc + stat_neg(numpy.array|Variable|string): stat_neg in output of paddle.static.auc scope(Scope): specific scope Returns: @@ -163,7 +163,7 @@ def auc(stat_pos, stat_neg, scope=None, util=None): binary_predict = fluid.layers.concat( input=[fluid.layers.elementwise_sub(fluid.layers.ceil(similarity_norm), similarity_norm), similarity_norm], axis=1) self.auc, batch_auc, [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg] = - fluid.layers.auc(input=binary_predict, label=label, curve='ROC', num_thresholds=4096) + paddle.static.auc(input=binary_predict, label=label, curve='ROC', num_thresholds=4096) # in train.py, after train or infer pos = np.array(scope.find_var(stat_pos.name).get_tensor()) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py 
b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py index 759e74907e1bca..df182f6c9c8c46 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_ptq.py @@ -150,8 +150,8 @@ def model_test(self, model, batch_num=-1, batch_size=8): label = paddle.to_tensor(y_data) out = model(img) - acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) - acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) + acc_top1 = paddle.static.accuracy(input=out, label=label, k=1) + acc_top5 = paddle.static.accuracy(input=out, label=label, k=5) eval_acc_top1_list.append(float(acc_top1.numpy())) if batch_id % 50 == 0: diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py index aff07fb397cb9a..0e0fbd752b2874 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py @@ -129,7 +129,7 @@ def func_qat(self): img = fluid.dygraph.to_variable(x_data) label = fluid.dygraph.to_variable(y_data) out = lenet(img) - acc = fluid.layers.accuracy(out, label) + acc = paddle.static.accuracy(out, label) loss = fluid.layers.cross_entropy(out, label) avg_loss = paddle.mean(loss) avg_loss.backward() @@ -160,10 +160,10 @@ def func_qat(self): label = fluid.dygraph.to_variable(y_data) out = lenet(img) - acc_top1 = fluid.layers.accuracy( + acc_top1 = paddle.static.accuracy( input=out, label=label, k=1 ) - acc_top5 = fluid.layers.accuracy( + acc_top5 = paddle.static.accuracy( input=out, label=label, k=5 ) @@ -200,7 +200,7 @@ def func_qat(self): label = fluid.dygraph.to_variable(y_data) lenet.eval() fp32_out = lenet(test_img) - fp32_acc = fluid.layers.accuracy(fp32_out, label).numpy() + fp32_acc = paddle.static.accuracy(fp32_out, label).numpy() with tempfile.TemporaryDirectory(prefix="qat_save_path_") as tmpdir: # save inference quantized model @@ -237,7 +237,7 @@ def func_qat(self): ) paddle.disable_static() quant_out = fluid.dygraph.to_variable(quant_out) - quant_acc = fluid.layers.accuracy(quant_out, label).numpy() + quant_acc = paddle.static.accuracy(quant_out, label).numpy() paddle.enable_static() delta_value = fp32_acc - quant_acc self.assertLessEqual(delta_value, self.diff_threshold) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_amp.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_amp.py index ee0edb445e7088..d01fc2e63cfd10 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_amp.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_amp.py @@ -118,7 +118,7 @@ def model_train(self, model, batch_num=-1, batch_size=32, use_amp=False): if use_amp: with paddle.amp.auto_cast(): out = model(img) - acc = fluid.layers.accuracy(out, label) + acc = paddle.static.accuracy(out, label) loss = fluid.layers.cross_entropy(out, label) avg_loss = paddle.mean(loss) scaled_loss = scaler.scale(avg_loss) @@ -128,7 +128,7 @@ def model_train(self, model, batch_num=-1, batch_size=32, use_amp=False): adam.clear_gradients() else: out = model(img) - acc = fluid.layers.accuracy(out, label) + acc = paddle.static.accuracy(out, label) loss = fluid.layers.cross_entropy(out, label) avg_loss = paddle.mean(loss) avg_loss.backward() @@ -167,8 +167,8 @@ def model_test(self, model, batch_num=-1, batch_size=32, use_amp=False): with paddle.amp.auto_cast(use_amp): out = model(img) - acc_top1 = fluid.layers.accuracy(input=out, label=label, 
k=1) - acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) + acc_top1 = paddle.static.accuracy(input=out, label=label, k=1) + acc_top5 = paddle.static.accuracy(input=out, label=label, k=5) acc_top1_list.append(float(acc_top1.numpy())) if batch_id % 100 == 0: diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_lsq.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_lsq.py index 1b54a5b55b1083..2b06ee5bf06d87 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_lsq.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_lsq.py @@ -170,7 +170,7 @@ def func_qat(self): img = fluid.dygraph.to_variable(x_data) label = fluid.dygraph.to_variable(y_data) out = lenet(img) - acc = fluid.layers.accuracy(out, label) + acc = paddle.static.accuracy(out, label) loss = fluid.layers.cross_entropy(out, label) avg_loss = paddle.mean(loss) @@ -202,10 +202,10 @@ def func_qat(self): label = fluid.dygraph.to_variable(y_data) out = lenet(img) - acc_top1 = fluid.layers.accuracy( + acc_top1 = paddle.static.accuracy( input=out, label=label, k=1 ) - acc_top5 = fluid.layers.accuracy( + acc_top5 = paddle.static.accuracy( input=out, label=label, k=5 ) diff --git a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py index 7edaeb2760bed4..ab9ebfa71929b7 100644 --- a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py +++ b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py @@ -131,7 +131,7 @@ def train(net_type, use_cuda, save_dirname, is_local): logits, label, return_softmax=True ) avg_cost = paddle.mean(cost) - acc = fluid.layers.accuracy(input=predict, label=label) + acc = paddle.static.accuracy(input=predict, label=label) # Test program test_program = train_program.clone(for_test=True) diff --git a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py index d2c843ca4d0b59..3c034d60f37537 100644 --- a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py +++ b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py @@ -146,8 +146,8 @@ def model(): merge_layer = fluid.layers.concat(input=[dnn_out, lr_pool], axis=1) predict = fluid.layers.fc(input=merge_layer, size=2, act='softmax') - acc = fluid.layers.accuracy(input=predict, label=label) - auc_var, batch_auc_var, auc_states = fluid.layers.auc( + acc = paddle.static.accuracy(input=predict, label=label) + auc_var, batch_auc_var, auc_states = paddle.static.auc( input=predict, label=label ) cost = fluid.layers.cross_entropy(input=predict, label=label) diff --git a/python/paddle/fluid/incubate/fleet/utils/fleet_util.py b/python/paddle/fluid/incubate/fleet/utils/fleet_util.py index 8d5203f201d084..9678914b50bed3 100644 --- a/python/paddle/fluid/incubate/fleet/utils/fleet_util.py +++ b/python/paddle/fluid/incubate/fleet/utils/fleet_util.py @@ -192,7 +192,7 @@ def print_global_auc( fluid.layers.ceil(similarity_norm), similarity_norm),\ similarity_norm], axis=1) auc, batch_auc, [batch_stat_pos, batch_stat_neg, stat_pos, \ - stat_neg] = fluid.layers.auc(input=binary_predict,\ + stat_neg] = paddle.static.auc(input=binary_predict,\ label=label, curve='ROC',\ num_thresholds=4096) @@ -1381,7 +1381,7 @@ def get_global_metrics( fluid.layers.ceil(similarity_norm), similarity_norm),\ similarity_norm], axis=1) auc, batch_auc, [batch_stat_pos, batch_stat_neg, stat_pos, \ - stat_neg] = fluid.layers.auc(input=binary_predict,\ + 
stat_neg] = paddle.static.auc(input=binary_predict,\ label=label, curve='ROC',\ num_thresholds=4096) local_sqrerr, local_abserr, local_prob, local_q, local_pos_ins,\ @@ -1581,7 +1581,7 @@ def print_global_metrics( fluid.layers.ceil(similarity_norm), similarity_norm),\ similarity_norm], axis=1) auc, batch_auc, [batch_stat_pos, batch_stat_neg, stat_pos, \ - stat_neg] = fluid.layers.auc(input=binary_predict,\ + stat_neg] = paddle.static.auc(input=binary_predict,\ label=label, curve='ROC',\ num_thresholds=4096) local_sqrerr, local_abserr, local_prob, local_q, local_pos_ins, \ diff --git a/python/paddle/fluid/layers/__init__.py b/python/paddle/fluid/layers/__init__.py index 9ce0c0afeb95a7..6a49ad01177e33 100644 --- a/python/paddle/fluid/layers/__init__.py +++ b/python/paddle/fluid/layers/__init__.py @@ -20,19 +20,14 @@ from .tensor import * from . import control_flow from .control_flow import * -from . import device -from .device import * from . import math_op_patch from .math_op_patch import * from . import loss from .loss import * from . import detection from .detection import * -from . import metric_op -from .metric_op import * from .learning_rate_scheduler import * from .collective import * -from .distributions import * from .sequence_lod import * from . import rnn @@ -41,11 +36,8 @@ __all__ += io.__all__ __all__ += tensor.__all__ __all__ += control_flow.__all__ -__all__ += device.__all__ __all__ += detection.__all__ -__all__ += metric_op.__all__ __all__ += learning_rate_scheduler.__all__ -__all__ += distributions.__all__ __all__ += sequence_lod.__all__ __all__ += loss.__all__ __all__ += rnn.__all__ diff --git a/python/paddle/fluid/layers/device.py b/python/paddle/fluid/layers/device.py deleted file mode 100644 index ac352ef52b18bc..00000000000000 --- a/python/paddle/fluid/layers/device.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -All util layers. -""" - -from .layer_function_generator import autodoc -from ..framework import unique_name -from ..layer_helper import LayerHelper -from paddle.utils import deprecated - -__all__ = [] - - -@deprecated(since='0.15.0', update_to="paddle.fluid.ParallelExecutor") -@autodoc() -def get_places(device_count=None, device_type=None): - helper = LayerHelper('get_places', **locals()) - out_places = helper.create_variable( - name=unique_name.generate_with_ignorable_key(helper.name + ".out") - ) - attrs = dict() - if device_count is not None: - attrs['device_count'] = int(device_count) - if device_type is not None: - attrs['device_type'] = str(device_type) - - helper.append_op( - type='get_places', outputs={"Out": [out_places]}, attrs=attrs - ) - - return out_places diff --git a/python/paddle/fluid/layers/distributions.py b/python/paddle/fluid/layers/distributions.py deleted file mode 100644 index a54403013c69cd..00000000000000 --- a/python/paddle/fluid/layers/distributions.py +++ /dev/null @@ -1,721 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from . import control_flow -from . import tensor -from . import nn -import math -import numpy as np -import warnings -import paddle - -from ..data_feeder import ( - convert_dtype, - check_variable_and_dtype, - check_type, - check_dtype, -) - -__all__ = ['Uniform', 'Normal', 'Categorical', 'MultivariateNormalDiag'] - - -class Distribution: - """ - Distribution is the abstract base class for probability distributions. - """ - - def sample(self): - """Sampling from the distribution.""" - raise NotImplementedError - - def entropy(self): - """The entropy of the distribution.""" - raise NotImplementedError - - def kl_divergence(self, other): - """The KL-divergence between self distributions and other.""" - raise NotImplementedError - - def log_prob(self, value): - """Log probability density/mass function.""" - raise NotImplementedError - - def _validate_args(self, *args): - """ - Argument validation for distribution args - Args: - value (float, list, numpy.ndarray, Variable) - Raises - ValueError: if one argument is Variable, all arguments should be Variable - """ - is_variable = False - is_number = False - for arg in args: - if isinstance(arg, tensor.Variable): - is_variable = True - else: - is_number = True - - if is_variable and is_number: - raise ValueError( - 'if one argument is Variable, all arguments should be Variable' - ) - - return is_variable - - def _to_variable(self, *args): - """ - Argument convert args to Variable - - Args: - value (float, list, numpy.ndarray, Variable) - Returns: - Variable of args. - """ - numpy_args = [] - variable_args = [] - tmp = 0.0 - - for arg in args: - valid_arg = False - for cls in [float, list, np.ndarray, tensor.Variable]: - if isinstance(arg, cls): - valid_arg = True - break - assert ( - valid_arg - ), "type of input args must be float, list, numpy.ndarray or Variable." - if isinstance(arg, float): - arg = np.zeros(1) + arg - arg_np = np.array(arg) - arg_dtype = arg_np.dtype - if str(arg_dtype) not in ['float32']: - warnings.warn( - "data type of argument only support float32, your argument will be convert to float32." - ) - arg_np = arg_np.astype('float32') - tmp = tmp + arg_np - numpy_args.append(arg_np) - - dtype = tmp.dtype - for arg in numpy_args: - arg_broadcasted, _ = np.broadcast_arrays(arg, tmp) - arg_variable = tensor.create_tensor(dtype=dtype) - tensor.assign(arg_broadcasted, arg_variable) - variable_args.append(arg_variable) - - return tuple(variable_args) - - -class Uniform(Distribution): - r"""Uniform distribution with `low` and `high` parameters. - - Mathematical Details - - The probability density function (pdf) is, - - .. 
math:: - - pdf(x; a, b) = \\frac{1}{Z}, \ a <=x 0): - scale_np = np.random.randn(batch_size, dims).astype('float32') - while not np.all(other_scale_np > 0): - other_scale_np = np.random.randn(batch_size, dims).astype('float32') - return ( - loc_np, - other_loc_np, - loc_float, - scale_float, - other_loc_float, - other_scale_float, - scale_np, - other_scale_np, - values_np, - ) - - def test_normal_distribution(self, batch_size=2, dims=3, tolerance=1e-6): - test_program = fluid.Program() - ( - loc_np, - other_loc_np, - loc_float, - scale_float, - other_loc_float, - other_scale_float, - scale_np, - other_scale_np, - values_np, - ) = self.get_normal_random_input(batch_size, dims) - - feed_vars, fetch_list = self.build_normal_program( - test_program, - batch_size, - dims, - loc_float, - scale_float, - other_loc_float, - other_scale_float, - scale_np, - other_scale_np, - loc_np, - other_loc_np, - values_np, - ) - self.executor.run(fluid.default_startup_program()) - - np_normal_float = NormalNumpy(loc_float, scale_float) - np_other_normal_float = NormalNumpy(other_loc_float, other_scale_float) - np_normal_float_np_broadcast = NormalNumpy(loc_float, scale_np) - np_other_normal_float_np_broadcast = NormalNumpy( - other_loc_float, other_scale_np - ) - np_normal = NormalNumpy(loc_np, scale_np) - np_other_normal = NormalNumpy(other_loc_np, other_scale_np) - - gt_sample_float = np_normal_float.sample([batch_size, dims]) - gt_sample_float_np_broadcast = np_normal_float_np_broadcast.sample( - [batch_size, dims] - ) - gt_sample_np = np_normal.sample([batch_size, dims]) - gt_entropy_float = np_normal_float.entropy() - gt_entropy_float_np_broadcast = np_normal_float_np_broadcast.entropy() - gt_entropy = np_normal.entropy() - gt_lp_float_np_broadcast = np_normal_float_np_broadcast.log_prob( - values_np - ) - gt_lp = np_normal.log_prob(values_np) - gt_kl_float = np_normal_float.kl_divergence(np_other_normal_float) - gt_kl_float_np_broadcast = np_normal_float_np_broadcast.kl_divergence( - np_other_normal_float_np_broadcast - ) - gt_kl = np_normal.kl_divergence(np_other_normal) - - [ - output_sample_float, - output_sample_float_np_broadcast, - output_sample_np, - output_sample_variable, - output_entropy_float, - output_entropy_float_np_broadcast, - output_entropy_np, - output_entropy_variable, - output_lp_float_np_broadcast, - output_lp_np, - output_lp_variable, - output_kl_float, - output_kl_float_np_broadcast, - output_kl_np, - output_kl_variable, - ] = self.executor.run( - program=test_program, feed=feed_vars, fetch_list=fetch_list - ) - - np.testing.assert_allclose( - output_sample_float.shape, - gt_sample_float.shape, - rtol=tolerance, - atol=tolerance, - ) - np.testing.assert_allclose( - output_sample_float_np_broadcast.shape, - gt_sample_float_np_broadcast.shape, - rtol=tolerance, - atol=tolerance, - ) - np.testing.assert_allclose( - output_sample_np.shape, - gt_sample_np.shape, - rtol=tolerance, - atol=tolerance, - ) - np.testing.assert_allclose( - output_sample_variable.shape, - gt_sample_np.shape, - rtol=tolerance, - atol=tolerance, - ) - np.testing.assert_allclose( - output_entropy_float, - gt_entropy_float, - rtol=tolerance, - atol=tolerance, - ) - np.testing.assert_allclose( - output_entropy_float_np_broadcast, - gt_entropy_float_np_broadcast, - rtol=tolerance, - atol=tolerance, - ) - np.testing.assert_allclose( - output_entropy_np, gt_entropy, rtol=tolerance, atol=tolerance - ) - np.testing.assert_allclose( - output_entropy_variable, gt_entropy, rtol=tolerance, atol=tolerance - ) - 
np.testing.assert_allclose( - output_lp_float_np_broadcast, - gt_lp_float_np_broadcast, - rtol=tolerance, - atol=tolerance, - ) - np.testing.assert_allclose( - output_lp_np, gt_lp, rtol=tolerance, atol=tolerance - ) - np.testing.assert_allclose( - output_lp_variable, gt_lp, rtol=tolerance, atol=tolerance - ) - np.testing.assert_allclose( - output_kl_float, gt_kl_float, rtol=tolerance, atol=tolerance - ) - np.testing.assert_allclose( - output_kl_float_np_broadcast, - gt_kl_float_np_broadcast, - rtol=tolerance, - atol=tolerance, - ) - np.testing.assert_allclose( - output_kl_np, gt_kl, rtol=tolerance, atol=tolerance - ) - np.testing.assert_allclose( - output_kl_variable, gt_kl, rtol=tolerance, atol=tolerance - ) - - def build_uniform_program( - self, - test_program, - batch_size, - dims, - low_float, - high_float, - high_np, - low_np, - values_np, - ): - with fluid.program_guard(test_program): - low = layers.data(name='low', shape=[dims], dtype='float32') - high = layers.data(name='high', shape=[dims], dtype='float32') - - values = layers.data(name='values', shape=[dims], dtype='float32') - - uniform_float = Uniform(low_float, high_float) - uniform_float_np_broadcast = Uniform(low_float, high_np) - uniform_np = Uniform(low_np, high_np) - uniform_variable = Uniform(low, high) - - sample_float = uniform_float.sample([batch_size, dims]) - sample_float_np_broadcast = uniform_float_np_broadcast.sample( - [batch_size, dims] - ) - sample_np = uniform_np.sample([batch_size, dims]) - sample_variable = uniform_variable.sample([batch_size, dims]) - - entropy_float = uniform_float.entropy() - entropy_float_np_broadcast = uniform_float_np_broadcast.entropy() - entropy_np = uniform_np.entropy() - entropy_variable = uniform_variable.entropy() - - lp_float_np_broadcast = uniform_float_np_broadcast.log_prob(values) - lp_np = uniform_np.log_prob(values) - lp_variable = uniform_variable.log_prob(values) - - fetch_list = [ - sample_float, - sample_float_np_broadcast, - sample_np, - sample_variable, - entropy_float, - entropy_float_np_broadcast, - entropy_np, - entropy_variable, - lp_float_np_broadcast, - lp_np, - lp_variable, - ] - feed_vars = {'low': low_np, 'high': high_np, 'values': values_np} - return feed_vars, fetch_list - - def test_uniform_distribution(self, batch_size=2, dims=3, tolerance=1e-6): - test_program = fluid.Program() - - low_np = np.random.randn(batch_size, dims).astype('float32') - low_float = np.random.uniform(-2, 1) - high_float = np.random.uniform(1, 3) - high_np = np.random.uniform(-5.0, 5.0, (batch_size, dims)).astype( - 'float32' - ) - values_np = np.random.randn(batch_size, dims).astype('float32') - - feed_vars, fetch_list = self.build_uniform_program( - test_program, - batch_size, - dims, - low_float, - high_float, - high_np, - low_np, - values_np, - ) - - self.executor.run(fluid.default_startup_program()) - - np_uniform_float = UniformNumpy(low_float, high_float) - np_uniform_float_np_broadcast = UniformNumpy(low_float, high_np) - np_uniform = UniformNumpy(low_np, high_np) - - gt_sample_float = np_uniform_float.sample([batch_size, dims]) - gt_sample_float_np_broadcast = np_uniform_float_np_broadcast.sample( - [batch_size, dims] - ) - gt_sample_np = np_uniform.sample([batch_size, dims]) - gt_entropy_float = np_uniform_float.entropy() - gt_entropy_float_np_broadcast = np_uniform_float_np_broadcast.entropy() - gt_entropy = np_uniform.entropy() - gt_lp_float_np_broadcast = np_uniform_float_np_broadcast.log_prob( - values_np - ) - gt_lp = np_uniform.log_prob(values_np) - - # result 
calculated by paddle - [ - output_sample_float, - output_sample_float_np_broadcast, - output_sample_np, - output_sample_variable, - output_entropy_float, - output_entropy_float_np_broadcast, - output_entropy_np, - output_entropy_variable, - output_lp_float_np_broadcast, - output_lp_np, - output_lp_variable, - ] = self.executor.run( - program=test_program, feed=feed_vars, fetch_list=fetch_list - ) - - np.testing.assert_allclose( - output_sample_float.shape, - gt_sample_float.shape, - rtol=tolerance, - atol=tolerance, - ) - np.testing.assert_allclose( - output_sample_float_np_broadcast.shape, - gt_sample_float_np_broadcast.shape, - rtol=tolerance, - atol=tolerance, - ) - np.testing.assert_allclose( - output_sample_np.shape, - gt_sample_np.shape, - rtol=tolerance, - atol=tolerance, - ) - np.testing.assert_allclose( - output_sample_variable.shape, - gt_sample_np.shape, - rtol=tolerance, - atol=tolerance, - ) - np.testing.assert_allclose( - output_entropy_float, - gt_entropy_float, - rtol=tolerance, - atol=tolerance, - ) - np.testing.assert_allclose( - output_entropy_float_np_broadcast, - gt_entropy_float_np_broadcast, - rtol=tolerance, - atol=tolerance, - ) - np.testing.assert_allclose( - output_entropy_np, gt_entropy, rtol=tolerance, atol=tolerance - ) - np.testing.assert_allclose( - output_entropy_variable, gt_entropy, rtol=tolerance, atol=tolerance - ) - np.testing.assert_allclose( - output_lp_float_np_broadcast, - gt_lp_float_np_broadcast, - rtol=tolerance, - atol=tolerance, - ) - np.testing.assert_allclose( - output_lp_np, gt_lp, rtol=tolerance, atol=tolerance - ) - np.testing.assert_allclose( - output_lp_variable, gt_lp, rtol=tolerance, atol=tolerance - ) - - def test_categorical_distribution( - self, batch_size=2, dims=3, tolerance=1e-6 - ): - test_program = fluid.Program() - - logits_np = np.random.randn(batch_size, dims).astype('float32') - other_logits_np = np.random.randn(batch_size, dims).astype('float32') - - with fluid.program_guard(test_program): - logits = layers.data(name='logits', shape=[dims], dtype='float32') - other_logits = layers.data( - name='other_logits', shape=[dims], dtype='float32' - ) - - categorical_np = Categorical(logits_np) - other_categorical_np = Categorical(other_logits_np) - - entropy_np = categorical_np.entropy() - kl_np = categorical_np.kl_divergence(other_categorical_np) - - self.executor.run(fluid.default_main_program()) - - np_categorical = CategoricalNumpy(logits_np) - np_other_categorical = CategoricalNumpy(other_logits_np) - gt_entropy_np = np_categorical.entropy() - gt_kl_np = np_categorical.kl_divergence(np_other_categorical) - - # result calculated by paddle - [output_entropy_np, output_kl_np] = self.executor.run( - program=test_program, - feed={'logits': logits_np}, - fetch_list=[entropy_np, kl_np], - ) - np.testing.assert_allclose( - output_entropy_np, gt_entropy_np, rtol=tolerance, atol=tolerance - ) - np.testing.assert_allclose( - output_kl_np, gt_kl_np, rtol=tolerance, atol=tolerance - ) - - def test_multivariateNormalDiag_distribution( - self, batch_size=2, tolerance=1e-6 - ): - test_program = fluid.Program() - - loc_np = np.random.random( - batch_size, - ).astype('float32') - scale_np = np.diag( - np.random.random( - batch_size, - ) - ).astype('float32') - other_loc_np = np.random.random( - batch_size, - ).astype('float32') - other_scale_np = np.diag( - np.random.random( - batch_size, - ) - ).astype('float32') - - with fluid.program_guard(test_program): - loc = layers.data( - name='loc', - shape=[ - batch_size, - ], - dtype='float32', - 
append_batch_size=False, - ) - scale = layers.data( - name='scale', - shape=[batch_size, batch_size], - dtype='float32', - append_batch_size=False, - ) - other_loc = layers.data( - name='other_loc', - shape=[ - batch_size, - ], - dtype='float32', - append_batch_size=False, - ) - other_scale = layers.data( - name='other_scale', - shape=[batch_size, batch_size], - dtype='float32', - append_batch_size=False, - ) - - multivariate_np = MultivariateNormalDiag(loc, scale) - other_multivariate_np = MultivariateNormalDiag( - other_loc, other_scale - ) - - entropy_np = multivariate_np.entropy() - other_entropy_np = other_multivariate_np.entropy() - kl_np = multivariate_np.kl_divergence(other_multivariate_np) - - self.executor.run(fluid.default_main_program()) - - np_multivariate = MultivariateNormalDiagNumpy(loc_np, scale_np) - np_other_multivariate = MultivariateNormalDiagNumpy( - other_loc_np, other_scale_np - ) - gt_entropy_np = np_multivariate.entropy() - gt_kl_np = np_multivariate.kl_divergence(np_other_multivariate) - - # result calculated by paddle - [output_entropy_np, output_kl_np] = self.executor.run( - program=test_program, - feed={ - 'loc': loc_np, - 'scale': scale_np, - 'other_loc': other_loc_np, - 'other_scale': other_scale_np, - }, - fetch_list=[entropy_np, kl_np], - ) - np.testing.assert_allclose( - output_entropy_np, gt_entropy_np, rtol=tolerance, atol=tolerance - ) - np.testing.assert_allclose( - output_kl_np, gt_kl_np, rtol=tolerance, atol=tolerance - ) - - -class DistributionTestError(unittest.TestCase): - def test_normal_error(self): - loc = int(1) - scale = int(1) - - # type of loc and scale must be float, list, numpy.ndarray, Variable - self.assertRaises(TypeError, Normal, loc, 1.0) - self.assertRaises(TypeError, Normal, 1.0, scale) - - normal = Normal(0.0, 1.0) - - value = [1.0, 2.0] - # type of value must be variable - self.assertRaises(TypeError, normal.log_prob, value) - - shape = 1.0 - # type of shape must be list - self.assertRaises(TypeError, normal.sample, shape) - - seed = 1.0 - # type of seed must be int - self.assertRaises(TypeError, normal.sample, [2, 3], seed) - - normal_other = Uniform(1.0, 2.0) - # type of other must be an instance of Normal - self.assertRaises(TypeError, normal.kl_divergence, normal_other) - - def test_uniform_error(self): - low = int(1) - high = int(1) - - # type of loc and scale must be float, list, numpy.ndarray, Variable - self.assertRaises(TypeError, Uniform, low, 1.0) - self.assertRaises(TypeError, Uniform, 1.0, high) - - uniform = Uniform(0.0, 1.0) - - value = [1.0, 2.0] - # type of value must be variable - self.assertRaises(TypeError, uniform.log_prob, value) - - shape = 1.0 - # type of shape must be list - self.assertRaises(TypeError, uniform.sample, shape) - - seed = 1.0 - # type of seed must be int - self.assertRaises(TypeError, uniform.sample, [2, 3], seed) - - def test_categorical_error(self): - logit = 1.0 - - # type of loc and scale must be list, numpy.ndarray, Variable - self.assertRaises(TypeError, Categorical, logit) - - categorical = Categorical([-0.602, -0.602]) - - categorical_other = Normal(1.0, 2.0) - # type of other must be an instance of Normal - self.assertRaises( - TypeError, categorical.kl_divergence, categorical_other - ) - - def test_multivariate_normal_diag_error(self): - loc = 1.0 - scale = 1.0 - - # type of loc and scale must be list, numpy.ndarray, Variable - self.assertRaises(TypeError, MultivariateNormalDiag, loc, [1.0]) - self.assertRaises(TypeError, MultivariateNormalDiag, [1.0], scale) - - mnd = 
MultivariateNormalDiag([0.3, 0.5], [[0.4, 0], [0, 0.5]]) - - categorical_other = Normal(1.0, 2.0) - # type of other must be an instance of Normal - self.assertRaises(TypeError, mnd.kl_divergence, categorical_other) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_get_places_op.py b/python/paddle/fluid/tests/unittests/test_get_places_op.py deleted file mode 100644 index 03f32c78b59b50..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_get_places_op.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -from decorator_helper import prog_scope - -import paddle.fluid as fluid -import paddle.fluid.core as core -from paddle.fluid.layers.device import get_places - - -class TestGetPlaces(unittest.TestCase): - @prog_scope() - def check_get_cpu_places(self): - places = get_places() - cpu = fluid.CPUPlace() - exe = fluid.Executor(cpu) - exe.run(fluid.default_main_program()) - self.assertEqual(places.type, fluid.core.VarDesc.VarType.PLACE_LIST) - - @prog_scope() - def check_get_gpu_places(self): - places = get_places(device_type='CUDA') - gpu = fluid.CUDAPlace(0) - exe = fluid.Executor(gpu) - exe.run(fluid.default_main_program()) - self.assertEqual(places.type, fluid.core.VarDesc.VarType.PLACE_LIST) - - def test_main(self): - if core.is_compiled_with_cuda(): - self.check_get_gpu_places() - self.check_get_cpu_places() - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_inference_model_io.py b/python/paddle/fluid/tests/unittests/test_inference_model_io.py index b7aa6e7ba0ca58..9cf82e16f742f0 100644 --- a/python/paddle/fluid/tests/unittests/test_inference_model_io.py +++ b/python/paddle/fluid/tests/unittests/test_inference_model_io.py @@ -174,8 +174,8 @@ def test_save_inference_model_with_auc(self): x = layers.data(name='x', shape=[2], dtype='float32') y = layers.data(name='y', shape=[1], dtype='int32') predict = fluid.layers.fc(input=x, size=2, act='softmax') - acc = fluid.layers.accuracy(input=predict, label=y) - auc_var, batch_auc_var, auc_states = fluid.layers.auc( + acc = paddle.static.accuracy(input=predict, label=y) + auc_var, batch_auc_var, auc_states = paddle.static.auc( input=predict, label=y ) cost = fluid.layers.cross_entropy(input=predict, label=y) diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 912382f49ac447..8807b77664b759 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -34,7 +34,6 @@ program_guard, ) from paddle.fluid.initializer import Constant -from paddle.fluid.layers.device import get_places from paddle.fluid.param_attr import ParamAttr from paddle.tensor import random @@ -2895,7 +2894,7 @@ def test_accuracy(self): label = fluid.data(name="label", shape=[-1, 1], dtype="int") fc_out = fluid.layers.fc(input=data, 
size=10) predict = fluid.layers.softmax(input=fc_out) - result = fluid.layers.accuracy(input=predict, label=label, k=5) + result = paddle.static.accuracy(input=predict, label=label, k=5) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -2911,7 +2910,9 @@ def test_accuracy(self): label = base.to_variable(y) fc_out = fluid.layers.fc(data, size=10) predict = fluid.layers.softmax(fc_out) - dynamic_out = fluid.layers.accuracy(input=predict, label=label, k=5) + dynamic_out = paddle.static.accuracy( + input=predict, label=label, k=5 + ) np.testing.assert_array_equal(static_out[0], dynamic_out.numpy()) @@ -2954,7 +2955,6 @@ def func_all_layers(self): ) else: - assert method.__name__ in ('make_get_places') continue if method.__name__ in self.only_static_set: continue @@ -3201,12 +3201,6 @@ def make_softmax(self): hid = layers.fc(input=data, size=20) return layers.softmax(hid, axis=1) - def make_get_places(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - get_places(device_count=1) - @prog_scope() def make_nce(self): window_size = 5 diff --git a/python/paddle/fluid/tests/unittests/test_profiler.py b/python/paddle/fluid/tests/unittests/test_profiler.py index 5fbedfaaa7ff0c..33034811650653 100644 --- a/python/paddle/fluid/tests/unittests/test_profiler.py +++ b/python/paddle/fluid/tests/unittests/test_profiler.py @@ -60,7 +60,7 @@ def build_program(self, compile_program=True): cost = fluid.layers.cross_entropy(input=predict, label=label) avg_cost = paddle.mean(cost) batch_size = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( + batch_acc = paddle.static.accuracy( input=predict, label=label, total=batch_size ) diff --git a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py index 04c707b320f9aa..d751fd4b90d862 100755 --- a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py +++ b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py @@ -76,7 +76,7 @@ def simple_fc_net_with_accuracy(use_feed): prediction = fluid.layers.fc(hidden, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) loss = paddle.mean(loss) - accuracy_out = fluid.layers.accuracy(input=prediction, label=label, k=5) + accuracy_out = paddle.static.accuracy(input=prediction, label=label, k=5) return loss diff --git a/python/paddle/static/__init__.py b/python/paddle/static/__init__.py index 78ad1cfabc3110..f527b5a1c358db 100644 --- a/python/paddle/static/__init__.py +++ b/python/paddle/static/__init__.py @@ -68,11 +68,12 @@ from ..fluid.layers import create_parameter # noqa: F401 from ..fluid.layers import create_global_var # noqa: F401 -from ..fluid.layers.metric_op import auc # noqa: F401 -from ..fluid.layers.metric_op import accuracy # noqa: F401 from ..fluid.contrib.layers import ctr_metric_bundle # noqa: F401 from ..fluid.layers import exponential_decay # noqa: F401 +from paddle.static.nn.metric import auc # noqa: F401 +from paddle.static.nn.metric import accuracy # noqa: F401 + __all__ = [ # noqa 'append_backward', 'gradients', diff --git a/python/paddle/fluid/layers/metric_op.py b/python/paddle/static/nn/metric.py old mode 100755 new mode 100644 similarity index 97% rename from python/paddle/fluid/layers/metric_op.py rename to python/paddle/static/nn/metric.py index 3179f5d568c4f8..948b100bce7136 --- a/python/paddle/fluid/layers/metric_op.py +++ b/python/paddle/static/nn/metric.py @@ -15,22 +15,16 @@ All 
layers just related to metric. """ -import warnings -from ..layer_helper import LayerHelper -from ..initializer import Normal, Constant -from ..framework import ( +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.initializer import Constant +from paddle.fluid.framework import ( Variable, _non_static_mode, _varbase_creator, - _in_legacy_dygraph, - in_dygraph_mode, ) -from .. import core -from ..param_attr import ParamAttr -from . import nn -from . import tensor -from ..data_feeder import check_variable_and_dtype -from paddle import _C_ops, _legacy_C_ops +from paddle.fluid.layers import tensor +from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle import _legacy_C_ops __all__ = ['accuracy', 'auc'] From 1b1d6d3f2e61584ce54cb809202f1c9cfdaedadd Mon Sep 17 00:00:00 2001 From: xiaoxiaohehe001 <49090790+xiaoxiaohehe001@users.noreply.github.com> Date: Thu, 1 Dec 2022 18:53:23 +0800 Subject: [PATCH 085/154] [Paddle Inference] Add sign and not trt converter (#48557) --- .../inference/tensorrt/convert/unary_op.cc | 22 +++ paddle/fluid/inference/tensorrt/op_teller.cc | 50 ++++-- .../ir/inference/test_trt_convert_unary.py | 149 +++++++++++++++++- 3 files changed, 209 insertions(+), 12 deletions(-) mode change 100755 => 100644 paddle/fluid/inference/tensorrt/op_teller.cc diff --git a/paddle/fluid/inference/tensorrt/convert/unary_op.cc b/paddle/fluid/inference/tensorrt/convert/unary_op.cc index cbf2139a3c4836..342b966bdcee49 100644 --- a/paddle/fluid/inference/tensorrt/convert/unary_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/unary_op.cc @@ -90,7 +90,11 @@ const std::unordered_map> {"floor", {nvinfer1::UnaryOperation::kFLOOR}}, {"rsqrt", {nvinfer1::UnaryOperation::kSQRT, nvinfer1::UnaryOperation::kRECIP}}, + {"logical_not", {nvinfer1::UnaryOperation::kNOT}}, {"reciprocal", {nvinfer1::UnaryOperation::kRECIP}}, +#if IS_TRT_VERSION_GE(8200) + {"sign", {nvinfer1::UnaryOperation::kSIGN}}, +#endif #if IS_TRT_VERSION_GE(7000) {"erf", {nvinfer1::UnaryOperation::kERF}}, #endif @@ -167,10 +171,24 @@ class RsqrtOpConverter : public UnaryOpConverter { public: RsqrtOpConverter() { op_type_ = "rsqrt"; } }; + +class LogicalNotOpConverter : public UnaryOpConverter { + public: + LogicalNotOpConverter() { op_type_ = "logical_not"; } +}; + class ReciprocalOpConverter : public UnaryOpConverter { public: ReciprocalOpConverter() { op_type_ = "reciprocal"; } }; + +#if IS_TRT_VERSION_GE(8200) +class SignOpConverter : public UnaryOpConverter { + public: + SignOpConverter() { op_type_ = "sign"; } +}; +#endif + #if IS_TRT_VERSION_GE(7000) class ErfOpConverter : public UnaryOpConverter { public: @@ -199,7 +217,11 @@ REGISTER_TRT_OP_CONVERTER(atanh, AtanhOpConverter); REGISTER_TRT_OP_CONVERTER(ceil, CeilOpConverter); REGISTER_TRT_OP_CONVERTER(floor, FloorOpConverter); REGISTER_TRT_OP_CONVERTER(rsqrt, RsqrtOpConverter); +REGISTER_TRT_OP_CONVERTER(logical_not, LogicalNotOpConverter); REGISTER_TRT_OP_CONVERTER(reciprocal, ReciprocalOpConverter); +#if IS_TRT_VERSION_GE(8200) +REGISTER_TRT_OP_CONVERTER(sign, SignOpConverter); +#endif #if IS_TRT_VERSION_GE(7000) REGISTER_TRT_OP_CONVERTER(erf, ErfOpConverter); #endif diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc old mode 100755 new mode 100644 index 58f99ff3d2ff7b..ce14e506dd6635 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -79,17 +79,18 @@ struct SimpleOpTypeSetTeller : public Teller { desc.HasAttr("skip_quant")) return 
false; std::unordered_set act_op_list = { - "relu", "relu6", "sigmoid", - "elu", "selu", "softsign", - "softplus", "stanh", "thresholded_relu", - "exp", "log", "sqrt", - "abs", "sin", "cos", - "tan", "tanh", "sinh", - "cosh", "asin", "acos", - "atan", "asinh", "atanh", - "ceil", "floor", "erf", - "reciprocal", "silu", "celu", - "tanh_shrink", "logsigmoid"}; + "relu", "relu6", "sigmoid", + "elu", "selu", "softsign", + "softplus", "stanh", "thresholded_relu", + "exp", "log", "sqrt", + "abs", "sin", "cos", + "tan", "tanh", "sinh", + "cosh", "asin", "acos", + "atan", "asinh", "atanh", + "ceil", "floor", "erf", + "reciprocal", "silu", "celu", + "tanh_shrink", "logsigmoid", "sign", + "logical_not"}; if (act_op_list.find(op_type) != act_op_list.end()) { auto* block = desc.Block(); if (block == nullptr) { @@ -336,6 +337,29 @@ struct SimpleOpTypeSetTeller : public Teller { } } + if (op_type == "sign") { +#if IS_TRT_VERSION_GE(8200) + if (!with_dynamic_shape) { + return false; + } +#else + VLOG(3) << "sign op is only supported by trt8.2 above "; + return false; +#endif + } + + if (op_type == "logical_not") { +#if IS_TRT_VERSION_GE(8400) + if (!with_dynamic_shape) { + return false; + } +#else + VLOG(3) << "logical_not op is only supported by trt8.4 above because of " + "cast op"; + return false; +#endif + } + if (op_type == "matmul_v2") { if (!with_dynamic_shape) { return false; @@ -2341,7 +2365,9 @@ struct SimpleOpTypeSetTeller : public Teller { "ceil", "floor", "rsqrt", + "sign", "reciprocal", + "logical_not", "erf", "softmax", "sigmoid", @@ -2471,7 +2497,9 @@ struct SimpleOpTypeSetTeller : public Teller { "ceil", "floor", "rsqrt", + "sign", "reciprocal", + "logical_not", "erf", "softmax", "sigmoid", diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py index e5c763b822b556..ba364220e3985d 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py @@ -59,8 +59,10 @@ def generate_input1(dims, batch, attrs: List[Dict[str, Any]]): "floor", "rsqrt", "reciprocal", + "sign", ]: self.dims = dims + self.op_type = op_type dics = [{}] ops_config = [ @@ -121,7 +123,14 @@ def clear_dynamic_shape(): self.dynamic_shape.opt_input_shape = {} def generate_trt_nodes_num(attrs, dynamic_shape): - if self.dims == 1: + ver = paddle_infer.get_trt_compile_version() + if self.dims == 1 or ( + self.op_type == "sign" + and ( + not dynamic_shape + or ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8200 + ) + ): return 0, 3 return 1, 2 @@ -155,5 +164,143 @@ def test(self): self.run_test() +class TrtConvertLogicalNotTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def sample_program_configs(self): + def generate_input(shape): + return np.random.random(shape).astype(np.float32) + + for shape in [[2, 16], [2, 16, 32], [1, 32, 16, 32]]: + for op_type in ["logical_not"]: + for axis in [-1]: + self.dims = len(shape) + dics = [ + {"axis": axis}, + {"in_dtype": 5, "out_dtype": 0}, + {"in_dtype": 0, "out_dtype": 5}, + ] + ops_config = [ + { + "op_type": "cast", + "op_inputs": {"X": ["input_data"]}, + "op_outputs": {"Out": ["cast_output_data1"]}, + "op_attrs": dics[1], + "outputs_dtype": {"cast_output_data1": np.bool}, + }, + { + "op_type": op_type, + "op_inputs": { + "X": ["cast_output_data1"], + }, + "op_outputs": {"Out": ["cast_output_data0"]}, + 
"op_attrs": dics[0], + "outputs_dtype": {"cast_output_data0": np.bool}, + }, + { + "op_type": "cast", + "op_inputs": {"X": ["cast_output_data0"]}, + "op_outputs": {"Out": ["output_data"]}, + "op_attrs": dics[2], + }, + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data": TensorConfig( + data_gen=partial(generate_input, shape) + ), + }, + outputs=["output_data"], + ) + + yield program_config + + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + if self.dims == 2: + self.dynamic_shape.min_input_shape = { + "input_data": [2, 16], + } + self.dynamic_shape.max_input_shape = { + "input_data": [2, 16], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [2, 16], + } + if self.dims == 3: + self.dynamic_shape.min_input_shape = { + "input_data": [2, 16, 32], + } + self.dynamic_shape.max_input_shape = { + "input_data": [2, 16, 32], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [2, 16, 32], + } + if self.dims == 4: + self.dynamic_shape.min_input_shape = { + "input_data": [1, 32, 16, 32], + } + self.dynamic_shape.max_input_shape = { + "input_data": [1, 32, 16, 32], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [1, 32, 16, 32], + } + + def clear_dynamic_shape(): + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + if dynamic_shape: + ver = paddle_infer.get_trt_compile_version() + if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400: + return 0, 5 + return 1, 2 + return 0, 5 + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + # for static_shape + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), (1e-3, 1e-3) + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), (1e-3, 1e-3) + + def add_skip_trt_case(self): + pass + + def test(self): + self.add_skip_trt_case() + self.run_test() + + if __name__ == "__main__": unittest.main() From 758fccfef134b5b1c20955ee75d002191723de46 Mon Sep 17 00:00:00 2001 From: Zhang Jun Date: Thu, 1 Dec 2022 18:57:12 +0800 Subject: [PATCH 086/154] [inference][trt] dynamic shape support for Instance norm (#47998) * instance norm support dynamic shape * update unittest --- .../tensorrt/convert/instance_norm_op.cc | 14 +- paddle/fluid/inference/tensorrt/op_teller.cc | 4 - .../plugin/instance_norm_op_plugin.cu | 109 ++++++++++++++ .../tensorrt/plugin/instance_norm_op_plugin.h | 134 +++++++++++++++++- .../test_trt_convert_instance_norm.py | 8 +- 5 files changed, 258 insertions(+), 11 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/instance_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/instance_norm_op.cc index aef91a9a69ef0e..6a6e67328bb254 100644 --- a/paddle/fluid/inference/tensorrt/convert/instance_norm_op.cc +++ 
b/paddle/fluid/inference/tensorrt/convert/instance_norm_op.cc @@ -74,10 +74,16 @@ class InstanceNormOpConverter : public OpConverter { bias_v.push_back(bias_d[i]); } - plugin::InstanceNormPlugin* plugin = - new plugin::InstanceNormPlugin(eps, scale_v, bias_v); - plugin->getPluginType(); - auto* layer = engine_->AddPlugin(&input, 1, plugin); + nvinfer1::IPluginV2* plugin = nullptr; + if (engine_->with_dynamic_shape()) { + plugin = new plugin::InstanceNormPluginDynamic(eps, scale_v, bias_v); + } else { + plugin = new plugin::InstanceNormPlugin(eps, scale_v, bias_v); + } + + std::vector instance_norm_inputs{input}; + auto* layer = engine_->network()->addPluginV2( + instance_norm_inputs.data(), instance_norm_inputs.size(), *plugin); auto output_name = op_desc.Output("Y")[0]; RreplenishLayerAndOutput(layer, "instance_norm", {output_name}, test_mode); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index ce14e506dd6635..17fb2f0aa6d095 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -1501,10 +1501,6 @@ struct SimpleOpTypeSetTeller : public Teller { } if (op_type == "instance_norm") { - if (with_dynamic_shape) { - VLOG(3) << "trt instance_norm op does not support dynamic shape "; - return false; - } if (desc.Input("X").size() != 1) { VLOG(3) << "input of instance_norm op converter should be 1, got " << desc.Input("X").size(); diff --git a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu index 6dd31dff0167b3..82e24bea09aaca 100644 --- a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu @@ -131,6 +131,115 @@ int InstanceNormPlugin::enqueue(int batch_size, return cudaGetLastError() != cudaSuccess; } +int InstanceNormPluginDynamic::initialize() TRT_NOEXCEPT { return 0; } + +nvinfer1::DimsExprs InstanceNormPluginDynamic::getOutputDimensions( + int index, + const nvinfer1::DimsExprs *inputs, + int nbInputs, + nvinfer1::IExprBuilder &expr_builder) TRT_NOEXCEPT { + assert(nbInputs == 1); + assert(index < this->getNbOutputs()); + nvinfer1::DimsExprs output(inputs[0]); + return output; +} + +bool InstanceNormPluginDynamic::supportsFormatCombination( + int pos, + const nvinfer1::PluginTensorDesc *inOut, + int nbInputs, + int nbOutputs) TRT_NOEXCEPT { + assert(inOut && pos < (nbInputs + nbOutputs)); + assert(pos == 0 || pos == 1); + return ((inOut[pos].type == nvinfer1::DataType::kFLOAT || + inOut[pos].type == nvinfer1::DataType::kHALF) && + (inOut[pos].format == nvinfer1::PluginFormat::kLINEAR) && + inOut[pos].type == inOut[0].type); +} + +int InstanceNormPluginDynamic::enqueue( + const nvinfer1::PluginTensorDesc *inputDesc, + const nvinfer1::PluginTensorDesc *outputDesc, + const void *const *inputs, + void *const *outputs, + void *workspace, + cudaStream_t stream) TRT_NOEXCEPT { + nvinfer1::Dims input_dims = inputDesc[0].dims; + int n = input_dims.d[0]; + int c = input_dims.d[1]; + int h = input_dims.d[2]; + int w = input_dims.d[3]; + + scale_t.Resize(phi::make_ddim({n, c})); + bias_t.Resize(phi::make_ddim({n, c})); + int device_id; + cudaGetDevice(&device_id); + float *scale_d = scale_t.mutable_data(platform::CUDAPlace(device_id)); + float *bias_d = bias_t.mutable_data(platform::CUDAPlace(device_id)); + + for (int i = 0; i < n; i++) { + cudaMemcpyAsync(scale_d + i * c, + scale_.data(), + sizeof(float) * c, + 
cudaMemcpyHostToDevice, + stream); + cudaMemcpyAsync(bias_d + i * c, + bias_.data(), + sizeof(float) * c, + cudaMemcpyHostToDevice, + stream); + } + platform::dynload::cudnnSetTensor4dDescriptor( + b_desc_, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, n * c, 1, 1); + + cudnnDataType_t cudnn_dtype; + auto data_type = inputDesc[0].type; + convert_trt2cudnn_dtype(data_type, &cudnn_dtype); + platform::dynload::cudnnSetTensor4dDescriptor( + x_desc_, CUDNN_TENSOR_NCHW, cudnn_dtype, 1, n * c, h, w); + platform::dynload::cudnnSetTensor4dDescriptor( + y_desc_, CUDNN_TENSOR_NCHW, cudnn_dtype, 1, n * c, h, w); + float alpha = 1; + float beta = 0; + platform::dynload::cudnnSetStream(handle_, stream); + + void const *x_ptr = inputs[0]; + void *y_ptr = outputs[0]; + platform::dynload::cudnnBatchNormalizationForwardTraining( + handle_, + CUDNN_BATCHNORM_SPATIAL_PERSISTENT, + &alpha, + &beta, + x_desc_, + x_ptr, + y_desc_, + y_ptr, + b_desc_, + scale_d, + bias_d, + 1., + nullptr, + nullptr, + eps_, + nullptr, + nullptr); + return cudaGetLastError() != cudaSuccess; +} + +nvinfer1::DataType InstanceNormPluginDynamic::getOutputDataType( + int index, + const nvinfer1::DataType *inputTypes, + int nbInputs) const TRT_NOEXCEPT { + assert(inputTypes && nbInputs > 0 && index == 0); + return inputTypes[0]; +} + +void InstanceNormPluginDynamic::configurePlugin( + const nvinfer1::DynamicPluginTensorDesc *in, + int nbInputs, + const nvinfer1::DynamicPluginTensorDesc *out, + int nbOutputs) TRT_NOEXCEPT {} + } // namespace plugin } // namespace tensorrt } // namespace inference diff --git a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h index 90a01d076f3678..6a89139396c51b 100644 --- a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h @@ -99,7 +99,7 @@ class InstanceNormPlugin : public PluginTensorRT { } const char *getPluginType() const TRT_NOEXCEPT override { - return "instance_norm_plugin"; + return "instance_norm"; } int getNbOutputs() const TRT_NOEXCEPT override { return 1; } nvinfer1::Dims getOutputDimensions(int index, @@ -125,7 +125,7 @@ class InstanceNormPlugin : public PluginTensorRT { class InstanceNormPluginCreator : public TensorRTPluginCreator { public: const char *getPluginName() const TRT_NOEXCEPT override { - return "instance_norm_plugin"; + return "instance_norm"; } const char *getPluginVersion() const TRT_NOEXCEPT override { return "1"; } @@ -137,7 +137,137 @@ class InstanceNormPluginCreator : public TensorRTPluginCreator { return new InstanceNormPlugin(serial_data, serial_length); } }; + +class InstanceNormPluginDynamic : public DynamicPluginTensorRT { + private: + float eps_; + std::vector scale_; + std::vector bias_; + + phi::DenseTensor scale_t; + phi::DenseTensor bias_t; + cudnnHandle_t handle_; + cudnnTensorDescriptor_t x_desc_, y_desc_, b_desc_; + + public: + size_t getSerializationSize() const TRT_NOEXCEPT override { + return SerializedSize(eps_) + SerializedSize(scale_) + + SerializedSize(bias_); + } + + // TRT will call this func when we need to serialize the configuration of + // tensorrt. + // It should not be called by users. 
+ void serialize(void *buffer) const TRT_NOEXCEPT override { + SerializeValue(&buffer, eps_); + SerializeValue(&buffer, scale_); + SerializeValue(&buffer, bias_); + } + + explicit InstanceNormPluginDynamic(const float eps, + const std::vector scale, + const std::vector bias) + : eps_(eps), scale_(scale), bias_(bias) { + PADDLE_ENFORCE_EQ(scale.size(), + bias.size(), + platform::errors::InvalidArgument( + "The instanceNorm's scale and bias should be the " + "same size. Got scale size = %d, but bias size = %d", + scale.size(), + bias.size())); + platform::dynload::cudnnCreate(&handle_); + platform::dynload::cudnnCreateTensorDescriptor(&x_desc_); + platform::dynload::cudnnCreateTensorDescriptor(&y_desc_); + platform::dynload::cudnnCreateTensorDescriptor(&b_desc_); + } + + // It was used for tensorrt deserialization. + // It should not be called by users. + InstanceNormPluginDynamic(void const *serialData, size_t serialLength) { + DeserializeValue(&serialData, &serialLength, &eps_); + DeserializeValue(&serialData, &serialLength, &scale_); + DeserializeValue(&serialData, &serialLength, &bias_); + + platform::dynload::cudnnCreate(&handle_); + platform::dynload::cudnnCreateTensorDescriptor(&x_desc_); + platform::dynload::cudnnCreateTensorDescriptor(&y_desc_); + platform::dynload::cudnnCreateTensorDescriptor(&b_desc_); + } + + ~InstanceNormPluginDynamic() { + platform::dynload::cudnnDestroy(handle_); + platform::dynload::cudnnDestroyTensorDescriptor(x_desc_); + platform::dynload::cudnnDestroyTensorDescriptor(y_desc_); + platform::dynload::cudnnDestroyTensorDescriptor(b_desc_); + } + + int initialize() TRT_NOEXCEPT override; + + nvinfer1::IPluginV2DynamicExt *clone() const TRT_NOEXCEPT override { + return new InstanceNormPluginDynamic(eps_, scale_, bias_); + } + + const char *getPluginType() const TRT_NOEXCEPT override { + return "instance_norm_dynamic"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + nvinfer1::DimsExprs getOutputDimensions( + int output_index, + const nvinfer1::DimsExprs *inputs, + int nb_inputs, + nvinfer1::IExprBuilder &expr_builder) // NOLINT + TRT_NOEXCEPT override; + + bool supportsFormatCombination(int pos, + const nvinfer1::PluginTensorDesc *inOut, + int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; + + void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, + int nbInputs, + const nvinfer1::DynamicPluginTensorDesc *out, + int nbOutputs) TRT_NOEXCEPT override; + + size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs, + int nbInputs, + const nvinfer1::PluginTensorDesc *outputs, + int nbOutputs) const TRT_NOEXCEPT override { + return 0; + } + + int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, + const nvinfer1::PluginTensorDesc *outputDesc, + const void *const *inputs, + void *const *outputs, + void *workspace, + cudaStream_t stream) TRT_NOEXCEPT override; + + nvinfer1::DataType getOutputDataType(int index, + const nvinfer1::DataType *inputTypes, + int nbInputs) const + TRT_NOEXCEPT override; + + void destroy() TRT_NOEXCEPT override { delete this; } +}; + +class InstanceNormPluginDynamicCreator : public TensorRTPluginCreator { + public: + const char *getPluginName() const TRT_NOEXCEPT override { + return "instance_norm_dynamic"; + } + + const char *getPluginVersion() const TRT_NOEXCEPT override { return "1"; } + + nvinfer1::IPluginV2 *deserializePlugin(const char *name, + const void *serial_data, + size_t serial_length) + TRT_NOEXCEPT override { + return new InstanceNormPluginDynamic(serial_data, serial_length); + } +}; + 
REGISTER_TRT_PLUGIN_V2(InstanceNormPluginCreator); +REGISTER_TRT_PLUGIN_V2(InstanceNormPluginDynamicCreator); } // namespace plugin } // namespace tensorrt diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py index a65588b8c5e88d..72b728d5cc34bc 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py @@ -50,7 +50,13 @@ def generate_input2(attrs: List[Dict[str, Any]], shape_input): [batch, 16, 32, 64], ]: self.in_dim = len(shape_input) - for epsilon in [0.0005, -1, 1]: + for epsilon in [ + 0.0005, + -1, + 1, + 0.000009999999747378752, + 0.00001, + ]: dics = [{"epsilon": epsilon}] ops_config = [ { From 771811dc348a08ba68d217217b7655177c5915d9 Mon Sep 17 00:00:00 2001 From: Chitsing KUI Date: Thu, 1 Dec 2022 18:58:27 +0800 Subject: [PATCH 087/154] [FLOPS] add flops for layer (#48447) * add flops for layer * hide api * add unitest --- .../fluid/tests/unittests/test_newprofiler.py | 1 + python/paddle/profiler/profiler.py | 20 ++ python/paddle/profiler/profiler_statistic.py | 228 ++++++++++++------ python/paddle/utils/flops.py | 5 +- 4 files changed, 175 insertions(+), 79 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_newprofiler.py b/python/paddle/fluid/tests/unittests/test_newprofiler.py index 6dfe6250f91b67..c8ab1974beabe5 100755 --- a/python/paddle/fluid/tests/unittests/test_newprofiler.py +++ b/python/paddle/fluid/tests/unittests/test_newprofiler.py @@ -87,6 +87,7 @@ def my_trace_back(prof): targets=[profiler.ProfilerTarget.CPU], scheduler=lambda x: profiler.ProfilerState.RECORD_AND_RETURN, on_trace_ready=my_trace_back, + with_flops=True, ) as prof: for i in range(2): y = x / 2.0 diff --git a/python/paddle/profiler/profiler.py b/python/paddle/profiler/profiler.py index caa0d754cb6498..eef6714f2e20c9 100644 --- a/python/paddle/profiler/profiler.py +++ b/python/paddle/profiler/profiler.py @@ -31,6 +31,14 @@ enable_memory_recorder, enable_op_info_recorder, ) + +from .utils import RecordEvent, wrap_optimizers +from .profiler_statistic import ( + StatisticData, + _build_table, + SortedKeys, + gen_layer_flops, +) from paddle.profiler import utils from .profiler_statistic import SortedKeys, StatisticData, _build_table @@ -883,6 +891,18 @@ def summary( ) ) + if self.with_flops: + self._print_flops() + + def _print_flops(self, repeat=1): + if not self.with_flops: + print('ERROR: with_flops disabled.') + return + + print(" Flops Profiler Begin ".center(100, "-")) + print(gen_layer_flops(self.profiler_result.get_data(), repeat)) + print("- Flops Profiler End -".center(100, "-")) + def get_profiler(config_path): try: diff --git a/python/paddle/profiler/profiler_statistic.py b/python/paddle/profiler/profiler_statistic.py index cedfac8b614172..c928309b66eac8 100755 --- a/python/paddle/profiler/profiler_statistic.py +++ b/python/paddle/profiler/profiler_statistic.py @@ -78,6 +78,16 @@ class SortedKeys(Enum): GPUMin = 7 +def _nodename2opname(name): + r''' + convert static host node name to operator name + ''' + op_name = name.replace(' compute', '') + op_name = op_name.replace(' dygraph', '') + op_name = op_name.replace(' pybind_imperative_func', '') + return op_name + + class HostStatisticNode: r''' Wrap original node for calculating statistic metrics. 
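# A minimal usage sketch of the layer-FLOPs report wired up in this patch: passing
# with_flops=True makes Profiler.summary() end by calling _print_flops(), which
# prints gen_layer_flops() over the captured node trees. This mirrors the updated
# test_newprofiler.py case; the tensor/layer names below are illustrative only and
# this is a hedged example, not part of the patch itself.
import paddle
import paddle.profiler as profiler

inp = paddle.rand([4, 16])
linear = paddle.nn.Linear(16, 8)
with profiler.Profiler(
    targets=[profiler.ProfilerTarget.CPU],
    scheduler=lambda step: profiler.ProfilerState.RECORD_AND_RETURN,
    with_flops=True,
) as prof:
    for _ in range(2):
        out = linear(inp)
        prof.step()
prof.summary()  # with_flops=True appends the "Flops Profiler" section per layer/operator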
@@ -98,10 +108,7 @@ def __init__(self, hostnode): def cal_flops(self): if self.hostnode.type == TracerEventType.Operator: if hasattr(self.hostnode, 'input_shapes'): - op_name = self.hostnode.name - op_name = op_name.replace(' compute', '') - op_name = op_name.replace(' dygraph', '') - op_name = op_name.replace(' pybind_imperative_func', '') + op_name = _nodename2opname(self.hostnode.name) self.flops = flops( op_name, self.hostnode.input_shapes, @@ -111,6 +118,7 @@ def cal_flops(self): def cal_statistic(self): self.cpu_time = self.hostnode.end_ns - self.hostnode.start_ns self.self_cpu_time = self.cpu_time + self.cal_flops() for child in self.children_node: child.cal_flops() child.cal_statistic() @@ -177,6 +185,117 @@ def get_device_nodes(hostnode): return device_nodes +def _build_layer_from_tree(nodetrees): + def build_layer(node, depth=0): + + if "GradNode" in node.name: + return [], 0 + + if node.type in [ + TracerEventType.Backward, + TracerEventType.Optimization, + ]: + return [], 0 + + if node.type == TracerEventType.Operator: + stat_node = HostStatisticNode(node) + stat_node.cal_statistic() + return stat_node, stat_node.flops + + layer = [] + nflops = 0 + for c in node.children_node: + l, f = build_layer(c, depth + 1) + if l: + nflops += f + layer.append(l) + + if node.type == TracerEventType.Forward: + stat_node = HostStatisticNode(node) + stat_node.cal_statistic() + stat_node.flops = nflops + return [stat_node, layer], nflops + + return layer, nflops + + ret = [] + for _, rootnode in nodetrees.items(): + layer, _ = build_layer(rootnode) + ret.append(layer) + + return ret + + +def _format_large_number(n, precision=2): + if n // 1e12 > 0: + return "{} T".format(round(n / 1e12, precision)) + if n // 1e9 > 0: + return "{} G".format(round(n / 1e9, precision)) + if n // 1e6 > 0: + return "{} M".format(round(n / 1e6, precision)) + if n // 1e3 > 0: + return "{} K".format(round(n / 1e3, precision)) + return "{}".format(round(n, precision)) + + +def _format_time(n, precision=2): + if n // 1e9 > 0: + return "{} s".format(round(n / 1e9, precision)) + if n // 1e6 > 0: + return "{} ms".format(round(n / 1e6, precision)) + if n // 1e3 > 0: + return "{} us".format(round(n / 1e3, precision)) + return "{} ns".format(round(n, precision)) + + +def _gen_layer_flops(node, repeat=1): + ret = [] + offset = [] + loop = [] + + def print_layer_tree(node, depth=0): + if isinstance(node, list): + for n in node: + print_layer_tree(n, depth + 1) + + elif node.type in [TracerEventType.Forward, TracerEventType.Operator]: + if len(offset) == 0: + offset.append(depth) + + name = _nodename2opname(node.name) + + if ( + depth == offset[-1] and len(ret) > 0 and ret[0].startswith(name) + ): # repeat begin + loop.append(1) + + if len(loop) >= repeat: + return "".join(ret) + + align = " " * (depth - offset[-1]) + tm = _format_time(node.cpu_time) + flops_n = _format_large_number(node.flops) + flops_s = _format_large_number(node.flops * 1e9 / node.cpu_time) + ret.append( + "{}{} latency: {}, FLOPs: {}, FLOPS: {}\n".format( + align, name, tm, flops_n, flops_s + ) + ) + + for n in node[1:]: + print_layer_tree(n) + + return "".join(ret) + + +def gen_layer_flops(nodetrees, repeat=1): + r''' + gen_layer_flops generate flops/runtime information depend on layer/operator. + ''' + layer_tree = _build_layer_from_tree(nodetrees) + return _gen_layer_flops(layer_tree, repeat) + + def wrap_tree(nodetrees): ''' Using HostStatisticNode to wrap original profiler result tree, and calculate node statistic metrics. 
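# Worked examples for the formatting helpers added above (default precision=2).
# Assumes this patch is applied so the private helpers are importable from
# paddle.profiler.profiler_statistic; times are nanoseconds, as produced by the
# host event nodes (end_ns - start_ns). A sketch for illustration only.
from paddle.profiler.profiler_statistic import _format_large_number, _format_time

assert _format_large_number(1.5e9) == "1.5 G"  # 1e9 branch -> giga
assert _format_large_number(250) == "250"      # below 1e3, returned as-is
assert _format_time(3.2e6) == "3.2 ms"         # 3.2e6 ns -> 3.2 ms
assert _format_time(870) == "870 ns"           # stays in nanoseconds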
@@ -387,30 +506,7 @@ class EventSummary: Analyse operator event in profiling data, correlate with its device event. """ - class DeviceItem: - def __init__(self, name): - self.name = name - self.call = 0 - self.gpu_time = 0 - self.max_gpu_time = 0 - self.min_gpu_time = float('inf') - - @property - def avg_gpu_time(self): - return self.gpu_time / self.call - - def add_gpu_time(self, time): - if time > self.max_gpu_time: - self.max_gpu_time = time - if time < self.min_gpu_time: - self.min_gpu_time = time - self.gpu_time += time - - def add_item(self, node): - self.call += 1 - self.add_gpu_time(node.end_ns - node.start_ns) - - class OperatorItem: + class ItemBase: def __init__(self, name): self.name = name self.call = 0 @@ -470,6 +566,15 @@ def add_call(self): def add_flops(self, flops): self._flops += flops + def add_item(self, node): + raise NotImplementedError + + class DeviceItem(ItemBase): + def add_item(self, node): + self.call += 1 + self.add_gpu_time(node.end_ns - node.start_ns) + + class OperatorItem(ItemBase): def add_item(self, node): self.add_call() self.add_cpu_time(node.cpu_time) @@ -491,56 +596,22 @@ def add_item(self, node): self.devices[name] = EventSummary.DeviceItem(name) self.devices[name].add_item(devicenode) - class GeneralItem: - def __init__(self, name): - self.name = name - self.call = 0 - self.cpu_time = 0 - self.max_cpu_time = 0 - self.min_cpu_time = float('inf') - self.gpu_time = 0 - self.max_gpu_time = 0 - self.min_gpu_time = float('inf') - self.general_gpu_time = 0 - self.min_general_gpu_time = float('inf') - self.max_general_gpu_time = 0 - - @property - def avg_cpu_time(self): - return self.cpu_time / self.call - - @property - def avg_gpu_time(self): - return self.gpu_time / self.call - - @property - def avg_general_gpu_time(self): - return self.general_gpu_time / self.call - - def add_cpu_time(self, time): - if time > self.max_cpu_time: - self.max_cpu_time = time - if time < self.min_cpu_time: - self.min_cpu_time = time - self.cpu_time += time - - def add_gpu_time(self, time): - if time > self.max_gpu_time: - self.max_gpu_time = time - if time < self.min_gpu_time: - self.min_gpu_time = time - self.gpu_time += time - - def add_general_gpu_time(self, time): - if time > self.max_general_gpu_time: - self.max_general_gpu_time = time - if time < self.min_general_gpu_time: - self.min_general_gpu_time = time - self.general_gpu_time += time - - def add_call(self): - self.call += 1 + class ForwardItem(ItemBase): + def add_item(self, node): + self.add_call() + self.add_cpu_time(node.cpu_time) + self.add_gpu_time(node.gpu_time) + self.add_general_gpu_time(node.general_gpu_time) + self.add_flops(node.flops) + for child in node.children_node: + if child.type != TracerEventType.Operator: + if child.name not in self.operator_inners: + self.operator_inners[ + child.name + ] = EventSummary.OperatorItem(child.name) + self.operator_inners[child.name].add_item(child) + class GeneralItem(ItemBase): def add_item(self, node): self.add_call() self.add_cpu_time(node.cpu_time) @@ -613,6 +684,9 @@ def parse(self, nodetrees): self.add_model_perspective_item(child) deque.append(child) + def add_forward_item(self, operator_node): + pass + def add_operator_item(self, operator_node): if operator_node.name not in self.items: self.items[operator_node.name] = EventSummary.OperatorItem( diff --git a/python/paddle/utils/flops.py b/python/paddle/utils/flops.py index 114ca6d9ab6c77..9c131767693411 100644 --- a/python/paddle/utils/flops.py +++ b/python/paddle/utils/flops.py @@ -164,8 +164,9 @@ def 
_matmul_flops(input_shapes, attrs): shape_of_output = [dim1, dim2 ... max(dim(n-m), odim(n-m)), max(dim(n-m+1), odim(n-m+1)) ... dim_n_1, dim_m] equation: flops = 2 * numel(output) * dim_n """ - x_shape = input_shapes.get("X")[0] - y_shape = input_shapes.get("Y")[0] + + x_shape = input_shapes.get("X", input_shapes.get("x", [[0]]))[0] + y_shape = input_shapes.get("Y", input_shapes.get("y", [[0]]))[0] if attrs.get('transpose_X') or attrs.get('transpose_x'): x_shape[-1], x_shape[-2] = x_shape[-2], x_shape[-1] From 93099bb8c3e112d1d102c3a4e16a97f24fc778d9 Mon Sep 17 00:00:00 2001 From: wanghuancoder Date: Thu, 1 Dec 2022 19:12:33 +0800 Subject: [PATCH 088/154] do not link python lib in tensor wrapper (#48523) * do not link python lib in tensor wrapper --- paddle/CMakeLists.txt | 3 + paddle/fluid/eager/CMakeLists.txt | 16 +-- .../eager_generated/backwards/CMakeLists.txt | 1 - .../eager_generated/forwards/CMakeLists.txt | 1 - .../generate_file_structures.py | 2 - paddle/fluid/eager/hooks.h | 56 +++++++- paddle/fluid/eager/saved_tensors_hooks.cc | 119 ----------------- paddle/fluid/eager/saved_tensors_hooks.h | 97 -------------- paddle/fluid/eager/tensor_wrapper.h | 31 +++-- paddle/fluid/pybind/CMakeLists.txt | 9 -- paddle/fluid/pybind/eager_functions.cc | 5 +- paddle/fluid/pybind/eager_py_layer.cc | 1 - paddle/fluid/pybind/eager_utils.cc | 121 ++++++++++++++++++ paddle/fluid/pybind/eager_utils.h | 43 +++++++ 14 files changed, 243 insertions(+), 262 deletions(-) delete mode 100644 paddle/fluid/eager/saved_tensors_hooks.cc delete mode 100644 paddle/fluid/eager/saved_tensors_hooks.h diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index b39f720f410c36..e7f788631b7344 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -72,6 +72,9 @@ if(${len} GREATER_EQUAL 1) target_link_libraries(${test_name} "-Wl,-rpath,$") endif() + if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) + target_link_libraries(${test_name} ${PYTHON_LIBRARIES}) + endif() if(WITH_XPU) target_link_libraries(${test_name} xpulib) endif() diff --git a/paddle/fluid/eager/CMakeLists.txt b/paddle/fluid/eager/CMakeLists.txt index d5d5f1daa1e5f7..fd02d247788e90 100644 --- a/paddle/fluid/eager/CMakeLists.txt +++ b/paddle/fluid/eager/CMakeLists.txt @@ -14,8 +14,7 @@ set(eager_deps grad_node_info grad_tensor_holder accumulation_node - custom_operator_node - python) + custom_operator_node) set(fluid_deps tracer @@ -37,6 +36,7 @@ add_subdirectory(api) add_subdirectory(accumulation) add_subdirectory(custom_operator) if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) + add_subdirectory(tests) add_subdirectory(pylayer) cc_library( grad_tensor_holder @@ -77,15 +77,3 @@ cc_library( scale_op autograd_meta hook_utils) - -if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) - target_link_libraries(utils ${PYTHON_LIBRARIES}) -endif() - -if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) - cc_library( - saved_tensors_hooks - SRCS saved_tensors_hooks.cc - DEPS hook_utils) - add_subdirectory(tests) -endif() diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt b/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt index fc52d6f82a174c..69bfe4d9415722 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt @@ -9,5 +9,4 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) SRCS nodes.cc ${eager_manual_nodes} DEPS ${eager_deps}) add_dependencies(final_dygraph_node eager_codegen) - target_link_libraries(final_dygraph_node 
${PYTHON_LIBRARIES}) endif() diff --git a/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt b/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt index 97d5aed2947609..c32dd2f122a6e2 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt +++ b/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt @@ -9,5 +9,4 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) SRCS dygraph_functions.cc ${eager_manual_functions} DEPS ${eager_deps}) add_dependencies(final_dygraph_function eager_codegen) - target_link_libraries(final_dygraph_function ${PYTHON_LIBRARIES}) endif() diff --git a/paddle/fluid/eager/auto_code_generator/generate_file_structures.py b/paddle/fluid/eager/auto_code_generator/generate_file_structures.py index d2beeff5ac22c4..3de8e587eea297 100644 --- a/paddle/fluid/eager/auto_code_generator/generate_file_structures.py +++ b/paddle/fluid/eager/auto_code_generator/generate_file_structures.py @@ -144,7 +144,6 @@ def GenerateFileStructureForIntermediateDygraph(eager_dir, split_count): f.write("nodes" + str(i + 1) + ".cc ") f.write("${fluid_manual_nodes} DEPS ${eager_deps} ${fluid_deps})\n") f.write("add_dependencies(dygraph_node copy_dygraph_node)\n") - f.write("target_link_libraries(dygraph_node ${PYTHON_LIBRARIES})\n") with open(forwards_level_cmakelist_path, "w") as f: f.write("add_custom_target(\n") @@ -184,7 +183,6 @@ def GenerateFileStructureForIntermediateDygraph(eager_dir, split_count): f.write( "add_dependencies(dygraph_function copy_dygraph_forward_functions)\n" ) - f.write("target_link_libraries(dygraph_function ${PYTHON_LIBRARIES})\n") with open(generated_level_cmakelist_path, "w") as f: f.write("add_subdirectory(forwards)\nadd_subdirectory(nodes)") diff --git a/paddle/fluid/eager/hooks.h b/paddle/fluid/eager/hooks.h index f501c4acc62103..ff2ca4aef130d9 100644 --- a/paddle/fluid/eager/hooks.h +++ b/paddle/fluid/eager/hooks.h @@ -19,6 +19,7 @@ #include #include +#include "paddle/fluid/platform/enforce.h" #include "paddle/phi/api/include/tensor.h" namespace egr { @@ -62,18 +63,69 @@ class CppVoidHook : public VoidHook { std::function fn_; }; +class PyObjectHolderBase { + public: + virtual ~PyObjectHolderBase() = default; + virtual void* get() = 0; + virtual void reset(void* ptr) = 0; + virtual void inc_ref() = 0; + virtual void dec_ref() = 0; +}; + class PackHookBase { public: virtual ~PackHookBase() = default; - virtual void* operator()(const paddle::experimental::Tensor& tensor) = 0; + virtual std::shared_ptr operator()( + const paddle::experimental::Tensor& tensor) = 0; virtual void* operator()(void* py_tensor) = 0; }; class UnPackHookBase { public: virtual ~UnPackHookBase() = default; - virtual paddle::experimental::Tensor operator()(void* packed_value) = 0; + virtual paddle::experimental::Tensor operator()( + std::shared_ptr packed_value) = 0; virtual void* operator()(void* packed_value, void* other) = 0; }; +class SavedTensorsHooks { + public: + SavedTensorsHooks() = default; + + ~SavedTensorsHooks() {} + + void SetHooks(std::shared_ptr pack_hook, + std::shared_ptr unpack_hook) { + PADDLE_ENFORCE_EQ(pack_hook_ == nullptr && unpack_hook_ == nullptr, + true, + paddle::platform::errors::InvalidArgument( + "paddle.autograd.saved_tensors_hooks only one pair " + "of hooks is allowed at a time.")); + pack_hook_ = pack_hook; + unpack_hook_ = unpack_hook; + is_enable_ = true; + } + + void ResetHooks() { + pack_hook_ = nullptr; + unpack_hook_ = nullptr; + is_enable_ = false; + } + + bool IsEnable() { 
return is_enable_; } + + std::shared_ptr GetPackHook() { return pack_hook_; } + std::shared_ptr GetUnPackHook() { return unpack_hook_; } + + static SavedTensorsHooks& GetInstance() { + static SavedTensorsHooks instance; + return instance; + } + + private: + std::shared_ptr pack_hook_; + std::shared_ptr unpack_hook_; + bool is_enable_{false}; +}; + } // namespace egr diff --git a/paddle/fluid/eager/saved_tensors_hooks.cc b/paddle/fluid/eager/saved_tensors_hooks.cc deleted file mode 100644 index 1060e5d463dd75..00000000000000 --- a/paddle/fluid/eager/saved_tensors_hooks.cc +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/eager/saved_tensors_hooks.h" -#include "paddle/fluid/eager/api/utils/global_utils.h" - -#if !(defined(PADDLE_NO_PYTHON) && defined(PADDLE_ON_INFERENCE)) -#include "paddle/fluid/pybind/eager.h" -#include "paddle/fluid/pybind/eager_utils.h" -#endif - -namespace egr { -#if !(defined(PADDLE_NO_PYTHON) && defined(PADDLE_ON_INFERENCE)) -PackHook::PackHook(PyObject* hook) : hook_(hook) { Py_INCREF(hook_); } - -PackHook::~PackHook() { - ::pybind11::gil_scoped_acquire gil; - Py_DECREF(hook_); -} - -void* PackHook::operator()(const paddle::experimental::Tensor& tensor) { - bool grad_tmp = egr::Controller::Instance().HasGrad(); - egr::Controller::Instance().SetHasGrad(false); - ::pybind11::gil_scoped_acquire gil; - auto args = PyTuple_New(1); - PyTuple_SET_ITEM(args, 0, paddle::pybind::ToPyObject(tensor)); - PyObject* ret = PyObject_Call(hook_, args, nullptr); - PADDLE_ENFORCE_NOT_NULL(ret, - paddle::platform::errors::External( - pybind11::detail::error_string().c_str())); - Py_XDECREF(args); - egr::Controller::Instance().SetHasGrad(grad_tmp); - return reinterpret_cast(ret); -} - -void* PackHook::operator()(void* py_tensor) { - bool grad_tmp = egr::Controller::Instance().HasGrad(); - egr::Controller::Instance().SetHasGrad(false); - ::pybind11::gil_scoped_acquire gil; - auto args = PyTuple_New(1); - Py_INCREF(reinterpret_cast(py_tensor)); - PyTuple_SET_ITEM(args, 0, reinterpret_cast(py_tensor)); - PyObject* ret = PyObject_Call(hook_, args, nullptr); - PADDLE_ENFORCE_NOT_NULL(ret, - paddle::platform::errors::External( - pybind11::detail::error_string().c_str())); - Py_XDECREF(args); - egr::Controller::Instance().SetHasGrad(grad_tmp); - return reinterpret_cast(ret); -} - -UnPackHook::UnPackHook(PyObject* hook) : hook_(hook) { Py_INCREF(hook_); } - -UnPackHook::~UnPackHook() { - ::pybind11::gil_scoped_acquire gil; - Py_DECREF(hook_); -} - -paddle::experimental::Tensor UnPackHook::operator()(void* packed_value) { - bool grad_tmp = egr::Controller::Instance().HasGrad(); - egr::Controller::Instance().SetHasGrad(false); - ::pybind11::gil_scoped_acquire gil; - auto args = PyTuple_New(1); - Py_INCREF(reinterpret_cast(packed_value)); - PyTuple_SET_ITEM(args, 0, reinterpret_cast(packed_value)); - PyObject* ret = PyObject_Call(hook_, args, nullptr); - 
PADDLE_ENFORCE_NOT_NULL(ret, - paddle::platform::errors::External( - pybind11::detail::error_string().c_str())); - Py_XDECREF(args); - egr::Controller::Instance().SetHasGrad(grad_tmp); - - PADDLE_ENFORCE_EQ(paddle::pybind::IsEagerTensor(ret), - true, - paddle::platform::errors::InvalidArgument( - "paddle.autograd.saved_tensors_hooks only one pair " - "of hooks is allowed at a time.")); - - auto tensor = reinterpret_cast(ret)->tensor; - Py_XDECREF(ret); - return tensor; -} - -void* UnPackHook::operator()(void* packed_value, void* other) { - bool grad_tmp = egr::Controller::Instance().HasGrad(); - egr::Controller::Instance().SetHasGrad(false); - ::pybind11::gil_scoped_acquire gil; - auto args = PyTuple_New(1); - Py_INCREF(reinterpret_cast(packed_value)); - PyTuple_SET_ITEM(args, 0, reinterpret_cast(packed_value)); - PyObject* ret = PyObject_Call(hook_, args, nullptr); - PADDLE_ENFORCE_NOT_NULL(ret, - paddle::platform::errors::External( - pybind11::detail::error_string().c_str())); - Py_XDECREF(args); - egr::Controller::Instance().SetHasGrad(grad_tmp); - - PADDLE_ENFORCE_EQ(paddle::pybind::IsEagerTensor(ret), - true, - paddle::platform::errors::InvalidArgument( - "paddle.autograd.saved_tensors_hooks only one pair " - "of hooks is allowed at a time.")); - - return reinterpret_cast(ret); -} -#endif - -} // namespace egr diff --git a/paddle/fluid/eager/saved_tensors_hooks.h b/paddle/fluid/eager/saved_tensors_hooks.h deleted file mode 100644 index 1deb30daaa8e1f..00000000000000 --- a/paddle/fluid/eager/saved_tensors_hooks.h +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
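// The two files deleted here move the Python-facing PackHook/UnPackHook out
// of the eager core; their reimplementation against PyObject lives in
// paddle/fluid/pybind/eager_utils.* later in this patch, while hooks.h above
// now only exposes the abstract PyObjectHolderBase / PackHookBase /
// UnPackHookBase interfaces plus the SavedTensorsHooks singleton.
// Illustrative sketch only (not from the patch): a hook pair written purely
// against those interfaces, assuming the template arguments stripped from
// this copy of the patch read std::shared_ptr<PyObjectHolderBase>,
// std::shared_ptr<PackHookBase> and std::shared_ptr<UnPackHookBase>.
#include <memory>
#include <utility>
#include "paddle/fluid/eager/hooks.h"

namespace sketch {
// Holds the packed value as a plain Tensor copy; the ref-count hooks are
// no-ops because the shared_ptr already owns the holder.
class TensorHolder : public egr::PyObjectHolderBase {
 public:
  explicit TensorHolder(paddle::experimental::Tensor t) : t_(std::move(t)) {}
  void* get() override { return &t_; }
  void reset(void* ptr) override {}
  void inc_ref() override {}
  void dec_ref() override {}

 private:
  paddle::experimental::Tensor t_;
};

class KeepAlivePackHook : public egr::PackHookBase {
 public:
  std::shared_ptr<egr::PyObjectHolderBase> operator()(
      const paddle::experimental::Tensor& tensor) override {
    return std::make_shared<TensorHolder>(tensor);
  }
  void* operator()(void* py_tensor) override { return py_tensor; }
};

class KeepAliveUnPackHook : public egr::UnPackHookBase {
 public:
  paddle::experimental::Tensor operator()(
      std::shared_ptr<egr::PyObjectHolderBase> packed) override {
    return *static_cast<paddle::experimental::Tensor*>(packed->get());
  }
  void* operator()(void* packed_value, void* other) override {
    return packed_value;
  }
};
}  // namespace sketch

// Registration would mirror eager_api_register_saved_tensors_hooks below:
//   egr::SavedTensorsHooks::GetInstance().SetHooks(
//       std::make_shared<sketch::KeepAlivePackHook>(),
//       std::make_shared<sketch::KeepAliveUnPackHook>());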
- -#pragma once - -#include -#include "paddle/fluid/eager/api/utils/global_utils.h" -#include "paddle/fluid/eager/hooks.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" - -namespace egr { -#if !(defined(PADDLE_NO_PYTHON) && defined(PADDLE_ON_INFERENCE)) -class PackHook : public PackHookBase { - public: - explicit PackHook(PyObject* hook); - - ~PackHook(); - - void* operator()(const paddle::experimental::Tensor& tensor) override; - - void* operator()(void* py_tensor) override; - - private: - PyObject* hook_; -}; - -class UnPackHook : public UnPackHookBase { - public: - explicit UnPackHook(PyObject* hook); - - ~UnPackHook(); - - paddle::experimental::Tensor operator()(void* packed_value) override; - - void* operator()(void* packed_value, void* other) override; - - private: - PyObject* hook_; -}; -#endif - -class SavedTensorsHooks { - public: - SavedTensorsHooks() = default; - - ~SavedTensorsHooks() {} - - void SetHooks(PyObject* pack_hook, PyObject* unpack_hook) { -#if !(defined(PADDLE_NO_PYTHON) && defined(PADDLE_ON_INFERENCE)) - PADDLE_ENFORCE_EQ(pack_hook_ == nullptr && unpack_hook_ == nullptr, - true, - paddle::platform::errors::InvalidArgument( - "paddle.autograd.saved_tensors_hooks only one pair " - "of hooks is allowed at a time.")); - pack_hook_ = std::make_shared(pack_hook); - unpack_hook_ = std::make_shared(unpack_hook); - is_enable_ = true; -#endif - } - - void ResetHooks() { -#if !(defined(PADDLE_NO_PYTHON) && defined(PADDLE_ON_INFERENCE)) - pack_hook_ = nullptr; - unpack_hook_ = nullptr; - is_enable_ = false; -#endif - } - - bool IsEnable() { return is_enable_; } - - std::shared_ptr GetPackHook() { return pack_hook_; } - std::shared_ptr GetUnPackHook() { return unpack_hook_; } - - static SavedTensorsHooks& GetInstance() { - static SavedTensorsHooks instance; - return instance; - } - - private: - std::shared_ptr pack_hook_; - std::shared_ptr unpack_hook_; - bool is_enable_{false}; -}; - -} // namespace egr diff --git a/paddle/fluid/eager/tensor_wrapper.h b/paddle/fluid/eager/tensor_wrapper.h index 67cd943f33174a..cb797c18b194ca 100644 --- a/paddle/fluid/eager/tensor_wrapper.h +++ b/paddle/fluid/eager/tensor_wrapper.h @@ -30,7 +30,7 @@ #include "paddle/fluid/eager/utils.h" #include "paddle/phi/api/lib/utils/allocator.h" #ifndef PADDLE_NO_PYTHON -#include "paddle/fluid/eager/saved_tensors_hooks.h" +#include "paddle/fluid/eager/hooks.h" #endif namespace egr { @@ -73,7 +73,7 @@ class TensorWrapper { } } else { #ifndef PADDLE_NO_PYTHON - if (SavedTensorsHooks::GetInstance().IsEnable() && + if (egr::SavedTensorsHooks::GetInstance().IsEnable() && tensor.is_dense_tensor() && tensor.initialized()) { phi::DenseTensor* dense_tensor = static_cast(tensor.impl().get()); @@ -81,9 +81,9 @@ class TensorWrapper { std::move(std::make_shared( std::make_shared(nullptr, 0, tensor.place()), dense_tensor->meta()))); - auto pack_hook = SavedTensorsHooks::GetInstance().GetPackHook(); - unpack_hook_ = SavedTensorsHooks::GetInstance().GetUnPackHook(); - packed_value_ = reinterpret_cast((*pack_hook)(tensor)); + auto pack_hook = egr::SavedTensorsHooks::GetInstance().GetPackHook(); + unpack_hook_ = egr::SavedTensorsHooks::GetInstance().GetUnPackHook(); + packed_value_ = (*pack_hook)(tensor); } else { #endif intermidiate_tensor_.set_impl(tensor.impl()); @@ -105,6 +105,7 @@ class TensorWrapper { weak_grad_node_ = tensor_autograd_meta->GetMutableGradNode(); } } + #ifndef PADDLE_NO_PYTHON TensorWrapper(const TensorWrapper& other) { no_need_buffer_ = other.no_need_buffer_; @@ -113,7 
+114,9 @@ class TensorWrapper { inplace_version_snapshot_ = other.inplace_version_snapshot_; packed_value_ = other.packed_value_; unpack_hook_ = other.unpack_hook_; - Py_XINCREF(packed_value_); + if (packed_value_) { + packed_value_->inc_ref(); + } } TensorWrapper& operator=(const TensorWrapper& other) { @@ -123,12 +126,13 @@ class TensorWrapper { inplace_version_snapshot_ = other.inplace_version_snapshot_; packed_value_ = other.packed_value_; unpack_hook_ = other.unpack_hook_; - Py_XINCREF(packed_value_); + if (packed_value_) { + packed_value_->inc_ref(); + } return *this; } - - ~TensorWrapper() { Py_XDECREF(packed_value_); } #endif + paddle::experimental::Tensor recover() { VLOG(6) << "Recover tensor: " << intermidiate_tensor_.name() << " for wrapper"; @@ -138,8 +142,7 @@ class TensorWrapper { } #ifndef PADDLE_NO_PYTHON if (packed_value_ && unpack_hook_) { - auto tensor_unpacked = - (*unpack_hook_)(reinterpret_cast(packed_value_)); + auto tensor_unpacked = (*unpack_hook_)(packed_value_); auto src_dense_tensor = static_cast(tensor_unpacked.impl().get()); static_cast(intermidiate_tensor_.impl().get()) @@ -224,10 +227,10 @@ class TensorWrapper { std::weak_ptr weak_grad_node_; uint32_t inplace_version_snapshot_ = 0; #ifndef PADDLE_NO_PYTHON - PyObject* packed_value_{nullptr}; - std::shared_ptr unpack_hook_; + std::shared_ptr packed_value_; + std::shared_ptr unpack_hook_; #else - void* packed_value_{nullptr}; + std::shared_ptr packed_value_; std::shared_ptr unpack_hook_; #endif }; diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index dab32b5a67552b..973ef8a4a79992 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -605,7 +605,6 @@ if(WITH_PYTHON) list(APPEND PYBIND_DEPS python) list(APPEND PYBIND_DEPS custom_operator) list(APPEND PYBIND_DEPS custom_operator_node) - list(APPEND PYBIND_DEPS saved_tensors_hooks) endif() # On Linux, cc_library(paddle SHARED ..) 
will generate the libpaddle.so, @@ -651,12 +650,4 @@ if(WITH_PYTHON) target_link_libraries(${SHARD_LIB_NAME} ${os_dependency_modules}) add_dependencies(${SHARD_LIB_NAME} op_function_generator_cmd) - if(APPLE) - string(REGEX REPLACE ".+/(.+)" "\\1" PYTHON_LIBRARY_NAME - ${PYTHON_LIBRARIES}) - # target_link_libraries(${SHARD_LIB_NAME} "-Wl,-rpath,${PYTHON_LIBRARY_NAME}") - else() - target_link_libraries(${SHARD_LIB_NAME} ${PYTHON_LIBRARIES}) - endif() - endif() diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 9c0f189e63050b..2874c7b90f4372 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -25,7 +25,6 @@ typedef SSIZE_T ssize_t; #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/backward.h" #include "paddle/fluid/eager/custom_operator/custom_operator_node.h" -#include "paddle/fluid/eager/saved_tensors_hooks.h" #include "paddle/fluid/eager/utils.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/custom_operator.h" @@ -715,7 +714,9 @@ static PyObject* eager_api_register_saved_tensors_hooks(PyObject* self, if (egr::Controller::Instance().HasGrad()) { auto pack_hook = PyTuple_GET_ITEM(args, 0); auto unpack_hook = PyTuple_GET_ITEM(args, 1); - egr::SavedTensorsHooks::GetInstance().SetHooks(pack_hook, unpack_hook); + egr::SavedTensorsHooks::GetInstance().SetHooks( + std::make_shared(pack_hook), + std::make_shared(unpack_hook)); } RETURN_PY_NONE EAGER_CATCH_AND_THROW_RETURN_NULL diff --git a/paddle/fluid/pybind/eager_py_layer.cc b/paddle/fluid/pybind/eager_py_layer.cc index f80a39f9f0a9ba..8befe6318bce0e 100644 --- a/paddle/fluid/pybind/eager_py_layer.cc +++ b/paddle/fluid/pybind/eager_py_layer.cc @@ -20,7 +20,6 @@ limitations under the License. 
*/ #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/pylayer/py_layer_node.h" -#include "paddle/fluid/eager/saved_tensors_hooks.h" #include "paddle/fluid/eager/utils.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/memory/allocation/allocator.h" diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index f5f409673a1068..7c9faf2fd593ec 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -1627,5 +1627,126 @@ void PyVoidHook::operator()() { } } +PyObjectHolder::PyObjectHolder(PyObject* ptr) { ptr_ = ptr; } + +PyObjectHolder::~PyObjectHolder() { + ::pybind11::gil_scoped_acquire gil; + Py_XDECREF(ptr_); +} + +void* PyObjectHolder::get() { return reinterpret_cast(ptr_); } + +void PyObjectHolder::reset(void* ptr) { + if (ptr_) { + ::pybind11::gil_scoped_acquire gil; + Py_XDECREF(ptr_); + } + ptr_ = reinterpret_cast(ptr); +} + +void PyObjectHolder::inc_ref() { + ::pybind11::gil_scoped_acquire gil; + Py_XINCREF(ptr_); +} +void PyObjectHolder::dec_ref() { + ::pybind11::gil_scoped_acquire gil; + Py_XDECREF(ptr_); +} + +PackHook::PackHook(PyObject* hook) : hook_(hook) { Py_INCREF(hook_); } + +PackHook::~PackHook() { + ::pybind11::gil_scoped_acquire gil; + Py_DECREF(hook_); +} + +std::shared_ptr PackHook::operator()( + const paddle::experimental::Tensor& tensor) { + bool grad_tmp = egr::Controller::Instance().HasGrad(); + egr::Controller::Instance().SetHasGrad(false); + ::pybind11::gil_scoped_acquire gil; + auto args = PyTuple_New(1); + PyTuple_SET_ITEM(args, 0, paddle::pybind::ToPyObject(tensor)); + PyObject* ret = PyObject_Call(hook_, args, nullptr); + PADDLE_ENFORCE_NOT_NULL(ret, + paddle::platform::errors::External( + pybind11::detail::error_string().c_str())); + Py_XDECREF(args); + egr::Controller::Instance().SetHasGrad(grad_tmp); + return std::make_shared(ret); +} + +void* PackHook::operator()(void* py_tensor) { + bool grad_tmp = egr::Controller::Instance().HasGrad(); + egr::Controller::Instance().SetHasGrad(false); + ::pybind11::gil_scoped_acquire gil; + auto args = PyTuple_New(1); + Py_INCREF(reinterpret_cast(py_tensor)); + PyTuple_SET_ITEM(args, 0, reinterpret_cast(py_tensor)); + PyObject* ret = PyObject_Call(hook_, args, nullptr); + PADDLE_ENFORCE_NOT_NULL(ret, + paddle::platform::errors::External( + pybind11::detail::error_string().c_str())); + Py_XDECREF(args); + egr::Controller::Instance().SetHasGrad(grad_tmp); + return reinterpret_cast(ret); +} + +UnPackHook::UnPackHook(PyObject* hook) : hook_(hook) { Py_INCREF(hook_); } + +UnPackHook::~UnPackHook() { + ::pybind11::gil_scoped_acquire gil; + Py_DECREF(hook_); +} + +paddle::experimental::Tensor UnPackHook::operator()( + std::shared_ptr packed_value) { + bool grad_tmp = egr::Controller::Instance().HasGrad(); + egr::Controller::Instance().SetHasGrad(false); + ::pybind11::gil_scoped_acquire gil; + auto args = PyTuple_New(1); + Py_INCREF(reinterpret_cast(packed_value->get())); + PyTuple_SET_ITEM(args, 0, reinterpret_cast(packed_value->get())); + PyObject* ret = PyObject_Call(hook_, args, nullptr); + PADDLE_ENFORCE_NOT_NULL(ret, + paddle::platform::errors::External( + pybind11::detail::error_string().c_str())); + Py_XDECREF(args); + egr::Controller::Instance().SetHasGrad(grad_tmp); + + PADDLE_ENFORCE_EQ(paddle::pybind::IsEagerTensor(ret), + true, + paddle::platform::errors::InvalidArgument( + "paddle.autograd.saved_tensors_hooks only one pair " + "of hooks is allowed at a time.")); + + 
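// The unpack hook invokes the user-supplied Python callable under the GIL
// with grad recording temporarily switched off; the enforce above requires
// the callable to return an eager Tensor, which is unwrapped just below.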
auto tensor = reinterpret_cast(ret)->tensor; + Py_XDECREF(ret); + return tensor; +} + +void* UnPackHook::operator()(void* packed_value, void* other) { + bool grad_tmp = egr::Controller::Instance().HasGrad(); + egr::Controller::Instance().SetHasGrad(false); + ::pybind11::gil_scoped_acquire gil; + auto args = PyTuple_New(1); + Py_INCREF(reinterpret_cast(packed_value)); + PyTuple_SET_ITEM(args, 0, reinterpret_cast(packed_value)); + PyObject* ret = PyObject_Call(hook_, args, nullptr); + PADDLE_ENFORCE_NOT_NULL(ret, + paddle::platform::errors::External( + pybind11::detail::error_string().c_str())); + Py_XDECREF(args); + egr::Controller::Instance().SetHasGrad(grad_tmp); + + PADDLE_ENFORCE_EQ(paddle::pybind::IsEagerTensor(ret), + true, + paddle::platform::errors::InvalidArgument( + "paddle.autograd.saved_tensors_hooks only one pair " + "of hooks is allowed at a time.")); + + return reinterpret_cast(ret); +} + } // namespace pybind } // namespace paddle diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h index 654a03ae8808c3..900b2538ead50b 100644 --- a/paddle/fluid/pybind/eager_utils.h +++ b/paddle/fluid/pybind/eager_utils.h @@ -151,6 +151,49 @@ class PyVoidHook : public egr::VoidHook { PyObject* py_func_; }; +class PyObjectHolder : public egr::PyObjectHolderBase { + public: + PyObjectHolder() { ptr_ = nullptr; } + explicit PyObjectHolder(PyObject* ptr); + ~PyObjectHolder() override; + void* get() override; + void reset(void* ptr) override; + void inc_ref() override; + void dec_ref() override; + + private: + PyObject* ptr_{nullptr}; +}; + +class PackHook : public egr::PackHookBase { + public: + explicit PackHook(PyObject* hook); + + ~PackHook(); + + std::shared_ptr operator()( + const paddle::experimental::Tensor& tensor) override; + + void* operator()(void* py_tensor) override; + + private: + PyObject* hook_; +}; + +class UnPackHook : public egr::UnPackHookBase { + public: + explicit UnPackHook(PyObject* hook); + + ~UnPackHook(); + + paddle::experimental::Tensor operator()( + std::shared_ptr packed_value) override; + + void* operator()(void* packed_value, void* other) override; + + private: + PyObject* hook_; +}; template struct TupleTensorResult { static void Run(const Tuple& out, PyObject* result) { From aa892113395a73f6789508b9ebb2e91213a5450a Mon Sep 17 00:00:00 2001 From: Wilber Date: Thu, 1 Dec 2022 19:52:53 +0800 Subject: [PATCH 089/154] [Inference] Optimize memory_optimize pass. 
(#48476) * update memory_optimize pass --- paddle/fluid/framework/naive_executor.cc | 67 ++++++++++++++++++- paddle/fluid/framework/naive_executor.h | 11 ++- .../fluid/inference/analysis/CMakeLists.txt | 2 +- paddle/fluid/inference/analysis/argument.h | 1 + .../inference/analysis/ir_pass_manager.cc | 2 + .../inference/analysis/pass_result_info.cc | 15 +++++ .../inference/analysis/pass_result_info.h | 66 ++++++++++++++++++ .../analysis/passes/memory_optimize_pass.cc | 9 ++- .../fluid/inference/api/analysis_predictor.cc | 22 ++++-- .../fluid/inference/api/analysis_predictor.h | 2 +- 10 files changed, 183 insertions(+), 14 deletions(-) create mode 100644 paddle/fluid/inference/analysis/pass_result_info.cc create mode 100644 paddle/fluid/inference/analysis/pass_result_info.h diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc index 57e9a175b16f24..6c0daef26ffc8c 100644 --- a/paddle/fluid/framework/naive_executor.cc +++ b/paddle/fluid/framework/naive_executor.cc @@ -15,8 +15,11 @@ #include "paddle/fluid/framework/naive_executor.h" #include +#include +#include #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/platform/denormal.h" #ifdef PADDLE_WITH_MKLDNN @@ -61,12 +64,31 @@ void NaiveExecutor::Run() { #ifdef PADDLE_WITH_INFERENCE_NVTX platform::CudaNvtxRangePush(op->Type(), platform::NvtxRangeColor::Green); #endif + + // According to reuse table, we share the out tensor's holder. + if (reuse_cache_.count(op.get())) { + for (auto &it : reuse_cache_[op.get()]) { + it.first->ShareBufferWith(*cluster_buffer_[it.second]); + } + } + op->Run(*scope_, place_); + + // Update the shared_holder so that only records the max one. 
+ if (reuse_cache_.count(op.get())) { + for (auto &it : reuse_cache_[op.get()]) { + if (it.first->memory_size() > + cluster_buffer_[it.second]->memory_size()) { + cluster_buffer_[it.second] = it.first; + } + } + } + #ifdef PADDLE_WITH_INFERENCE_NVTX platform::CudaNvtxRangePop(); #endif - if (hookfunc_) { - hookfunc_(op.get()); + for (auto &func : hookfunc_) { + func(op.get()); } } #ifdef PADDLE_WITH_INFERENCE_NVTX @@ -146,7 +168,46 @@ phi::DenseTensor *NaiveExecutor::FindTensor(const std::string &name) { } void NaiveExecutor::RegisterOutputHook(const HookFunc &hookfunc) { - hookfunc_ = hookfunc; + hookfunc_.push_back(hookfunc); +} + +void NaiveExecutor::MakeReusePlan( + const std::unordered_map &reuse_table) { + std::unordered_map> clusters; + for (auto &it : reuse_table) { + clusters[it.second].insert(it.first); + } + + std::vector cluster_names; + for (auto &it : clusters) { + cluster_names.push_back(it.first); + } + cluster_buffer_.resize(cluster_names.size()); + + for (auto &op : ops_) { + for (auto &name : op->OutputVars(true)) { + if (reuse_table.count(name)) { + const auto &reuse_name = reuse_table.at(name); + auto it = + std::find(cluster_names.begin(), cluster_names.end(), reuse_name); + int idx = it - cluster_names.begin(); + auto *var = scope_->FindVar(name); + auto *reuse_var = scope_->FindVar(reuse_name); + if (var && reuse_var && var->IsType() && + reuse_var->IsType()) { + auto *tensor = var->GetMutable(); + auto *reuse_tensor = reuse_var->GetMutable(); + cluster_buffer_[idx] = reuse_tensor; + if (reuse_cache_.count(op.get())) { + reuse_cache_[op.get()].emplace(tensor, idx); + } else { + reuse_cache_[op.get()] = + std::unordered_map{{tensor, idx}}; + } + } + } + } + } } NaiveExecutor::~NaiveExecutor() { diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h index 882f50b451a295..f1a4a036cde36b 100644 --- a/paddle/fluid/framework/naive_executor.h +++ b/paddle/fluid/framework/naive_executor.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "paddle/fluid/framework/operator.h" @@ -67,6 +68,9 @@ class NaiveExecutor { Scope* GetScope() { return scope_; } + void MakeReusePlan( + const std::unordered_map& reuse_table); + void ResetTrtOps(int num); void RegisterOutputHook(const HookFunc& hookfunc); @@ -82,7 +86,12 @@ class NaiveExecutor { std::vector> ops_; Scope* scope_{nullptr}; - HookFunc hookfunc_{nullptr}; + std::vector hookfunc_; + + // Record information that tensor_a should ShareBufferWith tensor_b. 
+ std::unordered_map> + reuse_cache_; + std::vector cluster_buffer_; }; } // namespace framework diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt index 10d67c69f13448..06c4a55c5c9b69 100644 --- a/paddle/fluid/inference/analysis/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/CMakeLists.txt @@ -20,7 +20,7 @@ cc_library( cc_library( ir_pass_manager - SRCS ir_pass_manager.cc + SRCS ir_pass_manager.cc pass_result_info.cc DEPS graph pass ${INFER_IR_PASSES} analysis_helper) cc_library( diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index fd5ba90eefb3fb..a8d1067c554715 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -139,6 +139,7 @@ struct Argument { unique_ptr_t field__##_; DECL_ARGUMENT_FIELD(predictor_id, PredictorID, int); + DECL_ARGUMENT_FIELD(root_predictor_id, RootPredictorID, int); // Model path DECL_ARGUMENT_FIELD(model_dir, ModelDir, std::string); // Model specified with program and parameters files. diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index 862a019da6d57c..b31f28a6a602f9 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -229,6 +229,8 @@ void IRPassManager::CreatePasses(Argument *argument, argument->dlnne_input_shape_dict())); pass->Set("program", new framework::ProgramDesc *(&argument->main_program())); + } else if (pass_name == "memory_optimize_pass") { + pass->Set("root_predictor_id", new int(argument->root_predictor_id())); } if (pass_name == "lite_subgraph_pass") { bool lite_enable_int8 = diff --git a/paddle/fluid/inference/analysis/pass_result_info.cc b/paddle/fluid/inference/analysis/pass_result_info.cc new file mode 100644 index 00000000000000..d22d208588f334 --- /dev/null +++ b/paddle/fluid/inference/analysis/pass_result_info.cc @@ -0,0 +1,15 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/pass_result_info.h" diff --git a/paddle/fluid/inference/analysis/pass_result_info.h b/paddle/fluid/inference/analysis/pass_result_info.h new file mode 100644 index 00000000000000..7e42573e959119 --- /dev/null +++ b/paddle/fluid/inference/analysis/pass_result_info.h @@ -0,0 +1,66 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include "paddle/phi/core/enforce.h" +#include "paddle/utils/variant.h" + +namespace paddle { +namespace inference { +namespace analysis { + +class PassResultInfoForRuntime { + public: + using PassInfo = + paddle::variant, + std::unordered_map>; + + static PassResultInfoForRuntime* Instance() { + static PassResultInfoForRuntime info; + return &info; + } + + template + void Set(int predictor_id, const std::string& pass_name, T infos) { + map[predictor_id].emplace(pass_name, infos); + } + + template + T Get(int predictor_id, const std::string& pass_name) { + PADDLE_ENFORCE_EQ( + map.count(predictor_id) && map[predictor_id].count(pass_name), + true, + phi::errors::InvalidArgument( + "Not find predictor_id %d and pass_name %s", + predictor_id, + pass_name)); + return PADDLE_GET_CONST(T, map[predictor_id][pass_name]); + } + + private: + using PassResultInfoMap = + std::unordered_map>; + PassResultInfoMap map; +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc index 63aaa7d97967a4..2ff82986e945ca 100644 --- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc +++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc @@ -19,6 +19,7 @@ #include "glog/logging.h" #include "paddle/fluid/framework/ir/graph_helper.h" +#include "paddle/fluid/inference/analysis/pass_result_info.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -310,7 +311,7 @@ void MemoryOptimizePass::RunImpl(Argument* argument) { // mapping table. if (!argument->enable_memory_optim()) return; // Because of pass is a singleton, graph can not be member - // variables,otherwise,errors will be caused under multithreading + // variables,otherwise, errors will be caused under multithreading // conditions. 
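// With this change the pass no longer rewrites op descs in place
// (UpdateOpDescsByReuse is dropped below); it only computes the
// var -> reuse-cluster mapping and publishes it via PassResultInfoForRuntime
// keyed by root_predictor_id, and the actual buffer sharing happens later in
// NaiveExecutor::MakeReusePlan()/Run(). That deferral is also what allows the
// EnableMemoryOptim(false) workarounds for output hooks and shape-range
// collection to be removed further down in this commit.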
auto graph = argument->main_graph_ptr(); @@ -323,7 +324,11 @@ void MemoryOptimizePass::RunImpl(Argument* argument) { CollectLifeCycle(graph, &lifecycles, sort_kind); CollectVarMemorySize(graph, &space_table); MakeSimpleReusePlan(lifecycles, space_table, &node2cluster, &cluster_size); - UpdateOpDescsByReuse(graph, node2cluster, sort_kind); + + auto* pass_res_info = PassResultInfoForRuntime::Instance(); + pass_res_info->Set( + argument->root_predictor_id(), "memory_optimize_pass", node2cluster); + return; } diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 6a23f11e4522a7..1c27c008d8ca7b 100755 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -38,6 +38,7 @@ #include "paddle/fluid/framework/var_type_traits.h" #include "paddle/fluid/framework/version.h" #include "paddle/fluid/inference/analysis/helper.h" +#include "paddle/fluid/inference/analysis/pass_result_info.h" #include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h" #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h" #include "paddle/fluid/inference/api/helper.h" @@ -262,6 +263,10 @@ bool AnalysisPredictor::Init( "generated."; } + if (!status_is_cloned_) { + root_predictor_id_ = predictor_id_; + } + // no matter with or without MKLDNN paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); @@ -615,6 +620,15 @@ bool AnalysisPredictor::PrepareExecutor() { executor_->Prepare( sub_scope_, *inference_program_, 0, config_.use_feed_fetch_ops_); + if (config_.enable_memory_optim_) { + auto *pass_res_info = + inference::analysis::PassResultInfoForRuntime::Instance(); + auto reuse_table = + pass_res_info->Get>( + root_predictor_id_, "memory_optimize_pass"); + executor_->MakeReusePlan(reuse_table); + } + PADDLE_ENFORCE_NOT_NULL(sub_scope_, platform::errors::PreconditionNotMet( "The sub_scope should not be nullptr.")); @@ -1079,6 +1093,7 @@ void AnalysisPredictor::PrepareArgument() { argument_.SetModelFromMemory(config_.model_from_memory_); // Analyze inference_program argument_.SetPredictorID(predictor_id_); + argument_.SetRootPredictorID(root_predictor_id_); argument_.SetOptimCacheDir(config_.opt_cache_dir_); if (!config_.model_dir().empty()) { argument_.SetModelDir(config_.model_dir()); @@ -2114,6 +2129,7 @@ std::unique_ptr AnalysisPredictor::Clone(void *stream) { std::lock_guard lk(clone_mutex_); auto *x = new AnalysisPredictor(config_); x->status_is_cloned_ = true; + x->root_predictor_id_ = this->root_predictor_id_; if (config_.use_external_stream_ && stream == nullptr) { PADDLE_THROW(platform::errors::InvalidArgument( "config has been configured to use external stream, but the Clone " @@ -2175,12 +2191,6 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) { } void AnalysisPredictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) { - if (config_.enable_memory_optim()) { - LOG(WARNING) << "If you want to run output hook function, you should " - "use config.EnableMemoryOptim(false) to turn off memory " - "reuse!"; - return; - } static std::once_flag register_hook_flag; std::call_once(register_hook_flag, [this] { executor_->RegisterOutputHook([this](framework::OperatorBase *op) { diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index 37d1511fa272d6..25595d12cb44a5 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h 
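// Taken together, the analysis- and runtime-side changes in this commit work
// like this: memory_optimize_pass computes node2cluster and stores it in the
// new PassResultInfoForRuntime registry keyed by root_predictor_id (shared by
// clones), and AnalysisPredictor::PrepareExecutor fetches it to build the
// executor's reuse plan. A hedged usage sketch using the identifiers above;
// the registry value type is assumed to be
// std::unordered_map<std::string, std::string>, since the template arguments
// were stripped from this copy of the patch.
#include <string>
#include <unordered_map>
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/inference/analysis/pass_result_info.h"

using ReuseTable = std::unordered_map<std::string, std::string>;

void PublishAndConsume(int root_predictor_id,
                       const ReuseTable& node2cluster,
                       paddle::framework::NaiveExecutor* executor) {
  auto* info =
      paddle::inference::analysis::PassResultInfoForRuntime::Instance();
  // Analysis time (memory_optimize_pass): record which variable reuses which
  // cluster representative instead of rewriting the program desc.
  info->Set(root_predictor_id, "memory_optimize_pass", node2cluster);

  // Runtime (AnalysisPredictor::PrepareExecutor): every clone shares the root
  // predictor id, so it finds the same table and lets the executor share
  // output buffers per cluster while it runs.
  auto table =
      info->Get<ReuseTable>(root_predictor_id, "memory_optimize_pass");
  executor->MakeReusePlan(table);
}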
@@ -102,7 +102,6 @@ class AnalysisPredictor : public PaddlePredictor { explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) { if (config_.shape_range_info_collected()) { config_.SwitchIrOptim(false); - config_.EnableMemoryOptim(false); } predictor_id_ = inference::GetUniqueId(); } @@ -518,6 +517,7 @@ class AnalysisPredictor : public PaddlePredictor { int need_collect_var_shapes_{-1}; // -1 for default, 0 for false, 1 for true. std::vector>> batch_var_shapes_; int predictor_id_; + int root_predictor_id_{-1}; private: std::vector hookfuncs_; From e5cf75d848f6b1d38cd6a7353c3f8598c6d3ef88 Mon Sep 17 00:00:00 2001 From: Wangzheee <634486483@qq.com> Date: Thu, 1 Dec 2022 19:57:37 +0800 Subject: [PATCH 090/154] [Paddle Inference] General optimization for no_varlen multihead (#48469) * general optimization for no_varlen multihead --- .../ir/remove_padding_recover_padding_pass.cc | 3 - ...t_embedding_eltwise_layernorm_fuse_pass.cc | 4 +- .../ir/trt_multihead_matmul_fuse_pass.cc | 4 +- .../ir/trt_skip_layernorm_fuse_pass.cc | 4 +- .../fluid/inference/tensorrt/convert/fc_op.cc | 2 +- .../tensorrt/convert/multihead_matmul_op.cc | 123 +++++- .../convert/transformer_input_convert_op.cc | 2 +- .../inference/tensorrt/plugin/CMakeLists.txt | 2 +- .../tensorrt/plugin/remove_padding_plugin.cu | 1 - .../transformer_input_convert_plugin.cu | 122 ------ ...transformer_input_output_convert_plugin.cu | 356 ++++++++++++++++++ ...transformer_input_output_convert_plugin.h} | 117 +++++- 12 files changed, 595 insertions(+), 145 deletions(-) delete mode 100644 paddle/fluid/inference/tensorrt/plugin/transformer_input_convert_plugin.cu create mode 100644 paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.cu rename paddle/fluid/inference/tensorrt/plugin/{transformer_input_convert_plugin.h => transformer_input_output_convert_plugin.h} (53%) diff --git a/paddle/fluid/framework/ir/remove_padding_recover_padding_pass.cc b/paddle/fluid/framework/ir/remove_padding_recover_padding_pass.cc index 5127c5934cb48d..19c2e0541b1bce 100644 --- a/paddle/fluid/framework/ir/remove_padding_recover_padding_pass.cc +++ b/paddle/fluid/framework/ir/remove_padding_recover_padding_pass.cc @@ -439,9 +439,6 @@ void RemovePaddingRecoverPaddingPass::ApplyImpl(ir::Graph* graph) const { "remove_padding pass."; return; } - fc_op->Op()->RemoveAttr("in_num_col_dims"); - fc_op->Op()->SetAttr("in_num_col_dims", 1); - insert_remove_padding_op(fc_input, fc_op); insert_recover_padding_op(fc_op, fc_op->outputs[0]); found_subgraph_count++; diff --git a/paddle/fluid/framework/ir/trt_embedding_eltwise_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/trt_embedding_eltwise_layernorm_fuse_pass.cc index f870796a4c164b..23ebbddf5796f7 100644 --- a/paddle/fluid/framework/ir/trt_embedding_eltwise_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_embedding_eltwise_layernorm_fuse_pass.cc @@ -441,14 +441,14 @@ void TrtEmbeddingEltwiseLayerNormFusePass::ApplyImpl(Graph* graph) const { std::string mask_id = Get("tensorrt_transformer_maskid"); if ((use_varseqlen && pos_id != "" && mask_id != "") || - (!use_varseqlen && pos_id == "" && mask_id == "")) { + (!use_varseqlen && pos_id == "")) { VLOG(3) << "start trt_embedding_eltwise_layernorm_fuse_pass"; } else { PADDLE_THROW( platform::errors::Fatal("Use transformer'varseqlen need config: " "use_varseqlen, set pos_id, set " "mask_id. Or not use varseqlen, do not set " - "pos_id, set mask_id. Please " + "pos_id. 
Please " "reconfig")); } graph->Set(kEmbEltwiseLayernormPass, new bool(true)); diff --git a/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc index 4ecc9919f5485e..1d17cba4459059 100644 --- a/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc @@ -1637,14 +1637,14 @@ void TrtMultiHeadMatmulV3FusePass::ApplyImpl(Graph* graph) const { "preln_embedding_eltwise_layernorm_fuse_" "pass. please use no_varseqlen")); } - } else if (!use_varseqlen && pos_id == "" && mask_id == "") { + } else if (!use_varseqlen && pos_id == "") { VLOG(3) << "start no_varseqlen_trt_multihead_matmul_fuse_pass"; } else { PADDLE_THROW( platform::errors::Fatal("Use transformer'varseqlen need config: " "use_varseqlen, set pos_id, set " "mask_id. Or not use varseqlen, do not set " - "pos_id, set mask_id. Please " + "pos_id. Please " "reconfig")); } graph->Set(kMultiheadMatmulPass, new bool(true)); diff --git a/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc index d33adab8b3ea78..2e578a06e38e15 100644 --- a/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc @@ -207,14 +207,14 @@ void TrtSkipLayerNormFusePass::ApplyImpl(ir::Graph *graph) const { "trt_embedding_eltwise_layernorm_fuse_pass, " "trt_multihead_matmul_fuse_pass. please use no_varseqlen")); } - } else if (!use_varseqlen && pos_id == "" && mask_id == "") { + } else if (!use_varseqlen && pos_id == "") { VLOG(3) << "start no_varseqlen trt_skip_layernorm_fuse_pass"; } else { PADDLE_THROW( platform::errors::Fatal("Use transformer'varseqlen need config: " "use_varseqlen, set pos_id, set " "mask_id. Or not use varseqlen, do not set " - "pos_id, set mask_id. Please " + "pos_id. Please " "reconfig")); } } diff --git a/paddle/fluid/inference/tensorrt/convert/fc_op.cc b/paddle/fluid/inference/tensorrt/convert/fc_op.cc index 63637c25be4fbb..38ed95ce33a86c 100644 --- a/paddle/fluid/inference/tensorrt/convert/fc_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/fc_op.cc @@ -332,7 +332,7 @@ class FcOpConverter : public OpConverter { } // If use tensorrt'oss, the x_dim and x_num_col_dims need change, and can // not add Shuffle layer in ernie's multihead. - if (x_dim.nbDims == 4 && x_num_col_dims == 1) { + if (x_dim.nbDims == 4 && x_dim.d[2] == 1 && x_dim.d[3] == 1) { if (enable_int8 || support_int8) { // add conv1x1 layer nvinfer1::DimsHW nv_ksize(1, 1); diff --git a/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc b/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc index 0515cb513d007d..0a238eadd95c9b 100644 --- a/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h" +#include "paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.h" namespace paddle { namespace inference { @@ -87,7 +88,7 @@ class MultiheadMatMulOpConverter : public OpConverter { engine_->tensorrt_transformer_posid() != "" && engine_->tensorrt_transformer_maskid() != ""; if (engine_->with_dynamic_shape()) { - if (flag_varseqlen) { + if (engine_->tensorrt_transformer_maskid() != "") { if (engine_->precision() == AnalysisConfig::Precision::kFloat32) { PADDLE_THROW(platform::errors::Fatal( "use use_varseqlen must be int8 or half, not float32.")); @@ -98,8 +99,100 @@ class MultiheadMatMulOpConverter : public OpConverter { nvinfer1::Weights bias{nvinfer1::DataType::kFLOAT, static_cast(bias_data), static_cast(bias_t->numel())}; - auto max_seqlen_tensor = engine_->GetITensor("max_seqlen_tensor"); - auto pos_id_tensor = engine_->GetITensor("pos_id"); + + nvinfer1::ITensor* mask_tensor; + nvinfer1::ITensor* pos_id_tensor; + nvinfer1::ITensor* max_seqlen_tensor; + auto* new_input = input; + if (flag_varseqlen) { + mask_tensor = engine_->GetITensor("qkv_plugin_mask"); + pos_id_tensor = engine_->GetITensor("pos_id"); + max_seqlen_tensor = engine_->GetITensor("max_seqlen_tensor"); + } else { + auto* bias_qk_tensor = + engine_->GetITensor(op_desc.Input("BiasQK").front()); + auto bias_qk_dims = bias_qk_tensor->getDimensions(); + PADDLE_ENFORCE_EQ(bias_qk_dims.nbDims, + 4, + platform::errors::InvalidArgument( + "The rank of Multihead Matmul'BiasQK must be " + "4, but got rank is %d.", + bias_qk_dims.nbDims)); + + nvinfer1::Dims start_dims = bias_qk_dims; + start_dims.d[0] = 0; + start_dims.d[1] = 0; + start_dims.d[2] = 0; + start_dims.d[3] = 0; + nvinfer1::Dims size_dims = bias_qk_dims; + nvinfer1::Dims step_dims = bias_qk_dims; + step_dims.d[0] = 1; + step_dims.d[1] = 1; + step_dims.d[2] = 1; + step_dims.d[3] = 1; + auto* shape_tensor = Shape(bias_qk_tensor); + + // (b,n,m,m) -> (b,1,m,1) + std::vector size_vec_tensor; + size_vec_tensor.push_back(GetEleTensorOfShape(shape_tensor, 0)); + size_vec_tensor.push_back(Add1DConstantLayer(1)); + size_vec_tensor.push_back(GetEleTensorOfShape(shape_tensor, 2)); + size_vec_tensor.push_back(Add1DConstantLayer(1)); + auto* size_tensor = Concat(size_vec_tensor); + auto* slice_layer = TRT_ENGINE_ADD_LAYER(engine_, + Slice, + *bias_qk_tensor, + start_dims, + size_dims, + step_dims); + slice_layer->setInput(2, *size_tensor); + + // half -> bool + auto* cast_layer_0 = TRT_ENGINE_ADD_LAYER( + engine_, Identity, *slice_layer->getOutput(0)); + cast_layer_0->setOutputType(0, nvinfer1::DataType::kBOOL); + + // bool kNOT + auto* not_layer = + TRT_ENGINE_ADD_LAYER(engine_, + Unary, + *cast_layer_0->getOutput(0), + nvinfer1::UnaryOperation::kNOT); + + // bool -> int32 + auto* cast_layer_1 = + TRT_ENGINE_ADD_LAYER(engine_, Identity, *not_layer->getOutput(0)); + cast_layer_1->setOutputType(0, nvinfer1::DataType::kINT32); + + // Calculate the number of 1 : (b,1,m,1) -> (b) + uint32_t reduce_dim_0 = 0; + reduce_dim_0 |= 1 << 1; // 00000000000000000000000000000010 + reduce_dim_0 |= 1 << 2; // 00000000000000000000000000000110 + reduce_dim_0 |= 1 << 3; // 00000000000000000000000000001110 + bool keep_dim = false; + nvinfer1::ReduceOperation reduce_type = + nvinfer1::ReduceOperation::kSUM; + auto* reduce_sum_layer = + TRT_ENGINE_ADD_LAYER(engine_, + Reduce, + *cast_layer_1->getOutput(0), + reduce_type, + reduce_dim_0, + 
keep_dim); + std::vector inputs_transformer; + inputs_transformer.emplace_back(input); + inputs_transformer.emplace_back( + reduce_sum_layer->getOutput(0)); // (b,m) + plugin::TransformerInputConvertPlugin* plugin = + new plugin::TransformerInputConvertPlugin(); + nvinfer1::ILayer* transformer_input_layer = engine_->AddDynamicPlugin( + inputs_transformer.data(), inputs_transformer.size(), plugin); + + new_input = transformer_input_layer->getOutput(0); + mask_tensor = transformer_input_layer->getOutput(1); + pos_id_tensor = transformer_input_layer->getOutput(2); + max_seqlen_tensor = transformer_input_layer->getOutput(3); + } if (engine_->with_interleaved()) { VLOG(4) << "fused multihead_matmul op: use_varseqlen and " "with_interleaved"; @@ -111,7 +204,7 @@ class MultiheadMatMulOpConverter : public OpConverter { float dp_probs = 1.0 / 127.0; nvinfer1::DimsHW nv_ksize(1, 1); fc_layer = TRT_ENGINE_ADD_LAYER( - engine_, Convolution, *input, n, nv_ksize, weight, bias); + engine_, Convolution, *new_input, n, nv_ksize, weight, bias); fc_layer->setName( ("Multihead: Convolution/FullyConnected: (Output: " + output_name + ")") @@ -220,10 +313,10 @@ class MultiheadMatMulOpConverter : public OpConverter { if (op_desc.HasAttr("Input_scale")) { nvinfer1::DimsHW nv_ksize(1, 1); fc_layer = TRT_ENGINE_ADD_LAYER( - engine_, Convolution, *input, n, nv_ksize, weight, bias); + engine_, Convolution, *new_input, n, nv_ksize, weight, bias); } else { fc_layer = TRT_ENGINE_ADD_LAYER( - engine_, FullyConnected, *input, n, weight, bias); + engine_, FullyConnected, *new_input, n, weight, bias); } if (op_desc.HasAttr("fc_out_threshold")) { @@ -282,14 +375,28 @@ class MultiheadMatMulOpConverter : public OpConverter { std::vector plugin_inputs; plugin_inputs.emplace_back(fc_layer->getOutput(0)); - plugin_inputs.emplace_back(engine_->GetITensor("qkv_plugin_mask")); + plugin_inputs.emplace_back(mask_tensor); plugin_inputs.emplace_back(pos_id_tensor); plugin_inputs.emplace_back( max_seqlen_tensor); // max_seqlen, eval_placeholder_3 auto plugin_layer = engine_->network()->addPluginV2( plugin_inputs.data(), plugin_inputs.size(), *plugin); - layer = plugin_layer; + + // recover no_varlen output + if (!flag_varseqlen) { + std::vector output_transformer; + output_transformer.emplace_back(plugin_layer->getOutput(0)); + output_transformer.emplace_back(input); + output_transformer.emplace_back(pos_id_tensor); + plugin::TransformerOutputConvertPlugin* plugin = + new plugin::TransformerOutputConvertPlugin(); + nvinfer1::ILayer* transformer_output_layer = + engine_->AddDynamicPlugin(output_transformer.data(), + output_transformer.size(), + plugin); + layer = transformer_output_layer; + } } } else { if (input_dims.d[1] <= 384 && !bias_qk_attr && diff --git a/paddle/fluid/inference/tensorrt/convert/transformer_input_convert_op.cc b/paddle/fluid/inference/tensorrt/convert/transformer_input_convert_op.cc index a9b80f076aab87..37257b95647300 100644 --- a/paddle/fluid/inference/tensorrt/convert/transformer_input_convert_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/transformer_input_convert_op.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -#include "paddle/fluid/inference/tensorrt/plugin/transformer_input_convert_plugin.h" +#include "paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt index a72880780d81e7..2ecb8c8c71bcf9 100644 --- a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt @@ -26,7 +26,7 @@ list( deformable_conv_op_plugin.cu matmul_op_int8_plugin.cu multihead_matmul_roformer_plugin.cu - transformer_input_convert_plugin.cu + transformer_input_output_convert_plugin.cu remove_padding_plugin.cu recover_padding_plugin.cu c_allreduce_op_plugin.cu diff --git a/paddle/fluid/inference/tensorrt/plugin/remove_padding_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/remove_padding_plugin.cu index a18c0d0c7294b3..ec874f71ad7688 100644 --- a/paddle/fluid/inference/tensorrt/plugin/remove_padding_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/remove_padding_plugin.cu @@ -105,7 +105,6 @@ int RemovePaddingPlugin::enqueue(const nvinfer1::PluginTensorDesc* inputDesc, void* const* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT { - const auto input_desc = inputDesc[0]; const half* input0 = static_cast(inputs[0]); const int32_t* input1 = static_cast(inputs[1]); // pos_id_tensor diff --git a/paddle/fluid/inference/tensorrt/plugin/transformer_input_convert_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/transformer_input_convert_plugin.cu deleted file mode 100644 index c9e48525843447..00000000000000 --- a/paddle/fluid/inference/tensorrt/plugin/transformer_input_convert_plugin.cu +++ /dev/null @@ -1,122 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/inference/tensorrt/plugin/transformer_input_convert_plugin.h" - -namespace paddle { -namespace inference { -namespace tensorrt { -namespace plugin { - -__global__ void TransformerInputConvertKernel(const int64_t* input, - int32_t* output0) { - int tid = blockIdx.x * blockDim.x + threadIdx.x; - __shared__ int32_t shared_data; - if (threadIdx.x == static_cast(input[tid])) { - atomicAdd(&shared_data, 1); - } - output0[0] = 0; - output0[blockIdx.x + 1] = shared_data; - __syncthreads(); - for (int i = 0; i < blockDim.x; ++i) { - output0[i + 1] += output0[i]; - } -} - -nvinfer1::DataType TransformerInputConvertPlugin::getOutputDataType( - int index, - const nvinfer1::DataType* input_types, - int nb_inputs) const TRT_NOEXCEPT { - return nvinfer1::DataType::kINT32; -} - -nvinfer1::DimsExprs TransformerInputConvertPlugin::getOutputDimensions( - int outputIndex, - const nvinfer1::DimsExprs* inputs, - int nbInputs, - nvinfer1::IExprBuilder& exprBuilder) TRT_NOEXCEPT { - nvinfer1::DimsExprs output_dims{}; - output_dims.nbDims = 1; - if (outputIndex == 0) { // PosId - const auto* one = exprBuilder.constant(1); - output_dims.d[0] = exprBuilder.operation( - nvinfer1::DimensionOperation::kSUM, *inputs[0].d[0], *one); - } else { // MaxSeqlen - output_dims.d[0] = inputs[0].d[1]; - } - return output_dims; -} - -bool TransformerInputConvertPlugin::supportsFormatCombination( - int pos, - const nvinfer1::PluginTensorDesc* inOut, - int nbInputs, - int nbOutputs) TRT_NOEXCEPT { - PADDLE_ENFORCE_EQ(nbInputs, - 1, - platform::errors::InvalidArgument("Must have 1 inputs, " - "but got %d input(s). ", - nbInputs)); - PADDLE_ENFORCE_EQ(nbOutputs, - getNbOutputs(), - platform::errors::InvalidArgument("Must have 2 output, " - "but got %d output(s). ", - nbOutputs)); - if (pos == 0) { // input - return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR; - } else { // output0, output1 - return inOut[pos].type == nvinfer1::DataType::kINT32 && - inOut[pos].format == nvinfer1::TensorFormat::kLINEAR; - } -} - -void TransformerInputConvertPlugin::configurePlugin( - const nvinfer1::DynamicPluginTensorDesc* inputs, - int nbInputs, - const nvinfer1::DynamicPluginTensorDesc* outputs, - int nbOutputs) TRT_NOEXCEPT {} - -void TransformerInputConvertPlugin::attachToContext( - cudnnContext* cudnnContext, - cublasContext* cublasContext, - nvinfer1::IGpuAllocator* gpuAllocator) TRT_NOEXCEPT {} - -void TransformerInputConvertPlugin::detachFromContext() TRT_NOEXCEPT {} - -void TransformerInputConvertPlugin::terminate() TRT_NOEXCEPT {} - -int TransformerInputConvertPlugin::enqueue( - const nvinfer1::PluginTensorDesc* inputDesc, - const nvinfer1::PluginTensorDesc* outputDesc, - const void* const* inputs, - void* const* outputs, - void* workspace, - cudaStream_t stream) TRT_NOEXCEPT { - const auto input_desc = inputDesc[0]; - const int64_t* input = static_cast(inputs[0]); - int32_t* output0 = static_cast(outputs[0]); // PosId - // int32_t* output1 = static_cast(outputs[1]); // MaxSeqlen - - const int32_t num_blocks = input_desc.dims.d[0]; // batchs - const int32_t num_threads = input_desc.dims.d[1]; // max sequnce length - - TransformerInputConvertKernel<<>>( - input, output0); - return cudaGetLastError() != cudaSuccess; -} - -} // namespace plugin -} // namespace tensorrt -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.cu new file mode 
100644 index 00000000000000..39e2a0b422463c --- /dev/null +++ b/paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.cu @@ -0,0 +1,356 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.h" +#include "cub/cub.cuh" + +namespace paddle { +namespace inference { +namespace tensorrt { +namespace plugin { + +__global__ void remove_padding_kernel(const half* input0, + const int32_t* input1, + half* output) { + int word_id = blockIdx.x * gridDim.y + blockIdx.y; + int32_t seqence_length = input1[blockIdx.x + 1] - input1[blockIdx.x]; + if (blockIdx.y < seqence_length) { + output[(input1[blockIdx.x] + blockIdx.y) * gridDim.z * blockDim.x + + blockIdx.z * blockDim.x + threadIdx.x] = + input0[word_id * gridDim.z * blockDim.x + blockIdx.z * blockDim.x + + threadIdx.x]; + } +} + +__global__ void recover_padding_kernel(const half* input0, + const int32_t* input1, + half* output) { + int word_id = blockIdx.x * gridDim.y + blockIdx.y; + int32_t seqence_length = input1[blockIdx.x + 1] - input1[blockIdx.x]; + if (blockIdx.y < seqence_length) { + output[word_id * gridDim.z * blockDim.x + blockIdx.z * blockDim.x + + threadIdx.x] = + input0[(input1[blockIdx.x] + blockIdx.y) * gridDim.z * blockDim.x + + blockIdx.z * blockDim.x + threadIdx.x]; + } else { + output[word_id * gridDim.z * blockDim.x + blockIdx.z * blockDim.x + + threadIdx.x] = 0; + } +} + +nvinfer1::DataType TransformerInputConvertPlugin::getOutputDataType( + int index, + const nvinfer1::DataType* input_types, + int nb_inputs) const TRT_NOEXCEPT { + if (index == 0) { // new input + return nvinfer1::DataType::kHALF; + } else if (index == 1) { // mask + return nvinfer1::DataType::kHALF; + } else if (index == 2) { // pos id + return nvinfer1::DataType::kINT32; + } else if (index == 3) { // max_seqlen_tensor + return nvinfer1::DataType::kHALF; + } +} + +nvinfer1::DimsExprs TransformerInputConvertPlugin::getOutputDimensions( + int outputIndex, + const nvinfer1::DimsExprs* inputs, + int nbInputs, + nvinfer1::IExprBuilder& exprBuilder) TRT_NOEXCEPT { + constexpr size_t threadsPerCta384 = 1 * 8 * 32; + constexpr size_t xmmasM384 = 24; + constexpr size_t packedMaskSize384 = xmmasM384 * threadsPerCta384; + int32_t maskSize_ = packedMaskSize384; + auto maskSize = exprBuilder.constant(maskSize_); + auto fp16maskSize = exprBuilder.operation( + nvinfer1::DimensionOperation::kPROD, *maskSize, *exprBuilder.constant(2)); + + auto one = exprBuilder.constant(1); + auto B = inputs[0].d[0]; + auto MaxLength = inputs[0].d[1]; + auto Hidden = inputs[0].d[2]; + + nvinfer1::DimsExprs output_dims; + if (outputIndex == 0) { // new input + output_dims.nbDims = 4; + output_dims.d[0] = exprBuilder.operation( + nvinfer1::DimensionOperation::kPROD, *B, *MaxLength); + output_dims.d[1] = Hidden; + output_dims.d[2] = exprBuilder.constant(1); + output_dims.d[3] = exprBuilder.constant(1); + } else if (outputIndex == 1) { // mask 
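// TransformerInputConvertPlugin emits four tensors: the padding-free (varlen)
// input, an fp16 mask buffer sized for the 384-token fused MHA kernel
// (2 * xmmasM384 * threadsPerCta384 entries per batch), the pos_id offsets,
// and a max_seqlen tensor whose length carries the padded sequence length.
// pos_id is produced in enqueue() by cub::DeviceScan::ExclusiveSum over the
// per-batch valid-token counts the converter derived from BiasQK; e.g. for
// B = 2 with valid lengths {3, 5}, pos_id = {0, 3, 8} and the varlen input
// packs 8 token rows.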
+ output_dims.nbDims = 2; + output_dims.d[0] = B; + output_dims.d[1] = fp16maskSize; + } else if (outputIndex == 2) { // pos id + output_dims.nbDims = 1; + output_dims.d[0] = + exprBuilder.operation(nvinfer1::DimensionOperation::kSUM, *B, *one); + } else if (outputIndex == 3) { // max_seqlen_tensor + output_dims.nbDims = 1; + output_dims.d[0] = MaxLength; + } + return output_dims; +} + +bool TransformerInputConvertPlugin::supportsFormatCombination( + int pos, + const nvinfer1::PluginTensorDesc* inOut, + int nbInputs, + int nbOutputs) TRT_NOEXCEPT { + PADDLE_ENFORCE_EQ(nbInputs, + 2, + platform::errors::InvalidArgument( + "TransformerInputConvertPlugin must have 2 inputs, " + "but got %d input(s). ", + nbInputs)); + PADDLE_ENFORCE_EQ(nbOutputs, + 4, + platform::errors::InvalidArgument( + "TransformerInputConvertPlugin must have 4 outputs, " + "but got %d output(s). ", + nbOutputs)); + if (pos == 0) { // input + return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && + inOut[pos].type == nvinfer1::DataType::kHALF; + } else if (pos == 1) { // reducesum_qk_bias + return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && + inOut[pos].type == nvinfer1::DataType::kINT32; + } else if (pos == 2) { // new input + return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && + inOut[pos].type == nvinfer1::DataType::kHALF; + } else if (pos == 3) { // mask + return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && + inOut[pos].type == nvinfer1::DataType::kHALF; + } else if (pos == 4) { // pos id + return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && + inOut[pos].type == nvinfer1::DataType::kINT32; + } else if (pos == 5) { // max_seqlen_tensor + return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && + inOut[pos].type == nvinfer1::DataType::kHALF; + } +} + +void TransformerInputConvertPlugin::configurePlugin( + const nvinfer1::DynamicPluginTensorDesc* inputs, + int nbInputs, + const nvinfer1::DynamicPluginTensorDesc* outputs, + int nbOutputs) TRT_NOEXCEPT {} + +void TransformerInputConvertPlugin::attachToContext( + cudnnContext* cudnnContext, + cublasContext* cublasContext, + nvinfer1::IGpuAllocator* gpuAllocator) TRT_NOEXCEPT {} + +void TransformerInputConvertPlugin::detachFromContext() TRT_NOEXCEPT {} + +void TransformerInputConvertPlugin::terminate() TRT_NOEXCEPT {} + +int TransformerInputConvertPlugin::enqueue( + const nvinfer1::PluginTensorDesc* inputDesc, + const nvinfer1::PluginTensorDesc* outputDesc, + const void* const* inputs, + void* const* outputs, + void* workspace, + cudaStream_t stream) TRT_NOEXCEPT { + // input(no_varlen), reducesum_qk_bias, input(varlen), mask, pos_id, + // max_seqlen_tensor + const half* input0 = static_cast(inputs[0]); // input(no_varlen) + const int32_t* input1 = + static_cast(inputs[1]); // reducesum_qk_bias + half* output0 = static_cast(outputs[0]); // input(varlen) + int32_t* output2 = static_cast(outputs[2]); // pos_id + const auto input0_desc = inputDesc[0]; + const int32_t B = input0_desc.dims.d[0]; // batchs + const int32_t MaxLength = input0_desc.dims.d[1]; // max token length + const int32_t HiddenSize = input0_desc.dims.d[2]; // hidden size + + // Determine temporary device storage requirements + void* d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + cub::DeviceScan::ExclusiveSum( + d_temp_storage, temp_storage_bytes, input1, output2, B + 1); + // Allocate temporary storage + cudaMalloc(&d_temp_storage, temp_storage_bytes); + + // Run exclusive prefix sum + cub::DeviceScan::ExclusiveSum( + d_temp_storage, 
temp_storage_bytes, input1, output2, B + 1); + const int32_t vector_length = HiddenSize; + int32_t num_threads; + if (vector_length < 1024) { + num_threads = vector_length; + } else { + if (vector_length % 512 == 0) { + num_threads = 512; + } else if (vector_length % 256 == 0) { + num_threads = 256; + } else if (vector_length % 128 == 0) { + num_threads = 128; + } else if (vector_length % 64 == 0) { + num_threads = 64; + } else if (vector_length % 32 == 0) { + num_threads = 32; + } else if (vector_length % 16 == 0) { + num_threads = 16; + } else if (vector_length % 8 == 0) { + num_threads = 8; + } else if (vector_length % 4 == 0) { + num_threads = 4; + } else if (vector_length % 2 == 0) { + num_threads = 2; + } else { + num_threads = 1; + } + } + const dim3 num_blocks( + B, + MaxLength, + vector_length / + num_threads); // batchs, max sequnce length, input0.dims.d[2]/* + remove_padding_kernel<<>>( + input0, output2, output0); // input(no_varlen), pos_id, input(varlen) + return cudaGetLastError() != cudaSuccess; +} + +nvinfer1::DataType TransformerOutputConvertPlugin::getOutputDataType( + int index, + const nvinfer1::DataType* input_types, + int nb_inputs) const TRT_NOEXCEPT { + if (index == 0) { + return nvinfer1::DataType::kHALF; + } +} + +nvinfer1::DimsExprs TransformerOutputConvertPlugin::getOutputDimensions( + int outputIndex, + const nvinfer1::DimsExprs* inputs, + int nbInputs, + nvinfer1::IExprBuilder& exprBuilder) TRT_NOEXCEPT { + nvinfer1::DimsExprs output_dims; + if (outputIndex == 0) { + output_dims = inputs[1]; + } + return output_dims; +} + +bool TransformerOutputConvertPlugin::supportsFormatCombination( + int pos, + const nvinfer1::PluginTensorDesc* inOut, + int nbInputs, + int nbOutputs) TRT_NOEXCEPT { + PADDLE_ENFORCE_EQ(nbInputs, + 3, + platform::errors::InvalidArgument( + "TransformerOutputConvertPlugin must have 3 inputs, " + "but got %d input(s). ", + nbInputs)); + PADDLE_ENFORCE_EQ(nbOutputs, + 1, + platform::errors::InvalidArgument( + "TransformerOutputConvertPlugin must have 1 output, " + "but got %d output(s). 
", + nbOutputs)); + if (pos == 0) { // qkv plugin output(varlen) + return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && + inOut[pos].type == nvinfer1::DataType::kHALF; + } else if (pos == 1) { // qkv plugin input(no_varlen) + return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && + inOut[pos].type == nvinfer1::DataType::kHALF; + } else if (pos == 2) { // pos id + return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && + inOut[pos].type == nvinfer1::DataType::kINT32; + } else if (pos == 3) { // qkv plugin output(no_varlen) + return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR && + inOut[pos].type == nvinfer1::DataType::kHALF; + } +} + +void TransformerOutputConvertPlugin::configurePlugin( + const nvinfer1::DynamicPluginTensorDesc* inputs, + int nbInputs, + const nvinfer1::DynamicPluginTensorDesc* outputs, + int nbOutputs) TRT_NOEXCEPT {} + +void TransformerOutputConvertPlugin::attachToContext( + cudnnContext* cudnnContext, + cublasContext* cublasContext, + nvinfer1::IGpuAllocator* gpuAllocator) TRT_NOEXCEPT {} + +void TransformerOutputConvertPlugin::detachFromContext() TRT_NOEXCEPT {} + +void TransformerOutputConvertPlugin::terminate() TRT_NOEXCEPT {} + +int TransformerOutputConvertPlugin::enqueue( + const nvinfer1::PluginTensorDesc* inputDesc, + const nvinfer1::PluginTensorDesc* outputDesc, + const void* const* inputs, + void* const* outputs, + void* workspace, + cudaStream_t stream) TRT_NOEXCEPT { + const half* input0 = + static_cast(inputs[0]); // qkv plugin output(varlen) + const half* input1 = + static_cast(inputs[1]); // qkv plugin input(no_varlen) + const int32_t* input2 = static_cast(inputs[2]); // pos id + half* output = + static_cast(outputs[0]); // qkv plugin output(no_varlen) + const auto input1_desc = inputDesc[1]; + const int32_t B = input1_desc.dims.d[0]; // batchs + const int32_t MaxLength = input1_desc.dims.d[1]; // max token length + const int32_t HiddenSize = input1_desc.dims.d[2]; // hidden size + + const int32_t vector_length = HiddenSize; + int32_t num_threads; + if (vector_length < 1024) { + num_threads = vector_length; + } else { + if (vector_length % 512 == 0) { + num_threads = 512; + } else if (vector_length % 256 == 0) { + num_threads = 256; + } else if (vector_length % 128 == 0) { + num_threads = 128; + } else if (vector_length % 64 == 0) { + num_threads = 64; + } else if (vector_length % 32 == 0) { + num_threads = 32; + } else if (vector_length % 16 == 0) { + num_threads = 16; + } else if (vector_length % 8 == 0) { + num_threads = 8; + } else if (vector_length % 4 == 0) { + num_threads = 4; + } else if (vector_length % 2 == 0) { + num_threads = 2; + } else { + num_threads = 1; + } + } + const dim3 num_blocks( + B, + MaxLength, + vector_length / num_threads); // batchs, max sequnce length + // (mask_id.dims.d[1]), + // input.dims.d[1]/* + recover_padding_kernel<<>>( + input0, input2, output); + return cudaGetLastError() != cudaSuccess; +} + +} // namespace plugin +} // namespace tensorrt +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/plugin/transformer_input_convert_plugin.h b/paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.h similarity index 53% rename from paddle/fluid/inference/tensorrt/plugin/transformer_input_convert_plugin.h rename to paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.h index 43ca34c4276004..80a5bfa5b1dfc5 100644 --- a/paddle/fluid/inference/tensorrt/plugin/transformer_input_convert_plugin.h +++ 
b/paddle/fluid/inference/tensorrt/plugin/transformer_input_output_convert_plugin.h @@ -40,14 +40,14 @@ class TransformerInputConvertPlugin : public DynamicPluginTensorRT { return "transformer_input_convert_plugin"; } - int getNbOutputs() const TRT_NOEXCEPT override { return 2; } + int getNbOutputs() const TRT_NOEXCEPT override { return 4; } int initialize() TRT_NOEXCEPT { return 0; } void terminate() TRT_NOEXCEPT; nvinfer1::DimsExprs getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, - nvinfer1::IExprBuilder& exprBuilder) + nvinfer1::IExprBuilder& exprBuilder) // NOLINT TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, @@ -134,7 +134,120 @@ class TransformerInputConvertPluginCreator : public nvinfer1::IPluginCreator { std::string plugin_name_; nvinfer1::PluginFieldCollection field_collection_{0, nullptr}; }; + +class TransformerOutputConvertPlugin : public DynamicPluginTensorRT { + public: + TransformerOutputConvertPlugin() {} + + TransformerOutputConvertPlugin(void const* serial_data, + size_t serial_length) {} + + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { + TransformerOutputConvertPlugin* ptr = new TransformerOutputConvertPlugin(); + return ptr; + } + + const char* getPluginType() const TRT_NOEXCEPT override { + return "transformer_output_convert_plugin"; + } + + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + + int initialize() TRT_NOEXCEPT { return 0; } + void terminate() TRT_NOEXCEPT; + nvinfer1::DimsExprs getOutputDimensions(int outputIndex, + const nvinfer1::DimsExprs* inputs, + int nbInputs, + nvinfer1::IExprBuilder& exprBuilder) // NOLINT + TRT_NOEXCEPT override; + + bool supportsFormatCombination(int pos, + const nvinfer1::PluginTensorDesc* inOut, + int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; + + void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* inputs, + int nbInputs, + const nvinfer1::DynamicPluginTensorDesc* outputs, + int nbOutputs) TRT_NOEXCEPT override; + + size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, + int nbInputs, + const nvinfer1::PluginTensorDesc* outputs, + int nbOutputs) const TRT_NOEXCEPT override { + return 0; + } + + void attachToContext(cudnnContext* cudnnContext, + cublasContext* cublasContext, + nvinfer1::IGpuAllocator* gpuAllocator) + TRT_NOEXCEPT override; + + void detachFromContext() TRT_NOEXCEPT override; + + int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, + const nvinfer1::PluginTensorDesc* outputDesc, + const void* const* inputs, + void* const* outputs, + void* workspace, + cudaStream_t stream) TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType(int index, + const nvinfer1::DataType* inputTypes, + int nbInputs) const + TRT_NOEXCEPT override; + + void destroy() TRT_NOEXCEPT override { delete this; } + + protected: + size_t getSerializationSize() const TRT_NOEXCEPT override { return 0; } + + void serialize(void* buffer) const TRT_NOEXCEPT override {} +}; + +class TransformerOutputConvertPluginCreator : public nvinfer1::IPluginCreator { + public: + TransformerOutputConvertPluginCreator() {} + const char* getPluginName() const TRT_NOEXCEPT override { + return "transformer_output_convert_plugin"; + } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } + + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override { + return &field_collection_; + } + + nvinfer1::IPluginV2* createPlugin( + const char* name, const nvinfer1::PluginFieldCollection* plugin_field) + 
TRT_NOEXCEPT override { + return nullptr; + } + + nvinfer1::IPluginV2* deserializePlugin(const char* name, + void const* serial_data, + size_t serial_length) + TRT_NOEXCEPT override { + TransformerOutputConvertPlugin* obj = + new TransformerOutputConvertPlugin(serial_data, serial_length); + obj->setPluginNamespace(name); + return obj; + } + + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override { + plugin_namespace_ = lib_namespace; + } + + const char* getPluginNamespace() const TRT_NOEXCEPT override { + return plugin_namespace_.c_str(); + } + + private: + std::string plugin_namespace_; + std::string plugin_name_; + nvinfer1::PluginFieldCollection field_collection_{0, nullptr}; +}; + REGISTER_TRT_PLUGIN_V2(TransformerInputConvertPluginCreator); +REGISTER_TRT_PLUGIN_V2(TransformerOutputConvertPluginCreator); } // namespace plugin } // namespace tensorrt } // namespace inference From 3cbca60f6b635d24b9cb90c1406f3f72e742b458 Mon Sep 17 00:00:00 2001 From: wangguanzhong Date: Thu, 1 Dec 2022 19:58:51 +0800 Subject: [PATCH 091/154] fix trt roi_align test (#48570) --- .../ir/inference/test_trt_convert_roi_align.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roi_align.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roi_align.py index eafd8debc27e79..1e1a83a40e48ac 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roi_align.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roi_align.py @@ -35,7 +35,20 @@ def generate_input2(attrs: List[Dict[str, Any]], batch): return np.random.random([3, 4]).astype(np.float32) def generate_input3(attrs: List[Dict[str, Any]], batch): - return np.random.random([batch]).astype(np.int32) + if batch == 1: + return np.array([3]).astype(np.int32) + if batch == 2: + return np.array([1, 2]).astype(np.int32) + if batch == 4: + return np.array([1, 1, 0, 1]).astype(np.int32) + + def generate_lod(batch): + if batch == 1: + return [[0, 3]] + if batch == 2: + return [[0, 1, 3]] + if batch == 4: + return [[0, 1, 2, 2, 3]] for num_input in [0, 1]: for batch in [1, 2, 4]: @@ -96,7 +109,7 @@ def generate_input3(attrs: List[Dict[str, Any]], batch): data_gen=partial( generate_input2, dics, batch ), - lod=[[32, 3]], + lod=generate_lod(batch), ), }, ] From 3d35aa802fea2d91d17160584e767d635026f42e Mon Sep 17 00:00:00 2001 From: zyfncg Date: Thu, 1 Dec 2022 21:12:29 +0800 Subject: [PATCH 092/154] Rename kernel for top_k, slogdeterminant, generate_proposals_v2 (#48594) * rename kernel for top_k, slogdeterminant, generate_proposals_v2 * fix bug --- paddle/phi/api/yaml/legacy_backward.yaml | 4 +- paddle/phi/api/yaml/legacy_ops.yaml | 6 +-- paddle/phi/core/compat/op_utils.h | 3 +- ...kernel.cc => generate_proposals_kernel.cc} | 36 ++++++++--------- .../cpu/slogdeterminant_grad_kernel.cc | 2 +- .../phi/kernels/cpu/slogdeterminant_kernel.cc | 8 +--- paddle/phi/kernels/cpu/top_k_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/top_k_kernel.cc | 2 +- .../phi/kernels/generate_proposals_kernel.h | 38 ++++++++++++++++++ .../kernels/generate_proposals_v2_kernel.h | 38 ------------------ ...kernel.cu => generate_proposals_kernel.cu} | 39 +++++++++---------- .../gpu/slogdeterminant_grad_kernel.cu | 2 +- .../phi/kernels/gpu/slogdeterminant_kernel.cu | 8 +--- paddle/phi/kernels/gpu/top_k_grad_kernel.cu | 2 +- paddle/phi/kernels/gpu/top_k_kernel.cu | 2 +- ...kernel.cc => generate_proposals_kernel.cc} | 39 
+++++++++---------- paddle/phi/kernels/xpu/top_k_kernel.cc | 2 +- .../phi/ops/compat/generate_proposals_sig.cc | 19 +++++++++ paddle/phi/ops/compat/slogdeterminant_sig.cc | 5 ++- paddle/phi/ops/compat/top_k_sig.cc | 10 ++--- 20 files changed, 138 insertions(+), 129 deletions(-) rename paddle/phi/kernels/cpu/{generate_proposals_v2_kernel.cc => generate_proposals_kernel.cc} (93%) create mode 100644 paddle/phi/kernels/generate_proposals_kernel.h delete mode 100644 paddle/phi/kernels/generate_proposals_v2_kernel.h rename paddle/phi/kernels/gpu/{generate_proposals_v2_kernel.cu => generate_proposals_kernel.cu} (95%) rename paddle/phi/kernels/xpu/{generate_proposals_v2_kernel.cc => generate_proposals_kernel.cc} (93%) create mode 100644 paddle/phi/ops/compat/generate_proposals_sig.cc diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index 47ba24b091d7a7..064c6b00a88494 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -1504,7 +1504,7 @@ func : UnchangedInferMeta param : [x] kernel : - func : slogdeterminant_grad + func : slogdet_grad - backward_op : softmax_grad forward : softmax (Tensor x, int axis) -> Tensor(out) @@ -1713,7 +1713,7 @@ func : UnchangedInferMeta param : [x] kernel : - func : top_k_grad + func : topk_grad - backward_op : transpose_double_grad forward : transpose_grad (Tensor grad_out, int[] perm) -> Tensor(grad_x) diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index 1bc0fc7f0aa43b..d32a853b8c0940 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -878,7 +878,7 @@ infer_meta : func : GenerateProposalsV2InferMeta kernel : - func : generate_proposals_v2 + func : generate_proposals - op : greater_equal args : (Tensor x, Tensor y) @@ -1935,7 +1935,7 @@ infer_meta : func : UnchangedInferMeta kernel : - func : slogdeterminant + func : slogdet backward : slogdet_grad - op : softmax @@ -2100,7 +2100,7 @@ infer_meta : func : TopKInferMeta kernel : - func : top_k + func : topk backward : topk_grad - op : transpose diff --git a/paddle/phi/core/compat/op_utils.h b/paddle/phi/core/compat/op_utils.h index b836359ae817b3..2145d73cd9f374 100644 --- a/paddle/phi/core/compat/op_utils.h +++ b/paddle/phi/core/compat/op_utils.h @@ -83,7 +83,8 @@ static const std::unordered_set deprecated_op_names( "bicubic_interp", "bicubic_interp_grad", "crop", - "crop_grad"}); + "crop_grad", + "generate_proposals"}); class DefaultKernelSignatureMap { public: diff --git a/paddle/phi/kernels/cpu/generate_proposals_v2_kernel.cc b/paddle/phi/kernels/cpu/generate_proposals_kernel.cc similarity index 93% rename from paddle/phi/kernels/cpu/generate_proposals_v2_kernel.cc rename to paddle/phi/kernels/cpu/generate_proposals_kernel.cc index 22f39555449a1a..4a9569c045c0ba 100644 --- a/paddle/phi/kernels/cpu/generate_proposals_v2_kernel.cc +++ b/paddle/phi/kernels/cpu/generate_proposals_kernel.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/phi/kernels/generate_proposals_v2_kernel.h" +#include "paddle/phi/kernels/generate_proposals_kernel.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/detection/nms_util.h" @@ -284,21 +284,21 @@ std::pair ProposalForOneImage( } template -void GenerateProposalsV2Kernel(const Context& ctx, - const DenseTensor& scores, - const DenseTensor& bbox_deltas, - const DenseTensor& im_shape, - const DenseTensor& anchors, - const DenseTensor& variances, - int pre_nms_top_n, - int post_nms_top_n, - float nms_thresh, - float min_size, - float eta, - bool pixel_offset, - DenseTensor* rpn_rois, - DenseTensor* rpn_roi_probs, - DenseTensor* rpn_rois_num) { +void GenerateProposalsKernel(const Context& ctx, + const DenseTensor& scores, + const DenseTensor& bbox_deltas, + const DenseTensor& im_shape, + const DenseTensor& anchors, + const DenseTensor& variances, + int pre_nms_top_n, + int post_nms_top_n, + float nms_thresh, + float min_size, + float eta, + bool pixel_offset, + DenseTensor* rpn_rois, + DenseTensor* rpn_roi_probs, + DenseTensor* rpn_rois_num) { auto& scores_dim = scores.dims(); int64_t num = scores_dim[0]; int64_t c_score = scores_dim[1]; @@ -384,9 +384,9 @@ void GenerateProposalsV2Kernel(const Context& ctx, } // namespace phi -PD_REGISTER_KERNEL(generate_proposals_v2, +PD_REGISTER_KERNEL(generate_proposals, CPU, ALL_LAYOUT, - phi::GenerateProposalsV2Kernel, + phi::GenerateProposalsKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/slogdeterminant_grad_kernel.cc b/paddle/phi/kernels/cpu/slogdeterminant_grad_kernel.cc index 0854895f0c1c66..5f265ab9bc87ef 100644 --- a/paddle/phi/kernels/cpu/slogdeterminant_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/slogdeterminant_grad_kernel.cc @@ -17,7 +17,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/slogdeterminant_grad_kernel_impl.h" -PD_REGISTER_KERNEL(slogdeterminant_grad, +PD_REGISTER_KERNEL(slogdet_grad, CPU, ALL_LAYOUT, phi::SlogDeterminantGradKernel, diff --git a/paddle/phi/kernels/cpu/slogdeterminant_kernel.cc b/paddle/phi/kernels/cpu/slogdeterminant_kernel.cc index 6bd9f0296c62cb..8e96c163164c43 100644 --- a/paddle/phi/kernels/cpu/slogdeterminant_kernel.cc +++ b/paddle/phi/kernels/cpu/slogdeterminant_kernel.cc @@ -17,9 +17,5 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/slogdeterminant_kernel_impl.h" -PD_REGISTER_KERNEL(slogdeterminant, - CPU, - ALL_LAYOUT, - phi::SlogDeterminantKernel, - float, - double) {} +PD_REGISTER_KERNEL( + slogdet, CPU, ALL_LAYOUT, phi::SlogDeterminantKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/top_k_grad_kernel.cc b/paddle/phi/kernels/cpu/top_k_grad_kernel.cc index e44f85fb6c0fb0..2d02b0ab523b23 100644 --- a/paddle/phi/kernels/cpu/top_k_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/top_k_grad_kernel.cc @@ -141,7 +141,7 @@ void TopkGradKernel(const Context& dev_ctx, } // namespace phi -PD_REGISTER_KERNEL(top_k_grad, +PD_REGISTER_KERNEL(topk_grad, CPU, ALL_LAYOUT, phi::TopkGradKernel, diff --git a/paddle/phi/kernels/cpu/top_k_kernel.cc b/paddle/phi/kernels/cpu/top_k_kernel.cc index 4ac16667ce2741..3e946803660768 100644 --- a/paddle/phi/kernels/cpu/top_k_kernel.cc +++ b/paddle/phi/kernels/cpu/top_k_kernel.cc @@ -227,4 +227,4 @@ void TopkKernel(const Context& dev_ctx, } // namespace phi PD_REGISTER_KERNEL( - top_k, CPU, ALL_LAYOUT, phi::TopkKernel, float, double, int32_t, int64_t) {} + topk, CPU, ALL_LAYOUT, phi::TopkKernel, float, 
double, int32_t, int64_t) {} diff --git a/paddle/phi/kernels/generate_proposals_kernel.h b/paddle/phi/kernels/generate_proposals_kernel.h new file mode 100644 index 00000000000000..e14b250a7d0634 --- /dev/null +++ b/paddle/phi/kernels/generate_proposals_kernel.h @@ -0,0 +1,38 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void GenerateProposalsKernel(const Context& ctx, + const DenseTensor& scores, + const DenseTensor& bbox_deltas, + const DenseTensor& im_shape, + const DenseTensor& anchors, + const DenseTensor& variances, + int pre_nms_top_n, + int post_nms_top_n, + float nms_thresh, + float min_size, + float eta, + bool pixel_offset, + DenseTensor* rpn_rois, + DenseTensor* rpn_roi_probs, + DenseTensor* rpn_rois_num); + +} // namespace phi diff --git a/paddle/phi/kernels/generate_proposals_v2_kernel.h b/paddle/phi/kernels/generate_proposals_v2_kernel.h deleted file mode 100644 index c2fc2677039f9a..00000000000000 --- a/paddle/phi/kernels/generate_proposals_v2_kernel.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "paddle/phi/core/dense_tensor.h" - -namespace phi { - -template -void GenerateProposalsV2Kernel(const Context& ctx, - const DenseTensor& scores, - const DenseTensor& bbox_deltas, - const DenseTensor& im_shape, - const DenseTensor& anchors, - const DenseTensor& variances, - int pre_nms_top_n, - int post_nms_top_n, - float nms_thresh, - float min_size, - float eta, - bool pixel_offset, - DenseTensor* rpn_rois, - DenseTensor* rpn_roi_probs, - DenseTensor* rpn_rois_num); - -} // namespace phi diff --git a/paddle/phi/kernels/gpu/generate_proposals_v2_kernel.cu b/paddle/phi/kernels/gpu/generate_proposals_kernel.cu similarity index 95% rename from paddle/phi/kernels/gpu/generate_proposals_v2_kernel.cu rename to paddle/phi/kernels/gpu/generate_proposals_kernel.cu index 91abb290dd86b1..f750bd5fe7eb92 100644 --- a/paddle/phi/kernels/gpu/generate_proposals_v2_kernel.cu +++ b/paddle/phi/kernels/gpu/generate_proposals_kernel.cu @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/phi/kernels/generate_proposals_v2_kernel.h" +#include "paddle/phi/kernels/generate_proposals_kernel.h" #include #include @@ -458,21 +458,21 @@ static std::pair ProposalForOneImage( } template -void GenerateProposalsV2Kernel(const Context &ctx, - const DenseTensor &scores, - const DenseTensor &bbox_deltas, - const DenseTensor &im_shape, - const DenseTensor &anchors, - const DenseTensor &variances, - int pre_nms_top_n, - int post_nms_top_n, - float nms_thresh, - float min_size, - float eta, - bool pixel_offset, - DenseTensor *rpn_rois, - DenseTensor *rpn_roi_probs, - DenseTensor *rpn_rois_num) { +void GenerateProposalsKernel(const Context &ctx, + const DenseTensor &scores, + const DenseTensor &bbox_deltas, + const DenseTensor &im_shape, + const DenseTensor &anchors, + const DenseTensor &variances, + int pre_nms_top_n, + int post_nms_top_n, + float nms_thresh, + float min_size, + float eta, + bool pixel_offset, + DenseTensor *rpn_rois, + DenseTensor *rpn_roi_probs, + DenseTensor *rpn_rois_num) { PADDLE_ENFORCE_GE( eta, 1., @@ -584,8 +584,5 @@ void GenerateProposalsV2Kernel(const Context &ctx, } // namespace phi -PD_REGISTER_KERNEL(generate_proposals_v2, - GPU, - ALL_LAYOUT, - phi::GenerateProposalsV2Kernel, - float) {} +PD_REGISTER_KERNEL( + generate_proposals, GPU, ALL_LAYOUT, phi::GenerateProposalsKernel, float) {} diff --git a/paddle/phi/kernels/gpu/slogdeterminant_grad_kernel.cu b/paddle/phi/kernels/gpu/slogdeterminant_grad_kernel.cu index 153a97fa7a50ec..f9f9055f57add2 100644 --- a/paddle/phi/kernels/gpu/slogdeterminant_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/slogdeterminant_grad_kernel.cu @@ -17,7 +17,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/slogdeterminant_grad_kernel_impl.h" -PD_REGISTER_KERNEL(slogdeterminant_grad, +PD_REGISTER_KERNEL(slogdet_grad, GPU, ALL_LAYOUT, phi::SlogDeterminantGradKernel, diff --git a/paddle/phi/kernels/gpu/slogdeterminant_kernel.cu b/paddle/phi/kernels/gpu/slogdeterminant_kernel.cu index e94dc117fb96e2..14a9b7e387bcbb 100644 --- a/paddle/phi/kernels/gpu/slogdeterminant_kernel.cu +++ b/paddle/phi/kernels/gpu/slogdeterminant_kernel.cu @@ -17,9 +17,5 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/slogdeterminant_kernel_impl.h" -PD_REGISTER_KERNEL(slogdeterminant, - GPU, - ALL_LAYOUT, - phi::SlogDeterminantKernel, - float, - double) {} +PD_REGISTER_KERNEL( + slogdet, GPU, ALL_LAYOUT, phi::SlogDeterminantKernel, float, double) {} diff --git a/paddle/phi/kernels/gpu/top_k_grad_kernel.cu b/paddle/phi/kernels/gpu/top_k_grad_kernel.cu index ae95923e7f6ff5..e20fec806873f1 100644 --- a/paddle/phi/kernels/gpu/top_k_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/top_k_grad_kernel.cu @@ -76,7 +76,7 @@ void TopkGradKernel(const Context& dev_ctx, } // namespace phi -PD_REGISTER_KERNEL(top_k_grad, +PD_REGISTER_KERNEL(topk_grad, GPU, ALL_LAYOUT, phi::TopkGradKernel, diff --git a/paddle/phi/kernels/gpu/top_k_kernel.cu b/paddle/phi/kernels/gpu/top_k_kernel.cu index c9ea86472f3f8e..a455d9305d955f 100644 --- a/paddle/phi/kernels/gpu/top_k_kernel.cu +++ b/paddle/phi/kernels/gpu/top_k_kernel.cu @@ -332,7 +332,7 @@ void TopkKernel(const Context& dev_ctx, } // namespace phi -PD_REGISTER_KERNEL(top_k, +PD_REGISTER_KERNEL(topk, GPU, ALL_LAYOUT, phi::TopkKernel, diff --git a/paddle/phi/kernels/xpu/generate_proposals_v2_kernel.cc b/paddle/phi/kernels/xpu/generate_proposals_kernel.cc similarity index 93% rename from paddle/phi/kernels/xpu/generate_proposals_v2_kernel.cc rename to 
paddle/phi/kernels/xpu/generate_proposals_kernel.cc index 5a91f5ad9d52f9..bf7f3e90bfd51a 100644 --- a/paddle/phi/kernels/xpu/generate_proposals_v2_kernel.cc +++ b/paddle/phi/kernels/xpu/generate_proposals_kernel.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/generate_proposals_v2_kernel.h" +#include "paddle/phi/kernels/generate_proposals_kernel.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" #include "paddle/phi/backends/xpu/xpu_context.h" @@ -272,21 +272,21 @@ std::pair ProposalForOneImage( } template -void GenerateProposalsV2Kernel(const Context& dev_ctx, - const DenseTensor& scores, - const DenseTensor& bbox_deltas, - const DenseTensor& im_shape, - const DenseTensor& anchors, - const DenseTensor& variances, - int pre_nms_top_n, - int post_nms_top_n, - float nms_thresh, - float min_size, - float eta, - bool pixel_offset, - DenseTensor* rpn_rois, - DenseTensor* rpn_roi_probs, - DenseTensor* rpn_rois_num) { +void GenerateProposalsKernel(const Context& dev_ctx, + const DenseTensor& scores, + const DenseTensor& bbox_deltas, + const DenseTensor& im_shape, + const DenseTensor& anchors, + const DenseTensor& variances, + int pre_nms_top_n, + int post_nms_top_n, + float nms_thresh, + float min_size, + float eta, + bool pixel_offset, + DenseTensor* rpn_rois, + DenseTensor* rpn_roi_probs, + DenseTensor* rpn_rois_num) { PADDLE_ENFORCE_GE(eta, 1., phi::errors::InvalidArgument( @@ -408,8 +408,5 @@ void GenerateProposalsV2Kernel(const Context& dev_ctx, } } // namespace phi -PD_REGISTER_KERNEL(generate_proposals_v2, - XPU, - ALL_LAYOUT, - phi::GenerateProposalsV2Kernel, - float) {} +PD_REGISTER_KERNEL( + generate_proposals, XPU, ALL_LAYOUT, phi::GenerateProposalsKernel, float) {} diff --git a/paddle/phi/kernels/xpu/top_k_kernel.cc b/paddle/phi/kernels/xpu/top_k_kernel.cc index f2592f9501ee15..0fdb66c41294de 100644 --- a/paddle/phi/kernels/xpu/top_k_kernel.cc +++ b/paddle/phi/kernels/xpu/top_k_kernel.cc @@ -173,4 +173,4 @@ void TopkKernel(const Context& dev_ctx, } // namespace phi -PD_REGISTER_KERNEL(top_k, XPU, ALL_LAYOUT, phi::TopkKernel, float) {} +PD_REGISTER_KERNEL(topk, XPU, ALL_LAYOUT, phi::TopkKernel, float) {} diff --git a/paddle/phi/ops/compat/generate_proposals_sig.cc b/paddle/phi/ops/compat/generate_proposals_sig.cc new file mode 100644 index 00000000000000..fc6696d4a27057 --- /dev/null +++ b/paddle/phi/ops/compat/generate_proposals_sig.cc @@ -0,0 +1,19 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/core/compat/op_utils.h" + +PD_REGISTER_BASE_KERNEL_NAME(generate_proposals_v2, generate_proposals); +PD_REGISTER_BASE_KERNEL_NAME(generate_proposals_v2_grad, + generate_proposals_grad); diff --git a/paddle/phi/ops/compat/slogdeterminant_sig.cc b/paddle/phi/ops/compat/slogdeterminant_sig.cc index e4eeca0515230a..2e63a90d929085 100644 --- a/paddle/phi/ops/compat/slogdeterminant_sig.cc +++ b/paddle/phi/ops/compat/slogdeterminant_sig.cc @@ -19,10 +19,13 @@ namespace phi { KernelSignature SlogDeterminantGradOpArgumentMapping( const ArgumentMappingContext& ctx) { return KernelSignature( - "slogdeterminant_grad", {"Input", "Out", "Out@GRAD"}, {}, {"Input@GRAD"}); + "slogdet_grad", {"Input", "Out", "Out@GRAD"}, {}, {"Input@GRAD"}); } } // namespace phi +PD_REGISTER_BASE_KERNEL_NAME(slogdeterminant, slogdet); +PD_REGISTER_BASE_KERNEL_NAME(slogdeterminant_grad, slogdet_grad); + PD_REGISTER_ARG_MAPPING_FN(slogdeterminant_grad, phi::SlogDeterminantGradOpArgumentMapping); diff --git a/paddle/phi/ops/compat/top_k_sig.cc b/paddle/phi/ops/compat/top_k_sig.cc index c1073f9efdc6b5..0f3a5c1c0b5f92 100644 --- a/paddle/phi/ops/compat/top_k_sig.cc +++ b/paddle/phi/ops/compat/top_k_sig.cc @@ -19,16 +19,16 @@ namespace phi { KernelSignature TopkOpArgumentMapping(const ArgumentMappingContext& ctx) { if (ctx.HasInput("K")) { return KernelSignature( - "top_k", {"X"}, {"K", "axis", "largest", "sorted"}, {"Out", "Indices"}); + "topk", {"X"}, {"K", "axis", "largest", "sorted"}, {"Out", "Indices"}); } else { return KernelSignature( - "top_k", {"X"}, {"k", "axis", "largest", "sorted"}, {"Out", "Indices"}); + "topk", {"X"}, {"k", "axis", "largest", "sorted"}, {"Out", "Indices"}); } } KernelSignature TopkGradOpArgumentMapping(const ArgumentMappingContext& ctx) { - return KernelSignature("top_k_grad", + return KernelSignature("topk_grad", {"X", "Indices", "Out@GRAD"}, {"k", "axis", "largest", "sorted"}, {"X@GRAD"}); @@ -36,7 +36,7 @@ KernelSignature TopkGradOpArgumentMapping(const ArgumentMappingContext& ctx) { } // namespace phi -PD_REGISTER_BASE_KERNEL_NAME(top_k_v2, top_k); -PD_REGISTER_BASE_KERNEL_NAME(top_k_v2_grad, top_k_grad); +PD_REGISTER_BASE_KERNEL_NAME(top_k_v2, topk); +PD_REGISTER_BASE_KERNEL_NAME(top_k_v2_grad, topk_grad); PD_REGISTER_ARG_MAPPING_FN(top_k_v2, phi::TopkOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(top_k_v2_grad, phi::TopkGradOpArgumentMapping); From ab85f87aa9328b6ba6629ef9a983510b27cc2dfd Mon Sep 17 00:00:00 2001 From: Aurelius84 Date: Thu, 1 Dec 2022 21:40:42 +0800 Subject: [PATCH 093/154] [Fluid Clean]Migrate utils files and delete dygraph_to_static dir (#48566) * [Fluid Clean]Migrate utils files and delete dygraph_to_static dir * fix setup.py.in * fix import * fix unittest * fix code style * fix unittest --- python/paddle/fluid/compiler.py | 2 +- python/paddle/fluid/dygraph/__init__.py | 2 - .../dygraph/dygraph_to_static/__init__.py | 28 ------ .../dygraph_to_static/variable_trans_func.py | 95 ------------------- python/paddle/fluid/layers/control_flow.py | 10 +- .../dygraph_to_static/test_ast_util.py | 2 +- .../test_basic_api_transformation.py | 2 +- .../dygraph_to_static/test_break_continue.py | 2 +- .../test_closure_analysis.py | 4 +- .../unittests/dygraph_to_static/test_error.py | 4 +- .../dygraph_to_static/test_function_spec.py | 2 +- .../dygraph_to_static/test_ifelse.py | 2 +- .../dygraph_to_static/test_logging_utils.py | 2 +- .../dygraph_to_static/test_origin_info.py | 8 +- .../test_program_translator.py | 2 +- .../dygraph_to_static/test_return.py | 2 +- 
.../dygraph_to_static/test_rollback.py | 2 +- .../dygraph_to_static/test_setter_helper.py | 2 +- .../dygraph_to_static/test_static_analysis.py | 5 +- .../unittests/dygraph_to_static/test_utils.py | 5 +- .../test_variable_trans_func.py | 6 +- .../paddle/fluid/tests/unittests/op_test.py | 2 +- .../fluid/tests/unittests/test_input_spec.py | 4 +- python/paddle/fluid/variable_index.py | 2 +- python/paddle/jit/api.py | 8 +- python/paddle/jit/dy2static/__init__.py | 7 +- .../jit/dy2static/assert_transformer.py | 4 +- .../paddle/jit/dy2static/ast_transformer.py | 6 +- python/paddle/jit/dy2static/base.py | 18 ---- .../paddle/jit/dy2static/base_transformer.py | 2 +- .../jit/dy2static/basic_api_transformer.py | 7 +- .../dy2static/break_continue_transformer.py | 6 +- .../paddle/jit/dy2static/call_transformer.py | 6 +- .../paddle/jit/dy2static/cast_transformer.py | 4 +- .../paddle/jit/dy2static/convert_call_func.py | 4 +- .../paddle/jit/dy2static/convert_operators.py | 6 +- .../dy2static/create_variable_transformer.py | 6 +- .../jit/dy2static/decorator_transformer.py | 4 +- .../jit/dy2static/early_return_transformer.py | 2 +- .../dy2static}/error.py | 13 +-- .../dy2static}/function_spec.py | 13 ++- .../jit/dy2static/ifelse_transformer.py | 14 +-- .../dy2static}/logging_utils.py | 2 +- .../jit/dy2static/logical_transformer.py | 2 +- .../paddle/jit/dy2static/loop_transformer.py | 16 ++-- .../dy2static}/origin_info.py | 7 +- .../paddle/jit/dy2static/partial_program.py | 2 +- .../paddle/jit/dy2static/print_transformer.py | 2 +- .../jit/dy2static/program_translator.py | 30 +++--- .../jit/dy2static/return_transformer.py | 8 +- .../dy2static}/static_analysis.py | 0 .../jit/dy2static/tensor_shape_transformer.py | 4 +- .../jit/dy2static/typehint_transformer.py | 2 +- .../dy2static}/utils.py | 36 +++---- .../jit/dy2static/variable_trans_func.py | 81 +++++++++++++++- python/setup.py.in | 1 - 56 files changed, 217 insertions(+), 303 deletions(-) delete mode 100644 python/paddle/fluid/dygraph/dygraph_to_static/__init__.py delete mode 100644 python/paddle/fluid/dygraph/dygraph_to_static/variable_trans_func.py delete mode 100644 python/paddle/jit/dy2static/base.py rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/error.py (98%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/function_spec.py (97%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/logging_utils.py (99%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/origin_info.py (98%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/static_analysis.py (100%) rename python/paddle/{fluid/dygraph/dygraph_to_static => jit/dy2static}/utils.py (98%) diff --git a/python/paddle/fluid/compiler.py b/python/paddle/fluid/compiler.py index 81af46c468adbd..f763e0f1d8838c 100644 --- a/python/paddle/fluid/compiler.py +++ b/python/paddle/fluid/compiler.py @@ -696,7 +696,7 @@ def patch_program_cache(ipu_strategy): ProgramCache, MAX_TRACED_PROGRAM_COUNT, ) - from ..fluid.dygraph.dygraph_to_static import logging_utils + from paddle.jit.dy2static import logging_utils from paddle.jit.dy2static.partial_program import ( partial_program_from, ) diff --git a/python/paddle/fluid/dygraph/__init__.py b/python/paddle/fluid/dygraph/__init__.py index 76c75b974e3a42..551561428da72f 100644 --- a/python/paddle/fluid/dygraph/__init__.py +++ b/python/paddle/fluid/dygraph/__init__.py @@ -43,8 +43,6 @@ from . import static_runner from .static_runner import StaticModelRunner -from . 
import dygraph_to_static - from . import rnn from .rnn import * diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/__init__.py b/python/paddle/fluid/dygraph/dygraph_to_static/__init__.py deleted file mode 100644 index 15013fb36d806f..00000000000000 --- a/python/paddle/fluid/dygraph/dygraph_to_static/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from . import static_analysis -from .static_analysis import * - -from . import variable_trans_func -from .variable_trans_func import * - - -from . import logging_utils -from .logging_utils import * - -__all__ = [] -__all__ += static_analysis.__all__ -__all__ += variable_trans_func.__all__ -__all__ += logging_utils.__all__ diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/variable_trans_func.py b/python/paddle/fluid/dygraph/dygraph_to_static/variable_trans_func.py deleted file mode 100644 index f28f1993621cc4..00000000000000 --- a/python/paddle/fluid/dygraph/dygraph_to_static/variable_trans_func.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import paddle -import textwrap -from paddle.utils import gast -from paddle.fluid import unique_name -from paddle.fluid.framework import Variable -from paddle.fluid.dygraph.dygraph_to_static.utils import ( - UndefinedVar, - create_undefined_variable, -) -from paddle.fluid.layers.utils import map_structure, is_sequence - -__all__ = [ - 'create_bool_as_type', - 'create_fill_constant_node', - 'to_static_variable', - 'create_undefined_var', -] - - -def create_undefined_var(name): - func_code = "{} = _jst.UndefinedVar('{}')".format(name, name) - return gast.parse(func_code).body[0] - - -def create_fill_constant_node(name, value=0): - func_code = "{} = paddle.full(shape=[1], ".format(name) - if isinstance(value, bool): - func_code += "dtype='bool', fill_value={}, name='{}')".format( - value, name - ) - return gast.parse(func_code).body[0] - if isinstance(value, float): - func_code += "dtype='float64', fill_value={}, name='{}')".format( - value, name - ) - return gast.parse(func_code).body[0] - - if isinstance(value, int): - func_code += "dtype='int64', fill_value={}, name='{}')".format( - value, name - ) - return gast.parse(func_code).body[0] - - -def to_static_variable(x): - ''' - Translate a Python Tensor to PaddlePaddle static graph Tensor - ''' - if isinstance(x, bool): - return paddle.full(shape=[1], dtype='bool', fill_value=x) - if isinstance(x, float): - return paddle.full(shape=[1], dtype='float64', fill_value=x) - if isinstance(x, int): - return paddle.full(shape=[1], dtype='int64', fill_value=x) - if isinstance(x, UndefinedVar) or x is None: - """ - for early return case, we need a variable to represent None, current we use data_layer_not_check. - """ - return create_undefined_variable() - if is_sequence(x): - return map_structure(to_static_variable, x) - return x - - -def create_bool_as_type(x, value=True): - ''' - Create a bool variable, which type is the same as x. - ''' - if isinstance(x, Variable): - return paddle.full(shape=[1], fill_value=value, dtype="bool") - else: - return value - - -def create_bool_node(name, value): - ''' - Create a assign stmt for name = value . - ''' - assert isinstance(value, bool) - node = "{} = {}".format(name, value) - return gast.parse(node).body[0] diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index c47e9babea7fdb..7a6079ad62c18e 100755 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -159,10 +159,10 @@ def select_input(inputs, mask): def select_input_with_buildin_type(inputs, mask, name): - from paddle.fluid.dygraph.dygraph_to_static.variable_trans_func import ( + from paddle.jit.dy2static.variable_trans_func import ( to_static_variable, ) - from paddle.fluid.dygraph.dygraph_to_static.utils import UndefinedVar + from paddle.jit.dy2static.utils import UndefinedVar false_var, true_var = inputs @@ -1484,7 +1484,7 @@ def _deal_with_undefined_var(output_vars, loop_vars): 3. UndefinedVar = List(int) # create a list of variable 4. 
UndefinedVar = value # create a variable """ - from paddle.fluid.dygraph.dygraph_to_static.utils import ( + from paddle.jit.dy2static.utils import ( UndefinedVar, create_undefined_variable, ) @@ -2552,7 +2552,7 @@ def merge_every_var_list(false_vars, true_vars, name): def change_none_to_undefinedvar(nest1, nest2): - from paddle.fluid.dygraph.dygraph_to_static.utils import UndefinedVar + from paddle.jit.dy2static.utils import UndefinedVar def map_fn(x): if x is None: @@ -2588,7 +2588,7 @@ def expand_undefined_var(nest1, nest2, names): nest2: Var2, ([1,2,3,4], UndefinedVar) In this case, we should not expand recursively. """ - from paddle.fluid.dygraph.dygraph_to_static.utils import UndefinedVar + from paddle.jit.dy2static.utils import UndefinedVar from paddle.jit.dy2static.return_transformer import ( RETURN_VALUE_PREFIX, ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ast_util.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ast_util.py index b81ed5f0b4360a..b417cd5fd95af2 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ast_util.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ast_util.py @@ -25,7 +25,7 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_func +from paddle.jit.dy2static.utils import ast_to_func from paddle.utils import gast diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py index 34a65913c5ae56..3733977c5dbcfa 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py @@ -22,8 +22,8 @@ import paddle.fluid.dygraph as dygraph from paddle import to_tensor from paddle.fluid.dygraph import to_variable -from paddle.fluid.dygraph.dygraph_to_static.utils import is_dygraph_api from paddle.jit.api import dygraph_to_static_func +from paddle.jit.dy2static.utils import is_dygraph_api from paddle.utils import gast SEED = 2020 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_break_continue.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_break_continue.py index b3d1b5b2cb4f63..f81c2d53640f55 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_break_continue.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_break_continue.py @@ -18,9 +18,9 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph.dygraph_to_static.utils import Dygraph2StaticException from paddle.jit.api import declarative from paddle.jit.dy2static.program_translator import ProgramTranslator +from paddle.jit.dy2static.utils import Dygraph2StaticException SEED = 2020 np.random.seed(SEED) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_closure_analysis.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_closure_analysis.py index b0156699161e7f..bed90ccbe47fe0 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_closure_analysis.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_closure_analysis.py @@ -18,9 +18,7 @@ from numpy import append import paddle -from paddle.fluid.dygraph.dygraph_to_static.utils import ( - FunctionNameLivenessAnalysis, -) +from paddle.jit.dy2static.utils import FunctionNameLivenessAnalysis from paddle.utils import gast global_a = [] 
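The unit-test diffs in this patch all apply the same import-path migration: helpers that previously lived under paddle.fluid.dygraph.dygraph_to_static now live under paddle.jit.dy2static. Below is a minimal, illustrative sketch (not part of the patch) of how downstream code can follow the same rename; it assumes a PaddlePaddle build that already contains this change, and the fallback branch is only a convenience for older releases.

# Illustrative sketch only: the import-path change implied by this patch.
# Assumes a PaddlePaddle build that already contains paddle.jit.dy2static;
# the except branch targets older releases and is not part of the patch.
import textwrap

from paddle.utils import gast

try:
    # New locations after this patch.
    from paddle.jit.dy2static.utils import ast_to_source_code
    from paddle.jit.dy2static.variable_trans_func import to_static_variable
except ImportError:
    # Old locations under fluid, removed by this patch.
    from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code
    from paddle.fluid.dygraph.dygraph_to_static.variable_trans_func import (
        to_static_variable,
    )

# The helpers keep their behavior from either location, e.g. round-tripping a
# small function through gast and back to source text:
code = textwrap.dedent(
    """
    def add(x, y):
        return x + y
    """
)
print(ast_to_source_code(gast.parse(code)))

The helpers keep their names; only the package path changes, which is why most hunks in this patch are one-line import rewrites.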
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_error.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_error.py index 6faed1a61e809d..805970814425da 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_error.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_error.py @@ -18,8 +18,8 @@ import numpy as np import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph.dygraph_to_static import error -from paddle.fluid.dygraph.dygraph_to_static.origin_info import unwrap +from paddle.jit.dy2static import error +from paddle.jit.dy2static.origin_info import unwrap def inner_func(): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_function_spec.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_function_spec.py index b7a26169f3a2d8..ac9f6362cb42c0 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_function_spec.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_function_spec.py @@ -17,7 +17,7 @@ from test_declarative import foo_func import paddle -from paddle.fluid.dygraph.dygraph_to_static.function_spec import FunctionSpec +from paddle.jit.dy2static.function_spec import FunctionSpec from paddle.static import InputSpec paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py index 89b9e871564f9f..c17bfd2508b3de 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py @@ -43,9 +43,9 @@ import paddle import paddle.fluid.core as core -from paddle.fluid.dygraph.dygraph_to_static.utils import Dygraph2StaticException from paddle.jit.api import declarative from paddle.jit.dy2static.program_translator import ProgramTranslator +from paddle.jit.dy2static.utils import Dygraph2StaticException np.random.seed(1) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py index 93e5fae00d410f..fa34869e922055 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py @@ -20,7 +20,7 @@ from unittest import mock import paddle -from paddle.fluid.dygraph.dygraph_to_static import logging_utils +from paddle.jit.dy2static import logging_utils from paddle.utils import gast diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_origin_info.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_origin_info.py index 65c8fb4a1fd704..540ad3c13bb4b8 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_origin_info.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_origin_info.py @@ -15,7 +15,9 @@ import sys import unittest -from paddle.fluid.dygraph.dygraph_to_static.origin_info import ( +from paddle.jit.api import declarative +from paddle.jit.dy2static import DygraphToStaticAst +from paddle.jit.dy2static.origin_info import ( ORIGI_INFO, Location, OriginInfo, @@ -25,9 +27,7 @@ inspect, unwrap, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_func -from paddle.jit.api import declarative -from paddle.jit.dy2static import DygraphToStaticAst +from paddle.jit.dy2static.utils import ast_to_func def simple_func(x): diff --git 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py index c61fdcccf015ac..aee91f6de1729f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py @@ -27,9 +27,9 @@ import paddle import paddle.fluid as fluid import paddle.jit.dy2static as _jst -from paddle.fluid.dygraph.dygraph_to_static.utils import func_to_source_code from paddle.jit import ProgramTranslator from paddle.jit.api import declarative +from paddle.jit.dy2static.utils import func_to_source_code from paddle.utils import gast np.random.seed(0) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_return.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_return.py index 76e3de2a5ec870..b4ec4fb8fd5afe 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_return.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_return.py @@ -20,8 +20,8 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.dygraph.dygraph_to_static.utils import Dygraph2StaticException from paddle.jit import ProgramTranslator, to_static +from paddle.jit.dy2static.utils import Dygraph2StaticException SEED = 2020 np.random.seed(SEED) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_rollback.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_rollback.py index e7dabd9af31ea4..c418a850d5aafb 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_rollback.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_rollback.py @@ -17,8 +17,8 @@ import numpy as np import paddle -from paddle.fluid.dygraph.dygraph_to_static.utils import func_to_source_code from paddle.jit.dy2static.program_translator import StaticFunction +from paddle.jit.dy2static.utils import func_to_source_code class Net(paddle.nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_setter_helper.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_setter_helper.py index 725dc032d5e201..eab182b6f419df 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_setter_helper.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_setter_helper.py @@ -14,7 +14,7 @@ import unittest -from paddle.fluid.dygraph.dygraph_to_static.utils import GetterSetterHelper +from paddle.jit.dy2static.utils import GetterSetterHelper vars = [1, 2, 3, 4, 5] diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_static_analysis.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_static_analysis.py index 22598ede71b74f..23eb6964c3e3d7 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_static_analysis.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_static_analysis.py @@ -19,10 +19,7 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph.dygraph_to_static import ( - NodeVarType, - StaticAnalysisVisitor, -) +from paddle.jit.dy2static import NodeVarType, StaticAnalysisVisitor from paddle.utils import gast diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_utils.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_utils.py index a310ab65307c58..3361a866feb540 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_utils.py +++ 
b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_utils.py @@ -15,10 +15,7 @@ import types import unittest -from paddle.fluid.dygraph.dygraph_to_static.utils import ( - index_in_list, - is_paddle_func, -) +from paddle.jit.dy2static.utils import index_in_list, is_paddle_func class TestIndexInList(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_variable_trans_func.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_variable_trans_func.py index 353a5e8b79738a..f2395fa517793d 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_variable_trans_func.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_variable_trans_func.py @@ -14,10 +14,8 @@ import unittest -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code -from paddle.fluid.dygraph.dygraph_to_static.variable_trans_func import ( - create_fill_constant_node, -) +from paddle.jit.dy2static.utils import ast_to_source_code +from paddle.jit.dy2static.variable_trans_func import create_fill_constant_node class TestVariableTransFunc(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index fd43a80c176185..9728edf5d1c04f 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -29,7 +29,6 @@ import paddle.fluid.core as core from paddle.fluid import unique_name from paddle.fluid.backward import append_backward -from paddle.fluid.dygraph.dygraph_to_static.utils import parse_arg_and_kwargs from paddle.fluid.executor import Executor from paddle.fluid.framework import ( OpProtoHolder, @@ -43,6 +42,7 @@ _test_eager_guard, ) from paddle.fluid.op import Operator +from paddle.jit.dy2static.utils import parse_arg_and_kwargs sys.path.append(os.path.abspath(os.path.dirname(__file__))) from testsuite import append_input_output, append_loss_ops, create_op, set_input diff --git a/python/paddle/fluid/tests/unittests/test_input_spec.py b/python/paddle/fluid/tests/unittests/test_input_spec.py index 3a623f463c1813..07a2e4da449ba2 100644 --- a/python/paddle/fluid/tests/unittests/test_input_spec.py +++ b/python/paddle/fluid/tests/unittests/test_input_spec.py @@ -20,10 +20,8 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.dygraph.dygraph_to_static.utils import ( - _compatible_non_tensor_spec, -) from paddle.fluid.framework import convert_np_dtype_to_dtype_ +from paddle.jit.dy2static.utils import _compatible_non_tensor_spec from paddle.static import InputSpec diff --git a/python/paddle/fluid/variable_index.py b/python/paddle/fluid/variable_index.py index cf298501a29f8b..31d587269db71c 100644 --- a/python/paddle/fluid/variable_index.py +++ b/python/paddle/fluid/variable_index.py @@ -622,7 +622,7 @@ def _setitem_for_tensor_array(var, item, value): not _non_static_mode() ), "setitem for tensor_array must be called in static graph mode." 
if isinstance(item, (Variable, int)): - from paddle.fluid.dygraph.dygraph_to_static.variable_trans_func import ( + from paddle.jit.dy2static.variable_trans_func import ( to_static_variable, ) from paddle import cast diff --git a/python/paddle/jit/api.py b/python/paddle/jit/api.py index 0387460c26a5d7..95b07a989a7a73 100644 --- a/python/paddle/jit/api.py +++ b/python/paddle/jit/api.py @@ -34,16 +34,16 @@ program_desc_tracing_guard, switch_to_static_graph, ) -from paddle.fluid.dygraph.dygraph_to_static import logging_utils -from paddle.jit.dy2static.convert_call_func import ( +from .dy2static import logging_utils +from .dy2static.convert_call_func import ( ConversionOptions, CONVERSION_OPTIONS, ) -from paddle.fluid.dygraph.dygraph_to_static.logging_utils import ( +from .dy2static.logging_utils import ( set_code_level, set_verbosity, ) -from paddle.jit.dy2static.program_translator import ( +from .dy2static.program_translator import ( ProgramTranslator, StaticFunction, unwrap_decorators, diff --git a/python/paddle/jit/dy2static/__init__.py b/python/paddle/jit/dy2static/__init__.py index 605c3ea93bf488..c42116c21065de 100644 --- a/python/paddle/jit/dy2static/__init__.py +++ b/python/paddle/jit/dy2static/__init__.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .base import saw -from .base import UndefinedVar +from .utils import ( + saw, + UndefinedVar, +) from .convert_operators import convert_logical_and as And # noqa: F401 from .convert_operators import convert_var_dtype as AsDtype # noqa: F401 from .convert_operators import convert_assert as Assert # noqa: F401 @@ -35,5 +37,6 @@ from .assert_transformer import AssertTransformer from .ast_transformer import DygraphToStaticAst from .program_translator import convert_to_static +from .static_analysis import * # noqa: F403 __all__ = [] diff --git a/python/paddle/jit/dy2static/assert_transformer.py b/python/paddle/jit/dy2static/assert_transformer.py index 3a8a8b01aecc8b..81bad1111b1920 100644 --- a/python/paddle/jit/dy2static/assert_transformer.py +++ b/python/paddle/jit/dy2static/assert_transformer.py @@ -14,10 +14,10 @@ from paddle.utils import gast -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( +from paddle.jit.dy2static.static_analysis import ( AstNodeWrapper, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code +from paddle.jit.dy2static.utils import ast_to_source_code from .base_transformer import ( BaseTransformer, ) diff --git a/python/paddle/jit/dy2static/ast_transformer.py b/python/paddle/jit/dy2static/ast_transformer.py index b23a6dc368ad37..2e244d6f341833 100644 --- a/python/paddle/jit/dy2static/ast_transformer.py +++ b/python/paddle/jit/dy2static/ast_transformer.py @@ -61,7 +61,7 @@ from .create_variable_transformer import ( CreateVariableTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( +from .static_analysis import ( StaticAnalysisVisitor, ) from .tensor_shape_transformer import ( @@ -71,8 +71,8 @@ DecoratorTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static import logging_utils -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code +from . 
import logging_utils +from .utils import ast_to_source_code __all__ = ['DygraphToStaticAst'] diff --git a/python/paddle/jit/dy2static/base.py b/python/paddle/jit/dy2static/base.py deleted file mode 100644 index 5d1bb01cf5c0be..00000000000000 --- a/python/paddle/jit/dy2static/base.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ...fluid.dygraph.dygraph_to_static.utils import saw # noqa: F401 -from ...fluid.dygraph.dygraph_to_static.utils import UndefinedVar # noqa: F401 - -__all__ = [] diff --git a/python/paddle/jit/dy2static/base_transformer.py b/python/paddle/jit/dy2static/base_transformer.py index 518805250de93a..166753d05f57a4 100644 --- a/python/paddle/jit/dy2static/base_transformer.py +++ b/python/paddle/jit/dy2static/base_transformer.py @@ -14,7 +14,7 @@ from paddle.utils import gast from paddle.fluid import unique_name -from paddle.fluid.dygraph.dygraph_to_static.utils import ( +from paddle.jit.dy2static.utils import ( ORIGI_INFO, FOR_ITER_INDEX_PREFIX, FOR_ITER_VAR_LEN_PREFIX, diff --git a/python/paddle/jit/dy2static/basic_api_transformer.py b/python/paddle/jit/dy2static/basic_api_transformer.py index 8a80fc4e6ed641..89fa0738b9a190 100644 --- a/python/paddle/jit/dy2static/basic_api_transformer.py +++ b/python/paddle/jit/dy2static/basic_api_transformer.py @@ -15,14 +15,13 @@ import astor from paddle.utils import gast -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( +from .static_analysis import ( AstNodeWrapper, ) -from paddle.fluid.dygraph.dygraph_to_static import utils +from . 
import utils from .base_transformer import ( BaseTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code class BasicApiTransformer(BaseTransformer): @@ -166,7 +165,7 @@ def visit_Attribute(self, node): node = ( gast.parse( "_jst.Attr({}, \"{}\")".format( - ast_to_source_code(value).strip(), attr + utils.ast_to_source_code(value).strip(), attr ) ) .body[0] diff --git a/python/paddle/jit/dy2static/break_continue_transformer.py b/python/paddle/jit/dy2static/break_continue_transformer.py index 23576fdf9849ec..670cc842f349b5 100644 --- a/python/paddle/jit/dy2static/break_continue_transformer.py +++ b/python/paddle/jit/dy2static/break_continue_transformer.py @@ -15,9 +15,9 @@ from paddle.utils import gast from paddle.fluid import unique_name -from paddle.fluid.dygraph.dygraph_to_static.utils import index_in_list -from paddle.fluid.dygraph.dygraph_to_static.utils import BaseNodeVisitor -from paddle.fluid.dygraph.dygraph_to_static.variable_trans_func import ( +from paddle.jit.dy2static.utils import index_in_list +from paddle.jit.dy2static.utils import BaseNodeVisitor +from paddle.jit.dy2static.variable_trans_func import ( create_bool_node, ) from .base_transformer import ( diff --git a/python/paddle/jit/dy2static/call_transformer.py b/python/paddle/jit/dy2static/call_transformer.py index 7380934d47e16a..11f0f6624e8fed 100644 --- a/python/paddle/jit/dy2static/call_transformer.py +++ b/python/paddle/jit/dy2static/call_transformer.py @@ -14,11 +14,11 @@ from paddle.utils import gast -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( +from paddle.jit.dy2static.static_analysis import ( AstNodeWrapper, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code -from paddle.fluid.dygraph.dygraph_to_static.utils import is_paddle_api +from paddle.jit.dy2static.utils import ast_to_source_code +from paddle.jit.dy2static.utils import is_paddle_api from .base_transformer import ( BaseTransformer, ) diff --git a/python/paddle/jit/dy2static/cast_transformer.py b/python/paddle/jit/dy2static/cast_transformer.py index ca1bf11c43897f..96a504d70cdb58 100644 --- a/python/paddle/jit/dy2static/cast_transformer.py +++ b/python/paddle/jit/dy2static/cast_transformer.py @@ -14,10 +14,10 @@ from paddle.utils import gast -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( +from paddle.jit.dy2static.static_analysis import ( AstNodeWrapper, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code +from paddle.jit.dy2static.utils import ast_to_source_code from .base_transformer import ( BaseTransformer, ) diff --git a/python/paddle/jit/dy2static/convert_call_func.py b/python/paddle/jit/dy2static/convert_call_func.py index 325ffb206011fd..e0f393028cfac4 100644 --- a/python/paddle/jit/dy2static/convert_call_func.py +++ b/python/paddle/jit/dy2static/convert_call_func.py @@ -32,11 +32,11 @@ convert_enumerate, ) -from paddle.fluid.dygraph.dygraph_to_static.logging_utils import ( +from paddle.jit.dy2static.logging_utils import ( TranslatorLogger, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import is_paddle_func, unwrap +from paddle.jit.dy2static.utils import is_paddle_func, unwrap from paddle.fluid.dygraph.layers import Layer __all__ = ["convert_call"] diff --git a/python/paddle/jit/dy2static/convert_operators.py b/python/paddle/jit/dy2static/convert_operators.py index e5b35d0b4c0d11..3643da7591f4f4 100644 --- a/python/paddle/jit/dy2static/convert_operators.py +++ 
b/python/paddle/jit/dy2static/convert_operators.py @@ -15,7 +15,7 @@ import re import paddle from paddle.fluid.data_feeder import convert_dtype -from paddle.fluid.dygraph.dygraph_to_static.variable_trans_func import ( +from paddle.jit.dy2static.variable_trans_func import ( to_static_variable, ) from paddle.fluid.framework import core, Variable @@ -46,11 +46,11 @@ from .return_transformer import ( RETURN_NO_VALUE_VAR_NAME, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ( +from paddle.jit.dy2static.utils import ( UndefinedVar, Dygraph2StaticException, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import GetterSetterHelper +from paddle.jit.dy2static.utils import GetterSetterHelper from paddle.fluid.layers.utils import copy_mutable_vars diff --git a/python/paddle/jit/dy2static/create_variable_transformer.py b/python/paddle/jit/dy2static/create_variable_transformer.py index feccbfe594e137..808a047c3a2838 100644 --- a/python/paddle/jit/dy2static/create_variable_transformer.py +++ b/python/paddle/jit/dy2static/create_variable_transformer.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( +from paddle.jit.dy2static.static_analysis import ( AstNodeWrapper, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ( +from paddle.jit.dy2static.utils import ( FunctionNameLivenessAnalysis, ) -from paddle.fluid.dygraph.dygraph_to_static.variable_trans_func import ( +from paddle.jit.dy2static.variable_trans_func import ( create_undefined_var, ) from .base_transformer import ( diff --git a/python/paddle/jit/dy2static/decorator_transformer.py b/python/paddle/jit/dy2static/decorator_transformer.py index f802db72de3bc7..cd33f8625a3b07 100644 --- a/python/paddle/jit/dy2static/decorator_transformer.py +++ b/python/paddle/jit/dy2static/decorator_transformer.py @@ -14,13 +14,13 @@ # limitations under the License. from paddle.utils import gast -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( +from paddle.jit.dy2static.static_analysis import ( AstNodeWrapper, ) from .base_transformer import ( BaseTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ( +from paddle.jit.dy2static.utils import ( RE_PYNAME, RE_PYMODULE, ast_to_source_code, diff --git a/python/paddle/jit/dy2static/early_return_transformer.py b/python/paddle/jit/dy2static/early_return_transformer.py index 53bb2394e80373..72076fb3cd8839 100644 --- a/python/paddle/jit/dy2static/early_return_transformer.py +++ b/python/paddle/jit/dy2static/early_return_transformer.py @@ -13,7 +13,7 @@ # limitations under the License. 
from paddle.utils import gast -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( +from paddle.jit.dy2static.static_analysis import ( AstNodeWrapper, ) from .base_transformer import ( diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/error.py b/python/paddle/jit/dy2static/error.py similarity index 98% rename from python/paddle/fluid/dygraph/dygraph_to_static/error.py rename to python/paddle/jit/dy2static/error.py index f4a66100807fda..40f5b16def610e 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/error.py +++ b/python/paddle/jit/dy2static/error.py @@ -17,17 +17,18 @@ import traceback import linecache import re -import numpy as np +import numpy as np # noqa: F401 -from paddle.fluid.dygraph.dygraph_to_static.origin_info import ( +from .origin_info import ( Location, OriginInfo, global_origin_info_map, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ( - _is_api_in_module_helper, - RE_PYMODULE, -) +from .utils import _is_api_in_module_helper # noqa: F401 +from .utils import RE_PYMODULE + + +__all__ = [] ERROR_DATA = "Error data about original source code information and traceback." diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/function_spec.py b/python/paddle/jit/dy2static/function_spec.py similarity index 97% rename from python/paddle/fluid/dygraph/dygraph_to_static/function_spec.py rename to python/paddle/jit/dy2static/function_spec.py index cc77e05dad1174..370fb36bcfb91f 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/function_spec.py +++ b/python/paddle/jit/dy2static/function_spec.py @@ -22,13 +22,16 @@ from paddle.fluid.layers.utils import flatten from paddle.fluid.layers.utils import pack_sequence_as from paddle.fluid.dygraph.base import switch_to_static_graph -from paddle.fluid.dygraph.dygraph_to_static import logging_utils -from paddle.fluid.dygraph.dygraph_to_static.utils import parse_arg_and_kwargs -from paddle.fluid.dygraph.dygraph_to_static.utils import parse_varargs_name -from paddle.fluid.dygraph.dygraph_to_static.utils import type_name -from paddle.fluid.dygraph.dygraph_to_static.utils import func_to_source_code from paddle.fluid.dygraph.io import TranslatedLayer +from . 
import logging_utils +from .utils import ( + parse_arg_and_kwargs, + parse_varargs_name, + type_name, + func_to_source_code, +) + class FunctionSpec: """ diff --git a/python/paddle/jit/dy2static/ifelse_transformer.py b/python/paddle/jit/dy2static/ifelse_transformer.py index 8bae82c11a16fd..8759e742cef47b 100644 --- a/python/paddle/jit/dy2static/ifelse_transformer.py +++ b/python/paddle/jit/dy2static/ifelse_transformer.py @@ -22,27 +22,27 @@ from paddle.utils import gast from paddle.fluid import unique_name -from paddle.fluid.dygraph.dygraph_to_static.utils import ( +from paddle.jit.dy2static.utils import ( create_funcDef_node, ast_to_source_code, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ( +from paddle.jit.dy2static.utils import ( FunctionNameLivenessAnalysis, ) -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( +from paddle.jit.dy2static.static_analysis import ( AstNodeWrapper, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ( +from paddle.jit.dy2static.utils import ( create_nonlocal_stmt_nodes, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ( +from paddle.jit.dy2static.utils import ( create_get_args_node, create_set_args_node, ) from .base_transformer import ( BaseTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ( +from paddle.jit.dy2static.utils import ( FOR_ITER_INDEX_PREFIX, FOR_ITER_TUPLE_PREFIX, FOR_ITER_TUPLE_INDEX_PREFIX, @@ -52,7 +52,7 @@ FOR_ITER_TARGET_PREFIX, FOR_ITER_ITERATOR_PREFIX, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ( +from paddle.jit.dy2static.utils import ( GetterSetterHelper, create_name_str, ) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/logging_utils.py b/python/paddle/jit/dy2static/logging_utils.py similarity index 99% rename from python/paddle/fluid/dygraph/dygraph_to_static/logging_utils.py rename to python/paddle/jit/dy2static/logging_utils.py index 7b004964a472dd..ca36cb0b35aa5e 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/logging_utils.py +++ b/python/paddle/jit/dy2static/logging_utils.py @@ -16,7 +16,7 @@ import threading from paddle.fluid import log_helper -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code +from .utils import ast_to_source_code __all__ = ["TranslatorLogger", "set_verbosity", "set_code_level"] diff --git a/python/paddle/jit/dy2static/logical_transformer.py b/python/paddle/jit/dy2static/logical_transformer.py index 1ab4491d8d9412..cd4d27ac73e579 100644 --- a/python/paddle/jit/dy2static/logical_transformer.py +++ b/python/paddle/jit/dy2static/logical_transformer.py @@ -13,7 +13,7 @@ # limitations under the License. 
from paddle.utils import gast -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code +from paddle.jit.dy2static.utils import ast_to_source_code from .base_transformer import ( BaseTransformer, ) diff --git a/python/paddle/jit/dy2static/loop_transformer.py b/python/paddle/jit/dy2static/loop_transformer.py index 7d42638b9e0f6b..4bb99e830b3501 100644 --- a/python/paddle/jit/dy2static/loop_transformer.py +++ b/python/paddle/jit/dy2static/loop_transformer.py @@ -17,21 +17,21 @@ from collections import defaultdict from paddle.fluid import unique_name -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( +from paddle.jit.dy2static.static_analysis import ( AstNodeWrapper, ) -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import NodeVarType -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( +from paddle.jit.dy2static.static_analysis import NodeVarType +from paddle.jit.dy2static.static_analysis import ( StaticAnalysisVisitor, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code -from paddle.fluid.dygraph.dygraph_to_static.utils import get_attribute_full_name -from paddle.fluid.dygraph.dygraph_to_static.utils import ( +from paddle.jit.dy2static.utils import ast_to_source_code +from paddle.jit.dy2static.utils import get_attribute_full_name +from paddle.jit.dy2static.utils import ( create_nonlocal_stmt_nodes, create_get_args_node, create_set_args_node, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ( +from paddle.jit.dy2static.utils import ( FunctionNameLivenessAnalysis, ) from .ifelse_transformer import ARGS_NAME @@ -41,7 +41,7 @@ ForNodeVisitor, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ( +from paddle.jit.dy2static.utils import ( GetterSetterHelper, create_name_str, ) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/origin_info.py b/python/paddle/jit/dy2static/origin_info.py similarity index 98% rename from python/paddle/fluid/dygraph/dygraph_to_static/origin_info.py rename to python/paddle/jit/dy2static/origin_info.py index 7eb9da1206439d..7f3c7f719a5395 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/origin_info.py +++ b/python/paddle/jit/dy2static/origin_info.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import collections import inspect from paddle.utils import gast from paddle.fluid import core -from paddle.fluid.dygraph.dygraph_to_static.utils import unwrap -from paddle.fluid.dygraph.dygraph_to_static.utils import ORIGI_INFO +from .utils import ( + unwrap, + ORIGI_INFO, +) from paddle.fluid.framework import Program from collections.abc import Sequence diff --git a/python/paddle/jit/dy2static/partial_program.py b/python/paddle/jit/dy2static/partial_program.py index ad5afaff7cdd51..d1ebdbe5ccc57f 100644 --- a/python/paddle/jit/dy2static/partial_program.py +++ b/python/paddle/jit/dy2static/partial_program.py @@ -22,7 +22,7 @@ ) from paddle.fluid.dygraph import layers from paddle.fluid.dygraph.base import switch_to_static_graph -from paddle.fluid.dygraph.dygraph_to_static import logging_utils +from . 
import logging_utils from .return_transformer import ( RETURN_NO_VALUE_MAGIC_NUM, ) diff --git a/python/paddle/jit/dy2static/print_transformer.py b/python/paddle/jit/dy2static/print_transformer.py index cd4f13f019bc8f..aa4bc2c219bbce 100644 --- a/python/paddle/jit/dy2static/print_transformer.py +++ b/python/paddle/jit/dy2static/print_transformer.py @@ -14,7 +14,7 @@ from paddle.utils import gast -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( +from paddle.jit.dy2static.static_analysis import ( AstNodeWrapper, StaticAnalysisVisitor, ) diff --git a/python/paddle/jit/dy2static/program_translator.py b/python/paddle/jit/dy2static/program_translator.py index 0e912954ae6950..37a85fc078bc9a 100644 --- a/python/paddle/jit/dy2static/program_translator.py +++ b/python/paddle/jit/dy2static/program_translator.py @@ -26,39 +26,35 @@ from paddle.fluid.layers.utils import flatten from paddle.fluid.dygraph.base import param_guard from paddle.fluid.dygraph.base import switch_to_static_graph -from paddle.fluid.dygraph.dygraph_to_static import error -from paddle.fluid.dygraph.dygraph_to_static import logging_utils -from paddle.fluid.dygraph.dygraph_to_static.origin_info import ( +from . import error +from . import logging_utils +from .origin_info import ( attach_origin_info, -) -from paddle.fluid.dygraph.dygraph_to_static.origin_info import ( create_and_update_origin_info_map, -) -from paddle.fluid.dygraph.dygraph_to_static.origin_info import ( update_op_callstack_with_origin_info, ) + from .partial_program import ( partial_program_from, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_func -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code -from paddle.fluid.dygraph.dygraph_to_static.utils import func_to_source_code -from paddle.fluid.dygraph.dygraph_to_static.utils import input_specs_compatible -from paddle.fluid.dygraph.dygraph_to_static.utils import type_name -from paddle.fluid.dygraph.dygraph_to_static.utils import unwrap -from paddle.fluid.dygraph.dygraph_to_static.utils import ( +from .utils import ( + ast_to_func, + ast_to_source_code, + func_to_source_code, + input_specs_compatible, + type_name, + unwrap, make_hashable, ALREADY_D2S, ) -from paddle.fluid.dygraph.dygraph_to_static.function_spec import ( +from .function_spec import ( FunctionSpec, _hash_spec_names, -) -from paddle.fluid.dygraph.dygraph_to_static.function_spec import ( get_buffers, get_parameters, ) + from .ast_transformer import DygraphToStaticAst __all__ = ['ProgramTranslator', 'convert_to_static'] diff --git a/python/paddle/jit/dy2static/return_transformer.py b/python/paddle/jit/dy2static/return_transformer.py index c8114e078d592e..8aa96b61578468 100644 --- a/python/paddle/jit/dy2static/return_transformer.py +++ b/python/paddle/jit/dy2static/return_transformer.py @@ -15,16 +15,16 @@ from paddle.utils import gast from paddle.fluid import unique_name -from paddle.fluid.dygraph.dygraph_to_static.utils import index_in_list +from paddle.jit.dy2static.utils import index_in_list from .break_continue_transformer import ( ForToWhileTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code +from paddle.jit.dy2static.utils import ast_to_source_code from .base_transformer import ( BaseTransformer, ) -from paddle.fluid.dygraph.dygraph_to_static.utils import Dygraph2StaticException -from paddle.fluid.dygraph.dygraph_to_static.utils import ORIGI_INFO +from paddle.jit.dy2static.utils import Dygraph2StaticException +from paddle.jit.dy2static.utils 
import ORIGI_INFO __all__ = [ 'RETURN_NO_VALUE_MAGIC_NUM', diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/static_analysis.py b/python/paddle/jit/dy2static/static_analysis.py similarity index 100% rename from python/paddle/fluid/dygraph/dygraph_to_static/static_analysis.py rename to python/paddle/jit/dy2static/static_analysis.py diff --git a/python/paddle/jit/dy2static/tensor_shape_transformer.py b/python/paddle/jit/dy2static/tensor_shape_transformer.py index 9dae08b123eebc..ffdba7e790a77c 100644 --- a/python/paddle/jit/dy2static/tensor_shape_transformer.py +++ b/python/paddle/jit/dy2static/tensor_shape_transformer.py @@ -14,8 +14,8 @@ from paddle.utils import gast -from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( +from paddle.jit.dy2static.utils import ast_to_source_code +from paddle.jit.dy2static.static_analysis import ( AstNodeWrapper, ) from .base_transformer import ( diff --git a/python/paddle/jit/dy2static/typehint_transformer.py b/python/paddle/jit/dy2static/typehint_transformer.py index dd272f76477f47..d5c23d1d7d73c1 100644 --- a/python/paddle/jit/dy2static/typehint_transformer.py +++ b/python/paddle/jit/dy2static/typehint_transformer.py @@ -13,7 +13,7 @@ # limitations under the License. -from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( +from paddle.jit.dy2static.static_analysis import ( AstNodeWrapper, ) from .base_transformer import ( diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/utils.py b/python/paddle/jit/dy2static/utils.py similarity index 98% rename from python/paddle/fluid/dygraph/dygraph_to_static/utils.py rename to python/paddle/jit/dy2static/utils.py index 23579720f3804c..a57134411ffec3 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/utils.py +++ b/python/paddle/jit/dy2static/utils.py @@ -16,7 +16,6 @@ import astor import atexit import copy -import collections from paddle.utils import gast import inspect import os @@ -32,15 +31,17 @@ from paddle.fluid import core from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layers import assign -import collections from functools import reduce import warnings + +__all__ = [] + # Note(Aurelius): Do not forget the dot `.` to distinguish other # module such as paddlenlp. PADDLE_MODULE_PREFIX = 'paddle.' 
DYGRAPH_MODULE_PREFIX = 'paddle.fluid.dygraph' -DYGRAPH_TO_STATIC_MODULE_PREFIX = 'paddle.fluid.dygraph.dygraph_to_static' +DYGRAPH_TO_STATIC_MODULE_PREFIX = 'paddle.jit.dy2static' GET_ARGS_FUNC_PREFIX = 'get_args' SET_ARGS_FUNC_PREFIX = 'set_args' ALREADY_D2S = '__already_d2s' @@ -258,19 +259,13 @@ def is_api_in_module(node, module_prefix): func_str = astor.to_source(gast.gast_to_ast(func_node)).strip() try: - # TODO(liym27): - # Consider a better to import modules like: - # source_file = inspect.getfile(dyfunc) - # import_statements = ImportVisitor(source_file).transform() - # import_str = "".join(import_statements) - import paddle - import paddle.fluid as fluid - import paddle.fluid.dygraph as dygraph - import paddle.fluid.layers as layers - import paddle.jit.dy2static as _jst - - from paddle.fluid.dygraph import to_variable - from paddle import to_tensor + import paddle # noqa: F401 + import paddle.fluid as fluid # noqa: F401 + import paddle.fluid.dygraph as dygraph # noqa: F401 + import paddle.fluid.layers as layers # noqa: F401 + import paddle.jit.dy2static as _jst # noqa: F401 + from paddle.fluid.dygraph import to_variable # noqa: F401 + from paddle import to_tensor # noqa: F401 return eval( "_is_api_in_module_helper({}, '{}')".format(func_str, module_prefix) @@ -304,7 +299,7 @@ def is_numpy_api(node): assert isinstance(node, gast.Call), "Input non-Call node for is_numpy_api" func_str = astor.to_source(gast.gast_to_ast(node.func)) try: - import numpy as np + import numpy as np # noqa: F401 module_result = eval( "_is_api_in_module_helper({}, '{}')".format(func_str, "numpy") @@ -321,7 +316,7 @@ def is_numpy_api(node): def _delete_keywords_from(node): assert isinstance(node, gast.Call) func_src = astor.to_source(gast.gast_to_ast(node.func)) - import paddle.fluid as fluid + import paddle.fluid as fluid # noqa: F401 full_args = eval(f"inspect.getfullargspec({func_src})") full_args_name = full_args[0] @@ -402,7 +397,7 @@ def update_args_of_func(node, dygraph_node, method_name): ) class_src = astor.to_source(gast.gast_to_ast(dygraph_node.func)) - import paddle.fluid as fluid + import paddle.fluid as fluid # noqa: F401 if method_name == "__init__" or eval( "issubclass({}, fluid.dygraph.Layer)".format(class_src) @@ -894,7 +889,7 @@ def visit_Constant(self, node): return node def _is_node_with_tensor(self, node, name_id): - from paddle.fluid.dygraph.dygraph_to_static.static_analysis import ( + from paddle.jit.dy2static.static_analysis import ( NodeVarType, ) @@ -1213,7 +1208,6 @@ def post_func(): because we do ifelse_transformer after loop_transformer. Loops will changed into functioons. but we know this function will be called in if. so we add w_vars to father function scope. """ from paddle.jit.dy2static.loop_transformer import ( - WHILE_CONDITION_PREFIX, WHILE_BODY_PREFIX, FOR_CONDITION_PREFIX, FOR_BODY_PREFIX, diff --git a/python/paddle/jit/dy2static/variable_trans_func.py b/python/paddle/jit/dy2static/variable_trans_func.py index 88f8cd3c2c464f..c98823242b40a0 100644 --- a/python/paddle/jit/dy2static/variable_trans_func.py +++ b/python/paddle/jit/dy2static/variable_trans_func.py @@ -12,9 +12,82 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ...fluid.dygraph.dygraph_to_static.variable_trans_func import ( # noqa: F401 - create_bool_as_type, - to_static_variable, +import paddle +from paddle.utils import gast +from paddle.fluid.framework import Variable +from paddle.jit.dy2static.utils import ( + UndefinedVar, + create_undefined_variable, ) +from paddle.fluid.layers.utils import map_structure, is_sequence -__all__ = [] +__all__ = [ + 'create_bool_as_type', + 'create_fill_constant_node', + 'to_static_variable', + 'create_undefined_var', +] + + +def create_undefined_var(name): + func_code = "{} = _jst.UndefinedVar('{}')".format(name, name) + return gast.parse(func_code).body[0] + + +def create_fill_constant_node(name, value=0): + func_code = "{} = paddle.full(shape=[1], ".format(name) + if isinstance(value, bool): + func_code += "dtype='bool', fill_value={}, name='{}')".format( + value, name + ) + return gast.parse(func_code).body[0] + if isinstance(value, float): + func_code += "dtype='float64', fill_value={}, name='{}')".format( + value, name + ) + return gast.parse(func_code).body[0] + + if isinstance(value, int): + func_code += "dtype='int64', fill_value={}, name='{}')".format( + value, name + ) + return gast.parse(func_code).body[0] + + +def to_static_variable(x): + ''' + Translate a Python Tensor to PaddlePaddle static graph Tensor + ''' + if isinstance(x, bool): + return paddle.full(shape=[1], dtype='bool', fill_value=x) + if isinstance(x, float): + return paddle.full(shape=[1], dtype='float64', fill_value=x) + if isinstance(x, int): + return paddle.full(shape=[1], dtype='int64', fill_value=x) + if isinstance(x, UndefinedVar) or x is None: + """ + for early return case, we need a variable to represent None, current we use data_layer_not_check. + """ + return create_undefined_variable() + if is_sequence(x): + return map_structure(to_static_variable, x) + return x + + +def create_bool_as_type(x, value=True): + ''' + Create a bool variable, which type is the same as x. + ''' + if isinstance(x, Variable): + return paddle.full(shape=[1], fill_value=value, dtype="bool") + else: + return value + + +def create_bool_node(name, value): + ''' + Create a assign stmt for name = value . 
+ ''' + assert isinstance(value, bool) + node = "{} = {}".format(name, value) + return gast.parse(node).body[0] diff --git a/python/setup.py.in b/python/setup.py.in index cab42d8f3613ba..faa994d744747a 100755 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -331,7 +331,6 @@ packages=['paddle', 'paddle.inference.contrib.utils', 'paddle.fluid', 'paddle.fluid.dygraph', - 'paddle.fluid.dygraph.dygraph_to_static', 'paddle.fluid.dygraph.amp', 'paddle.fluid.proto', 'paddle.fluid.proto.profiler', From dab1896d487967d2557b05cf3e089250b4ce12a8 Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Thu, 1 Dec 2022 23:59:51 +0800 Subject: [PATCH 094/154] clean elem_arithmetic part4 unittest (#48465) --- .../tests/unittests/ipu/test_assign_op_ipu.py | 4 ++-- .../tests/unittests/ipu/test_dropout_op_ipu.py | 2 +- .../unittests/ipu/test_elemetwise_x_op_ipu.py | 6 +++--- .../unittests/ipu/test_fill_any_like_op_ipu.py | 4 ++-- .../unittests/ipu/test_fill_constant_op_ipu.py | 2 +- .../unittests/ipu/test_reshape_inplace_op_ipu.py | 2 +- .../tests/unittests/ipu/test_share_data_op_ipu.py | 4 ++-- .../unittests/ipu/test_varname_inplace_ipu.py | 4 ++-- .../paddle/fluid/tests/unittests/ps_dnn_model.py | 4 +--- .../paddle/fluid/tests/unittests/seresnext_net.py | 2 +- .../fluid/tests/unittests/test_argsort_op.py | 2 +- .../paddle/fluid/tests/unittests/test_assign_op.py | 4 ++-- .../paddle/fluid/tests/unittests/test_backward.py | 6 ++---- python/paddle/fluid/tests/unittests/test_case.py | 4 ++-- .../fluid/tests/unittests/test_complex_variable.py | 2 +- python/paddle/fluid/tests/unittests/test_cond.py | 14 +++++++------- .../paddle/fluid/tests/unittests/test_dataset.py | 12 +++--------- .../test_executor_return_tensor_not_overwriting.py | 4 ++-- 18 files changed, 36 insertions(+), 46 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_assign_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_assign_op_ipu.py index 324bac452c6bb4..42b817da6f1769 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_assign_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_assign_op_ipu.py @@ -43,7 +43,7 @@ def build_model(self): name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32' ) x = paddle.assign(x) - out = paddle.fluid.layers.elementwise_add(x, x) + out = paddle.add(x, x) self.fetch_list = [out.name] def run_model(self, exec_mode): @@ -72,7 +72,7 @@ def build_model(self): name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32' ) assign = paddle.assign(self.assign_fp32) - out = paddle.fluid.layers.elementwise_add(x, assign) + out = paddle.add(x, assign) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_dropout_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_dropout_op_ipu.py index 3c1dc5c07efa6a..ad560f36a20387 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_dropout_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_dropout_op_ipu.py @@ -51,7 +51,7 @@ def build_model(self): name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32' ) x = paddle.fluid.layers.dropout(x, **self.attrs) - out = paddle.fluid.layers.elementwise_add(x, x) + out = paddle.add(x, x) self.fetch_list = [out.name] def run_model(self, exec_mode): diff --git a/python/paddle/fluid/tests/unittests/ipu/test_elemetwise_x_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_elemetwise_x_op_ipu.py index 908538a9d6b73e..62d0376e58f3df 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_elemetwise_x_op_ipu.py +++ 
b/python/paddle/fluid/tests/unittests/ipu/test_elemetwise_x_op_ipu.py @@ -126,17 +126,17 @@ def test_case3(self): class TestAdd(TestMul): def set_test_op(self): - self.op = paddle.fluid.layers.elementwise_add + self.op = paddle.add class TestSub(TestMul): def set_test_op(self): - self.op = paddle.fluid.layers.elementwise_sub + self.op = paddle.subtract class TestDiv(TestMul): def set_test_op(self): - self.op = paddle.fluid.layers.elementwise_div + self.op = paddle.divide class TestMin(TestMul): diff --git a/python/paddle/fluid/tests/unittests/ipu/test_fill_any_like_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_fill_any_like_op_ipu.py index a5df1299938455..14a8d69a8e5218 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_fill_any_like_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_fill_any_like_op_ipu.py @@ -47,7 +47,7 @@ def build_model(self): name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32' ) x_fill = paddle.full_like(x, **self.attrs) - out = paddle.fluid.layers.elementwise_add(x_fill, x_fill) + out = paddle.add(x_fill, x_fill) self.fetch_list = [out.name] def run_model(self, exec_mode): @@ -71,7 +71,7 @@ class TestError(TestBase): def build_model(self): x = paddle.fluid.data('x', [-1, 3, 13], 'float32') x_fill = paddle.full_like(x, **self.attrs) - out = paddle.fluid.layers.elementwise_add(x_fill, x_fill) + out = paddle.add(x_fill, x_fill) self.fetch_list = [out.name] def test(self): diff --git a/python/paddle/fluid/tests/unittests/ipu/test_fill_constant_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_fill_constant_op_ipu.py index 71dfb2d409747f..f1e2c79cc6ba37 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_fill_constant_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_fill_constant_op_ipu.py @@ -47,7 +47,7 @@ def set_op_attrs(self): @IPUOpTest.static_graph def build_model(self): x = paddle.fluid.layers.fill_constant(**self.attrs) - out = paddle.fluid.layers.elementwise_add(x, x) + out = paddle.add(x, x) self.fetch_list = [out.name] def run_model(self, exec_mode): diff --git a/python/paddle/fluid/tests/unittests/ipu/test_reshape_inplace_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_reshape_inplace_op_ipu.py index c12b8919d3f6da..223e821facbf25 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_reshape_inplace_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_reshape_inplace_op_ipu.py @@ -50,7 +50,7 @@ def build_model(self): x = paddle.static.data( name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32' ) - add = paddle.fluid.layers.elementwise_add(x, x) + add = paddle.add(x, x) out = paddle.reshape(add, **self.attrs) self.fetch_list = [out.name] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_share_data_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_share_data_op_ipu.py index 94e2fd353b305f..132da198f65f45 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_share_data_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_share_data_op_ipu.py @@ -48,7 +48,7 @@ def build_model(self): self.main_prog.global_block().append_op( type="share_data", inputs={"X": x}, outputs={'Out': y} ) - out = paddle.fluid.layers.elementwise_add(y, y) + out = paddle.add(y, y) self.fetch_list = [out.name] def run_model(self, exec_mode): @@ -82,7 +82,7 @@ def build_model(self): self.main_prog.global_block().append_op( type="share_data", inputs={"X": x}, outputs={'Out': y} ) - out = paddle.fluid.layers.elementwise_add(x, y) + out = paddle.add(x, y) self.fetch_list = 
[out.name] diff --git a/python/paddle/fluid/tests/unittests/ipu/test_varname_inplace_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_varname_inplace_ipu.py index 61b38e80e7873a..0f956e16d40335 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_varname_inplace_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_varname_inplace_ipu.py @@ -59,9 +59,9 @@ def _test_base(self, run_ipu=True): shape=self.feed_shape[0], dtype=self.feed_dtype[0], ) - add1 = paddle.fluid.layers.elementwise_add(x, x) + add1 = paddle.add(x, x) reshape = paddle.reshape(add1, **self.attrs) - add2 = paddle.fluid.layers.elementwise_add(reshape, reshape) + add2 = paddle.add(reshape, reshape) scale1 = paddle.scale(add2) scale2 = paddle.scale(scale1, scale=1.3, bias=0.5) scale3 = paddle.scale(scale2, scale=2, bias=0.7) diff --git a/python/paddle/fluid/tests/unittests/ps_dnn_model.py b/python/paddle/fluid/tests/unittests/ps_dnn_model.py index 9cb61f24a0c3a7..c5c29e2299d579 100755 --- a/python/paddle/fluid/tests/unittests/ps_dnn_model.py +++ b/python/paddle/fluid/tests/unittests/ps_dnn_model.py @@ -230,9 +230,7 @@ def interactive_layer(self, bottom_a, bottom_b): with paddle.fluid.device_guard( self.PART_B_JOINT_OP_DEVICE_FlAG ): # joint point - interactive = paddle.fluid.layers.elementwise_add( - bottom_a, bottom_b - ) + interactive = paddle.add(bottom_a, bottom_b) return interactive def top_layer(self, interactive, label_input): diff --git a/python/paddle/fluid/tests/unittests/seresnext_net.py b/python/paddle/fluid/tests/unittests/seresnext_net.py index 7d96ea40ef4c04..146cd58bcf438b 100644 --- a/python/paddle/fluid/tests/unittests/seresnext_net.py +++ b/python/paddle/fluid/tests/unittests/seresnext_net.py @@ -121,7 +121,7 @@ def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio): short = shortcut(input, num_filters * 2, stride) - return fluid.layers.elementwise_add(x=short, y=scale, act='relu') + return paddle.nn.functional.relu(paddle.add(x=short, y=scale)) img_shape = [3, 224, 224] diff --git a/python/paddle/fluid/tests/unittests/test_argsort_op.py b/python/paddle/fluid/tests/unittests/test_argsort_op.py index f939462a6451b8..87ab9604678507 100644 --- a/python/paddle/fluid/tests/unittests/test_argsort_op.py +++ b/python/paddle/fluid/tests/unittests/test_argsort_op.py @@ -95,7 +95,7 @@ def setUp(self): input=x, axis=self.axis, descending=self.descending ) self.sorted_x.stop_gradient = False - loss = fluid.layers.elementwise_mul(self.sorted_x, label) + loss = paddle.multiply(self.sorted_x, label) self.loss = paddle.sum(loss) def forward(self): diff --git a/python/paddle/fluid/tests/unittests/test_assign_op.py b/python/paddle/fluid/tests/unittests/test_assign_op.py index 4c5921a5f2012b..e2325733d1ad77 100644 --- a/python/paddle/fluid/tests/unittests/test_assign_op.py +++ b/python/paddle/fluid/tests/unittests/test_assign_op.py @@ -90,7 +90,7 @@ def test_assign_LoDTensorArray(self): y = fluid.layers.fill_constant( shape=[100, 10], dtype='float32', value=1 ) - z = fluid.layers.elementwise_add(x=x, y=y) + z = paddle.add(x=x, y=y) i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0) init_array = fluid.layers.array_write(x=z, i=i) array = fluid.layers.assign(init_array) @@ -144,7 +144,7 @@ def test_assign_LoDTensorArray(self): y = fluid.layers.fill_constant( shape=[100, 10], dtype='float32', value=1 ) - z = fluid.layers.elementwise_add(x=x, y=y) + z = paddle.add(x=x, y=y) i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0) init_array = 
fluid.layers.array_write(x=z, i=i) array = paddle.assign(init_array) diff --git a/python/paddle/fluid/tests/unittests/test_backward.py b/python/paddle/fluid/tests/unittests/test_backward.py index 9ba1cf884a1f30..c98fd7dff5e2b2 100644 --- a/python/paddle/fluid/tests/unittests/test_backward.py +++ b/python/paddle/fluid/tests/unittests/test_backward.py @@ -243,10 +243,8 @@ def build_model(self): x3, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v') ) # merge layers - x_merge = fluid.layers.elementwise_add(x_emb, x2_emb, name='x_add_x2') - x2_merge = fluid.layers.elementwise_add( - x2_emb, x3_emb, name='x2_add_x3' - ) + x_merge = paddle.add(x_emb, x2_emb, name='x_add_x2') + x2_merge = paddle.add(x2_emb, x3_emb, name='x2_add_x3') # shared fc_w predict = fluid.layers.fc( input=x_merge, diff --git a/python/paddle/fluid/tests/unittests/test_case.py b/python/paddle/fluid/tests/unittests/test_case.py index 7008544d17a68b..777db3a3480003 100644 --- a/python/paddle/fluid/tests/unittests/test_case.py +++ b/python/paddle/fluid/tests/unittests/test_case.py @@ -299,12 +299,12 @@ def test_optimizer_in_case(self): adagrad = optimizer.Adagrad(learning_rate=0.001) def fn_1(): - sum = layers.elementwise_mul(x, y) + sum = paddle.multiply(x, y) loss = paddle.mean(sum, name="f_1_loss") adam.minimize(loss) def fn_2(): - sum = layers.elementwise_mul(x, y) + sum = paddle.multiply(x, y) loss = paddle.mean(sum, name="f_2_loss") adagrad.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/test_complex_variable.py b/python/paddle/fluid/tests/unittests/test_complex_variable.py index 588f87b3b2cb61..57774ac5f09b00 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_variable.py +++ b/python/paddle/fluid/tests/unittests/test_complex_variable.py @@ -33,7 +33,7 @@ def compare(self): with dg.guard(): x = dg.to_variable(a, "x") y = dg.to_variable(b) - out = paddle.fluid.layers.elementwise_add(x, y) + out = paddle.add(x, y) self.assertIsNotNone("{}".format(out)) np.testing.assert_allclose(out.numpy(), a + b, rtol=1e-05) diff --git a/python/paddle/fluid/tests/unittests/test_cond.py b/python/paddle/fluid/tests/unittests/test_cond.py index 41bfcff7363b87..d31ac885b1ebea 100644 --- a/python/paddle/fluid/tests/unittests/test_cond.py +++ b/python/paddle/fluid/tests/unittests/test_cond.py @@ -310,15 +310,15 @@ def test_cond_inside_cond(self): def less_than_branch(i, a): return layers.cond( i >= 3.0, - lambda: layers.elementwise_add(a, a), - lambda: layers.elementwise_sub(a, a), + lambda: paddle.add(a, a), + lambda: paddle.subtract(a, a), ) def greater_equal_branch(i, a): return layers.cond( i < 8.0, - lambda: layers.elementwise_mul(a, a), - lambda: layers.elementwise_div(a, a), + lambda: paddle.multiply(a, a), + lambda: paddle.divide(a, a), ) main_program = Program() @@ -374,12 +374,12 @@ def test_cond_op_in_condition(self): a < b, lambda: fluid.layers.cond( a - b < -1.0, - lambda: fluid.layers.elementwise_add(a, b), - lambda: fluid.layers.elementwise_mul(a, b), + lambda: paddle.add(a, b), + lambda: paddle.multiply(a, b), ), lambda: fluid.layers.cond( a == b, - lambda: fluid.layers.elementwise_sub(a, b), + lambda: paddle.subtract(a, b), lambda: paddle.pow(a, b), ), ) diff --git a/python/paddle/fluid/tests/unittests/test_dataset.py b/python/paddle/fluid/tests/unittests/test_dataset.py index 95e252c12f41b5..ab126c43781145 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset.py +++ b/python/paddle/fluid/tests/unittests/test_dataset.py @@ -1029,9 +1029,7 @@ def test_dataset_fleet(self): name=slot, 
shape=[1], dtype="float32", lod_level=1 ) slots_vars.append(var) - fake_cost = fluid.layers.elementwise_sub( - slots_vars[0], slots_vars[-1] - ) + fake_cost = paddle.subtract(slots_vars[0], slots_vars[-1]) fake_cost = paddle.mean(fake_cost) with fluid.scope_guard(scope): place = fluid.CPUPlace() @@ -1101,9 +1099,7 @@ def test_dataset_fleet2(self): name=slot, shape=[1], dtype="float32", lod_level=1 ) slots_vars.append(var) - fake_cost = fluid.layers.elementwise_sub( - slots_vars[0], slots_vars[-1] - ) + fake_cost = paddle.subtract(slots_vars[0], slots_vars[-1]) fake_cost = paddle.mean(fake_cost) with fluid.scope_guard(scope): place = fluid.CPUPlace() @@ -1234,9 +1230,7 @@ def test_bosps_dataset_fleet2(self): name=slot, shape=[1], dtype="float32", lod_level=1 ) slots_vars.append(var) - fake_cost = fluid.layers.elementwise_sub( - slots_vars[0], slots_vars[-1] - ) + fake_cost = paddle.subtract(slots_vars[0], slots_vars[-1]) fake_cost = paddle.mean(fake_cost) with fluid.scope_guard(scope): place = fluid.CPUPlace() diff --git a/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py b/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py index 954d47287bfe13..5e8179886eae8d 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py +++ b/python/paddle/fluid/tests/unittests/test_executor_return_tensor_not_overwriting.py @@ -75,7 +75,7 @@ def setUp(self): def calc_add_out(self, place=None, parallel=None): x = paddle.ones(shape=[3, 3], dtype='float32') y = paddle.ones(shape=[3, 3], dtype='float32') - out = fluid.layers.elementwise_add(x=x, y=y) + out = paddle.add(x=x, y=y) program = fluid.default_main_program() if parallel: program = fluid.CompiledProgram(program).with_data_parallel( @@ -88,7 +88,7 @@ def calc_add_out(self, place=None, parallel=None): def calc_sub_out(self, place=None, parallel=None): x = paddle.ones(shape=[2, 2], dtype='float32') y = paddle.ones(shape=[2, 2], dtype='float32') - out = fluid.layers.elementwise_sub(x=x, y=y) + out = paddle.subtract(x=x, y=y) program = fluid.default_main_program() if parallel: program = fluid.CompiledProgram(program).with_data_parallel( From 5789ac5acc1d6a1feb8bf6b05e480cc423fec55f Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Fri, 2 Dec 2022 00:00:16 +0800 Subject: [PATCH 095/154] [Clean fluid] Clean fluid elementwise_arithmetic (part6 unit test) (#48467) * clean elem_arithmetic part6 unittest * delete op_name_conflict unittest * restore test_op_name_conflict * fix codestyle test_op_function_generator --- .../unittests/test_imperative_ptb_rnn.py | 4 +-- .../test_imperative_reinforcement.py | 10 +++---- .../tests/unittests/test_imperative_resnet.py | 2 +- .../unittests/test_imperative_save_load.py | 4 +-- .../unittests/test_imperative_save_load_v2.py | 4 +-- .../unittests/test_imperative_se_resnext.py | 2 +- ..._imperative_selected_rows_to_lod_tensor.py | 2 +- .../test_infer_no_need_buffer_slots.py | 3 +- .../fluid/tests/unittests/test_layers.py | 28 +++++++++---------- .../unittests/test_op_function_generator.py | 5 ++-- .../tests/unittests/test_optimizer_grad.py | 20 +++++-------- .../tests/unittests/test_recurrent_op.py | 12 +++----- .../tests/unittests/test_rnn_decode_api.py | 4 +-- .../fluid/tests/unittests/test_sgd_op_bf16.py | 2 +- .../tests/unittests/test_static_save_load.py | 4 +-- .../tests/unittests/test_weight_decay.py | 4 +-- .../tests/unittests/test_while_loop_op.py | 26 ++++++++--------- .../tests/unittests/transformer_model.py | 2 +- 18 files 
changed, 62 insertions(+), 76 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index f8f8620338ca32..c86a802a0a4006 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -111,7 +111,7 @@ def forward(self, input_embedding, init_hidden=None, init_cell=None): nn = fluid.layers.concat([self._input, pre_hidden], 1) gate_input = fluid.layers.matmul(x=nn, y=weight_1) - gate_input = fluid.layers.elementwise_add(gate_input, bias) + gate_input = paddle.add(gate_input, bias) i, j, f, o = fluid.layers.split( gate_input, num_or_sections=4, dim=-1 ) @@ -226,7 +226,7 @@ def forward(self, input, label, init_hidden, init_cell): rnn_out, shape=[-1, self.num_steps, self.hidden_size] ) projection = fluid.layers.matmul(rnn_out, self.softmax_weight) - projection = fluid.layers.elementwise_add(projection, self.softmax_bias) + projection = paddle.add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) loss = fluid.layers.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False diff --git a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py index 2cc85fbc30f0d8..dfbaae4926d0ef 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py @@ -72,13 +72,13 @@ def run_dygraph(): dy_mask.stop_gradient = True loss_probs = paddle.log(loss_probs) - loss_probs = fluid.layers.elementwise_mul(loss_probs, dy_mask) + loss_probs = paddle.multiply(loss_probs, dy_mask) loss_probs = paddle.sum(loss_probs, axis=-1) dy_reward = fluid.dygraph.base.to_variable(reward) dy_reward.stop_gradient = True - loss_probs = fluid.layers.elementwise_mul(dy_reward, loss_probs) + loss_probs = paddle.multiply(dy_reward, loss_probs) loss = paddle.sum(loss_probs) sgd = SGDOptimizer( @@ -140,12 +140,10 @@ def run_dygraph(): st_loss_probs = policy(st_state) st_loss_probs = paddle.log(st_loss_probs) - st_loss_probs = fluid.layers.elementwise_mul(st_loss_probs, st_mask) + st_loss_probs = paddle.multiply(st_loss_probs, st_mask) st_loss_probs = paddle.sum(st_loss_probs, axis=-1) - st_loss_probs = fluid.layers.elementwise_mul( - st_reward, st_loss_probs - ) + st_loss_probs = paddle.multiply(st_reward, st_loss_probs) st_loss = paddle.sum(st_loss_probs) st_sgd.minimize(st_loss) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index 53e8b1d93c4b7a..0b5ba9a5631470 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -158,7 +158,7 @@ def forward(self, inputs): else: short = self.short(inputs) - y = fluid.layers.elementwise_add(x=short, y=conv2) + y = paddle.add(x=short, y=conv2) layer_helper = LayerHelper(self.full_name(), act='relu') return layer_helper.append_activation(y) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index 260c3e0b8eb414..f9b618cedf1c2a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -106,7 +106,7 @@ def forward(self, 
input_embedding, init_hidden=None, init_cell=None): nn = fluid.layers.concat([self._input, pre_hidden], 1) gate_input = fluid.layers.matmul(x=nn, y=weight_1) - gate_input = fluid.layers.elementwise_add(gate_input, bias) + gate_input = paddle.add(gate_input, bias) i, j, f, o = fluid.layers.split( gate_input, num_or_sections=4, dim=-1 ) @@ -222,7 +222,7 @@ def forward(self, input, label, init_hidden, init_cell): ) projection = fluid.layers.matmul(rnn_out, self.softmax_weight) - projection = fluid.layers.elementwise_add(projection, self.softmax_bias) + projection = paddle.add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) loss = fluid.layers.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index ea6804e64e9eb0..bd6a6ca22f5633 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -107,7 +107,7 @@ def forward(self, input_embedding, init_hidden=None, init_cell=None): nn = fluid.layers.concat([self._input, pre_hidden], 1) gate_input = fluid.layers.matmul(x=nn, y=weight_1) - gate_input = fluid.layers.elementwise_add(gate_input, bias) + gate_input = paddle.add(gate_input, bias) i, j, f, o = fluid.layers.split( gate_input, num_or_sections=4, dim=-1 ) @@ -223,7 +223,7 @@ def forward(self, input, label, init_hidden, init_cell): ) projection = fluid.layers.matmul(rnn_out, self.softmax_weight) - projection = fluid.layers.elementwise_add(projection, self.softmax_bias) + projection = paddle.add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) loss = fluid.layers.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False diff --git a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py index c09f6a1449faa8..95f912d8227711 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py @@ -192,7 +192,7 @@ def forward(self, inputs): else: short = self.short(inputs) - y = fluid.layers.elementwise_add(x=short, y=scale) + y = paddle.add(x=short, y=scale) layer_helper = LayerHelper(self.full_name(), act='relu') y = layer_helper.append_activation(y) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py index e99d099317e81f..5b533319019b83 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py @@ -73,7 +73,7 @@ def __init__( def forward(self, input, label): x_emb = self.embedding(input) fc = fluid.layers.matmul(x_emb, self.softmax_weight) - fc = fluid.layers.elementwise_add(fc, self.softmax_bias) + fc = paddle.add(fc, self.softmax_bias) projection = fluid.layers.matmul( fc, paddle.transpose(self.embedding.weight, perm=[1, 0]) ) diff --git a/python/paddle/fluid/tests/unittests/test_infer_no_need_buffer_slots.py b/python/paddle/fluid/tests/unittests/test_infer_no_need_buffer_slots.py index ed64e80e65872e..21f9cda5bdc4a4 100644 --- a/python/paddle/fluid/tests/unittests/test_infer_no_need_buffer_slots.py 
+++ b/python/paddle/fluid/tests/unittests/test_infer_no_need_buffer_slots.py @@ -14,6 +14,7 @@ import unittest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.framework as framework @@ -31,7 +32,7 @@ def net(self): .global_block() .create_var(dtype="float32", shape=[1], lod_level=0, name="x2") ) - x = fluid.layers.elementwise_add(x1, x2) + x = paddle.add(x1, x2) return x def test_infer_no_need_buffer_slots(self): diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 8807b77664b759..6079f7636f3af9 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -618,11 +618,11 @@ def test_elementwise_math(self): t5 = layers.data(name='t5', shape=[3, 3], dtype='float32') t6 = layers.data(name='t6', shape=[3, 3], dtype='float32') - ret = layers.elementwise_add(t, t2) + ret = paddle.add(t, t2) ret = paddle.pow(ret, t3) - ret = layers.elementwise_div(ret, t4) - ret = layers.elementwise_sub(ret, t5) - ret = layers.elementwise_mul(ret, t6) + ret = paddle.divide(ret, t4) + ret = paddle.subtract(ret, t5) + ret = paddle.multiply(ret, t6) static_ret = self.get_static_graph_result( feed={'t': n, 't2': n2, 't3': n3, 't4': n4, 't5': n5, 't6': n6}, @@ -631,18 +631,18 @@ def test_elementwise_math(self): with self.dynamic_graph(): with _test_eager_guard(): - ret = layers.elementwise_add(to_variable(n), to_variable(n2)) + ret = paddle.add(to_variable(n), to_variable(n2)) ret = paddle.pow(ret, to_variable(n3)) - ret = layers.elementwise_div(ret, to_variable(n4)) - ret = layers.elementwise_sub(ret, to_variable(n5)) - dy_eager_ret = layers.elementwise_mul(ret, to_variable(n6)) + ret = paddle.divide(ret, to_variable(n4)) + ret = paddle.subtract(ret, to_variable(n5)) + dy_eager_ret = paddle.multiply(ret, to_variable(n6)) dy_eager_ret_value = dy_eager_ret.numpy() - ret = layers.elementwise_add(to_variable(n), to_variable(n2)) + ret = paddle.add(to_variable(n), to_variable(n2)) ret = paddle.pow(ret, to_variable(n3)) - ret = layers.elementwise_div(ret, to_variable(n4)) - ret = layers.elementwise_sub(ret, to_variable(n5)) - dy_ret = layers.elementwise_mul(ret, to_variable(n6)) + ret = paddle.divide(ret, to_variable(n4)) + ret = paddle.subtract(ret, to_variable(n5)) + dy_ret = paddle.multiply(ret, to_variable(n6)) dy_ret_value = dy_ret.numpy() np.testing.assert_allclose(static_ret, dy_ret_value, rtol=1e-05) @@ -2606,10 +2606,10 @@ def test_compare(self): def test_cond(self): def less_than_branch(a, b): - return fluid.layers.elementwise_add(a, b) + return paddle.add(a, b) def greater_equal_branch(a, b): - return fluid.layers.elementwise_sub(a, b) + return paddle.subtract(a, b) with self.static_graph(): a = fluid.layers.fill_constant( diff --git a/python/paddle/fluid/tests/unittests/test_op_function_generator.py b/python/paddle/fluid/tests/unittests/test_op_function_generator.py index 8e7afd0f1adec2..53edabb18236c8 100644 --- a/python/paddle/fluid/tests/unittests/test_op_function_generator.py +++ b/python/paddle/fluid/tests/unittests/test_op_function_generator.py @@ -16,6 +16,7 @@ import numpy as np +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers from paddle import _legacy_C_ops @@ -45,7 +46,7 @@ def test_elementwise_add(self): y = fluid.dygraph.to_variable(b) x.stop_gradient = False - res1 = layers.elementwise_add(x, y) + res1 = paddle.add(x, y) res2 = _legacy_C_ops.elementwise_add(x, y) 
np.testing.assert_array_equal(res1.numpy(), res2.numpy()) @@ -57,7 +58,7 @@ def test_elementwise_mul(self): x = fluid.dygraph.to_variable(a) y = fluid.dygraph.to_variable(b) - res1 = layers.elementwise_mul(x, y) + res1 = paddle.multiply(x, y) res2 = _legacy_C_ops.elementwise_mul(x, y) np.testing.assert_array_equal(res1.numpy(), res2.numpy()) diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py index acdc43659d81a4..a9f7c5de8d9b58 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py @@ -97,27 +97,21 @@ def build_net(self, cond_i, use_bf16=False): default_initializer=fluid.initializer.NumpyArrayInitializer(self.z), ) - sum_xy = fluid.layers.elementwise_add(param_x, param_y, name='sum_xy') - sub_yz = fluid.layers.elementwise_sub(param_y, param_z, name='sub_yz') + sum_xy = paddle.add(param_x, param_y, name='sum_xy') + sub_yz = paddle.subtract(param_y, param_z, name='sub_yz') useless = fluid.layers.fc(param_x, size=1, name='fc_useless') def cond_true(): - cond_yz = fluid.layers.elementwise_add( - param_y, param_z, name='sum_cond_yz' - ) + cond_yz = paddle.add(param_y, param_z, name='sum_cond_yz') # param_y will not be updated param_y.stop_gradient = self.y_no_grad - cond_res = fluid.layers.elementwise_add( - cond_yz, param_z, name='sum_cond_true' - ) - cond_useless = fluid.layers.elementwise_mul(param_x, param_y) + cond_res = paddle.add(cond_yz, param_z, name='sum_cond_true') + cond_useless = paddle.multiply(param_x, param_y) return cond_res def cond_false(): - cond_res = fluid.layers.elementwise_add( - param_y, param_z, name='sum_cond_false' - ) - cond_useless = fluid.layers.elementwise_mul(param_z, param_z) + cond_res = paddle.add(param_y, param_z, name='sum_cond_false') + cond_useless = paddle.multiply(param_z, param_z) return cond_res cond_i = fluid.layers.assign(np.array([cond_i], dtype='float32')) diff --git a/python/paddle/fluid/tests/unittests/test_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_recurrent_op.py index cf7459fcadd4fa..6e01ee1d4f0f79 100644 --- a/python/paddle/fluid/tests/unittests/test_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_recurrent_op.py @@ -153,7 +153,7 @@ def create_rnn_op(self): x_t = rnn.step_input(x) h = paddle.scale( - x=layers.elementwise_add(x=h_pre, y=x_t), + x=paddle.add(x=h_pre, y=x_t), scale=self.py_rnn.scale, ) @@ -317,9 +317,7 @@ def create_rnn_op(self): bias_attr=False, ) - h = paddle.nn.functional.sigmoid( - x=layers.elementwise_add(x=temp_l, y=temp_r) - ) + h = paddle.nn.functional.sigmoid(x=paddle.add(x=temp_l, y=temp_r)) rnn.update_memory(h_pre, h) rnn.output(h) @@ -491,7 +489,7 @@ def create_rnn_op(self): with rnn.step(): mem_pre = rnn.memory(shape=[-1, self.input_dim], batch_ref=x) x_t = rnn.step_input(x) - mem = layers.elementwise_add(x=mem_pre, y=x_t) + mem = paddle.add(x=mem_pre, y=x_t) rnn.update_memory(mem_pre, mem) rnn.output(mem) @@ -713,9 +711,7 @@ def create_rnn_op(self): bias_attr=False, ) - h = paddle.nn.functional.sigmoid( - x=layers.elementwise_add(temp_l, temp_r) - ) + h = paddle.nn.functional.sigmoid(x=paddle.add(temp_l, temp_r)) rnn.update_memory(h_pre, h) rnn.output(h) diff --git a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py index b7c98515fee85b..410708a105bfd9 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py +++ 
b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py @@ -75,9 +75,7 @@ def attention(self, hidden, encoder_output, encoder_padding_mask): layers.unsqueeze(query, [1]), encoder_output, transpose_y=True ) if encoder_padding_mask is not None: - attn_scores = layers.elementwise_add( - attn_scores, encoder_padding_mask - ) + attn_scores = paddle.add(attn_scores, encoder_padding_mask) attn_scores = layers.softmax(attn_scores) attn_out = paddle.squeeze( layers.matmul(attn_scores, encoder_output), [1] diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py b/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py index dd62aa369b5f0d..81b8c1b2b16437 100644 --- a/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py +++ b/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py @@ -347,7 +347,7 @@ def test_sgd(self): is_sparse=False, dtype="uint16", ) # bfloat16 - cost = fluid.layers.elementwise_add(emb, label) + cost = paddle.add(emb, label) avg_cost = paddle.mean(cost) sgd_optimizer = paddle.optimizer.SGD( diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index d5bb1583651b1a..f417667a82a024 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -117,7 +117,7 @@ def forward(self, input_embedding, init_hidden=None, init_cell=None): nn = fluid.layers.concat([self._input, pre_hidden], 1) gate_input = fluid.layers.matmul(x=nn, y=weight_1) - gate_input = fluid.layers.elementwise_add(gate_input, bias) + gate_input = paddle.add(gate_input, bias) i, j, f, o = fluid.layers.split( gate_input, num_or_sections=4, dim=-1 ) @@ -235,7 +235,7 @@ def forward(self, input, label, init_hidden, init_cell): rnn_out, shape=[-1, self.num_steps, self.hidden_size] ) projection = fluid.layers.matmul(rnn_out, self.softmax_weight) - projection = fluid.layers.elementwise_add(projection, self.softmax_bias) + projection = paddle.add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) loss = fluid.layers.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False diff --git a/python/paddle/fluid/tests/unittests/test_weight_decay.py b/python/paddle/fluid/tests/unittests/test_weight_decay.py index b6bf5c920dfac6..9128b13bb78391 100644 --- a/python/paddle/fluid/tests/unittests/test_weight_decay.py +++ b/python/paddle/fluid/tests/unittests/test_weight_decay.py @@ -160,9 +160,7 @@ def check_weight_decay( optimizer.minimize(avg_cost) for params in param_list: - updated_p = fluid.layers.elementwise_sub( - x=params[0], y=params[1] - ) + updated_p = paddle.subtract(x=params[0], y=params[1]) fluid.layers.assign(input=updated_p, output=params[0]) if use_parallel_exe: diff --git a/python/paddle/fluid/tests/unittests/test_while_loop_op.py b/python/paddle/fluid/tests/unittests/test_while_loop_op.py index 0b4cc57c77f496..d69f114f64b07c 100644 --- a/python/paddle/fluid/tests/unittests/test_while_loop_op.py +++ b/python/paddle/fluid/tests/unittests/test_while_loop_op.py @@ -32,7 +32,7 @@ def cond(i): return layers.less_than(i, ten) def body(i): - return layers.elementwise_add(x=i, y=one) + return paddle.add(x=i, y=one) main_program = Program() startup_program = Program() @@ -58,7 +58,7 @@ def cond(i, mem): return layers.less_than(i, ten) def body(i, mem): - mem = layers.elementwise_add(x=mem, y=one) + mem = paddle.add(x=mem, y=one) i = layers.increment(i) return [i, mem] @@ -166,8 +166,8 
@@ def internal_cond(j, init, sums): return layers.less_than(j, loop_len2) def internal_body(j, init, sums): - init = layers.elementwise_add(x=init, y=ones) - sums = layers.elementwise_add(x=init, y=sums) + init = paddle.add(x=init, y=ones) + sums = paddle.add(x=init, y=sums) j = layers.increment(j) return [j, init, sums] @@ -177,7 +177,7 @@ def internal_body(j, init, sums): j = result[0] init = result[1] sums = result[2] - sums = layers.elementwise_add(x=init, y=sums) + sums = paddle.add(x=init, y=sums) i = layers.increment(i) return [i, j, init, sums] @@ -222,7 +222,7 @@ def cond(i, x): return layers.less_than(i, eleven) def body(i, x): - x = layers.elementwise_mul(x=i, y=i) + x = paddle.multiply(x=i, y=i) i = layers.increment(i) return [i, x] @@ -316,16 +316,16 @@ def internal_cond(j, x, mem_array): def internal_body(j, x, mem_array): inner_data = layers.array_read(array=data_array, i=j) inner_prev = layers.array_read(array=mem_array, i=j) - inner_sum_0 = layers.elementwise_add(x=inner_data, y=inner_prev) - inner_sum_1 = layers.elementwise_add(x=x, y=inner_sum_0) + inner_sum_0 = paddle.add(x=inner_data, y=inner_prev) + inner_sum_1 = paddle.add(x=x, y=inner_sum_0) j = layers.increment(x=j, in_place=True) layers.array_write(inner_sum_1, i=j, array=mem_array) return [j, x, mem_array] outer_data = layers.array_read(array=data_array, i=i) outer_prev = layers.array_read(array=mem_array, i=i) - outer_sum_0 = layers.elementwise_add(x=outer_data, y=outer_prev) - outer_sum_1 = layers.elementwise_add(x=x, y=outer_sum_0) + outer_sum_0 = paddle.add(x=outer_data, y=outer_prev) + outer_sum_1 = paddle.add(x=x, y=outer_sum_0) i = layers.increment(x=i, in_place=True) layers.array_write(outer_sum_1, i=i, array=mem_array) j, x, mem_array = layers.while_loop( @@ -394,15 +394,15 @@ def cond(i): def body(i): def fn_add_three(): - data_add_three = layers.elementwise_add(x=i, y=three) + data_add_three = paddle.add(x=i, y=three) return data_add_three def fn_square(): - data_mul_data = layers.elementwise_mul(x=i, y=i) + data_mul_data = paddle.multiply(x=i, y=i) return data_mul_data def fn_add_one(): - data_add_one = layers.elementwise_add(x=i, y=one) + data_add_one = paddle.add(x=i, y=one) return data_add_one return layers.switch_case( diff --git a/python/paddle/fluid/tests/unittests/transformer_model.py b/python/paddle/fluid/tests/unittests/transformer_model.py index c956b0eabe016d..316241caf8f633 100644 --- a/python/paddle/fluid/tests/unittests/transformer_model.py +++ b/python/paddle/fluid/tests/unittests/transformer_model.py @@ -164,7 +164,7 @@ def __softmax(x, eps=1e-9): scaled_q = paddle.scale(x=q, scale=d_model**-0.5) product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - weights = __softmax(layers.elementwise_add(x=product, y=attn_bias)) + weights = __softmax(paddle.add(x=product, y=attn_bias)) if dropout_rate: weights = layers.dropout( weights, dropout_prob=dropout_rate, is_test=False From 1da6f2e399d0ee5372a4afc9b6d3830dd4a44401 Mon Sep 17 00:00:00 2001 From: wanghuancoder Date: Fri, 2 Dec 2022 07:59:26 +0800 Subject: [PATCH 096/154] adam batxx_pow not in cpu (#48626) --- python/paddle/optimizer/optimizer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py index 764c01736a3ab9..52caf92cd603a1 100644 --- a/python/paddle/optimizer/optimizer.py +++ b/python/paddle/optimizer/optimizer.py @@ -728,8 +728,10 @@ def _add_accumulator( if device is None: device = self._get_device_for_param(param.name) - if 
in_dygraph_mode() and ( - device == 'cpu' or isinstance(device, core.CPUPlace) + if ( + in_dygraph_mode() + and (device == 'cpu' or isinstance(device, core.CPUPlace)) + and (not core.is_compiled_with_xpu()) ): _C_ops.full_( var, From 3d0c8b12a486690a3feb912f6a05809dd89cb679 Mon Sep 17 00:00:00 2001 From: Aurelius84 Date: Fri, 2 Dec 2022 10:06:55 +0800 Subject: [PATCH 097/154] [Unitttet] Fix axes error from migrating paddle.squeeze in test_seq2seq (#48620) --- .../unittests/dygraph_to_static/seq2seq_dygraph_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index c9a58b9c78f48a..39a04d7a78548f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -297,7 +297,7 @@ def forward(self, inputs): loss = fluid.layers.softmax_with_cross_entropy( logits=dec_output, label=label, soft_label=False ) - loss = paddle.squeeze(loss, axes=[2]) + loss = paddle.squeeze(loss, axis=[2]) max_tar_seq_len = fluid.layers.shape(tar)[1] tar_mask = fluid.layers.sequence_mask( tar_sequence_length, maxlen=max_tar_seq_len, dtype='float32' @@ -831,7 +831,7 @@ def forward(self, inputs): loss = fluid.layers.softmax_with_cross_entropy( logits=dec_output, label=label, soft_label=False ) - loss = paddle.squeeze(loss, axes=[2]) + loss = paddle.squeeze(loss, axis=[2]) max_tar_seq_len = fluid.layers.shape(tar)[1] tar_mask = fluid.layers.sequence_mask( tar_sequence_length, maxlen=max_tar_seq_len, dtype='float32' From a686b3cfdfe13f512dfd1eafdfcbb2db96a99843 Mon Sep 17 00:00:00 2001 From: Wen Sun <35923278+HermitSun@users.noreply.github.com> Date: Fri, 2 Dec 2022 10:19:30 +0800 Subject: [PATCH 098/154] [Fluid API] Remove `shrink_memory`, `create_array` & `array_length` (#48287) * refactor: rm shrink_memory * refactor: rm create_array * refactor: rm array_length --- python/paddle/fluid/layers/control_flow.py | 170 +----------------- python/paddle/fluid/layers/math_op_patch.py | 4 +- python/paddle/fluid/layers/rnn.py | 2 +- .../collective_sendrecv_op_array.py | 2 +- .../fleet/hybrid_parallel_inference_helper.py | 4 +- .../seq2seq_dygraph_model.py | 12 +- .../tests/unittests/npu/test_concat_op_npu.py | 4 +- .../tests/unittests/npu/test_stack_op_npu.py | 4 +- .../unittests/test_beam_search_decode_op.py | 13 +- .../fluid/tests/unittests/test_concat_op.py | 4 +- .../unittests/test_lod_array_length_op.py | 5 +- .../tests/unittests/test_shrink_rnn_memory.py | 140 --------------- .../fluid/tests/unittests/test_slice_op.py | 6 +- .../fluid/tests/unittests/test_stack_op.py | 4 +- .../unittests/test_tensor_array_to_tensor.py | 8 +- .../paddle/jit/dy2static/convert_operators.py | 12 +- 16 files changed, 43 insertions(+), 351 deletions(-) delete mode 100644 python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 7a6079ad62c18e..80f0830e22e0ba 100755 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -56,10 +56,8 @@ 'Switch', 'increment', 'array_write', - 'create_array', 'less_than', 'array_read', - 'array_length', 'cond', 'IfElse', 'StaticRNN', @@ -1712,7 +1710,7 @@ def array_write(x, i, array=None): ], "The shape of index 'i' should be [1] in dygraph mode" i = i.numpy().item(0) 
if array is None: - array = create_array(x.dtype) + array = paddle.tensor.create_array(x.dtype) assert isinstance( array, list ), "The 'array' in array_write must be a list in dygraph mode" @@ -1750,64 +1748,6 @@ def array_write(x, i, array=None): return array -def create_array(dtype, initialized_list=None): - """ - This OP creates an LOD_TENSOR_ARRAY. It is used as - the input of :ref:`api_fluid_layers_array_read` and - :ref:`api_fluid_layers_array_write`. Also it can be used - with :ref:`api_fluid_layers_While` to create RNN network. - - Args: - dtype (str): The data type of the elements in the lod_tensor_array. - Support data type: float32, float64, int32, int64. - initialized_list(list): Used to initialize as default value for created array. - All values in initialized list should be a Tensor. - - Returns: - Variable: The empty lod_tensor_array. The data type of elements in Tensor is ``dtype``. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - data = fluid.layers.create_array(dtype='float32') # Create a float32 LoDTensorArray. - - """ - array = [] - if initialized_list is not None: - if not isinstance(initialized_list, (list, tuple)): - raise TypeError( - "Require type(initialized_list) should be list/tuple, but received {}".format( - type(initialized_list) - ) - ) - array = list(initialized_list) - - # NOTE: Only support plain list like [x, y,...], not support nested list in static mode. - for val in array: - if not isinstance(val, Variable): - raise TypeError( - "All values in `initialized_list` should be Variable, but recevied {}.".format( - type(val) - ) - ) - - if _non_static_mode(): - return array - - helper = LayerHelper("array", **locals()) - tensor_array = helper.create_variable( - name="{0}.out".format(helper.name), - type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, - dtype=dtype, - ) - - for val in array: - array_write(x=val, i=array_length(tensor_array), array=tensor_array) - - return tensor_array - - @templatedoc() def less_than(x, y, force_cpu=None, cond=None, name=None): """ @@ -1956,114 +1896,6 @@ def array_read(array, i): return out -def shrink_memory(x, i, table): - """ - This function creates an operator to shrink rnn memory using the RankTable - as mentioned in the input parameter. - - NOTE: This API is very low-level API. It is used by DynamicRNN only. - - Since the Dynamic RNN uses no-padding way to implement RNN. The sequence - will be sorted by order, and the length of valid memory will be shrink after - each time step. - - Args: - x(Variable): The memory object in the previous time step. - i(Variable): The step count variable. A int scalar as LoDTensor. - table(Variable): The RNNRankTable object. - - Returns: - the memory variable after shrink. - - Examples: - - Since this API is very low level API. The example is not provided. - Please reference the implementation of class DynamicRNN for detail - usage. - """ - helper = LayerHelper('shrink_memory', **locals()) - check_type(x, 'x', Variable, 'shrink_memory') - check_type(i, 'i', Variable, 'shrink_memory') - check_type(table, 'table', Variable, 'shrink_memory') - out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='shrink_rnn_memory', - inputs={'X': [x], 'I': [i], 'RankTable': [table]}, - outputs={'Out': [out]}, - attrs={}, - ) - return out - - -def array_length(array): - """ - This OP is used to get the length of the input array :ref:`api_fluid_LoDTensorArray` . 
- It can be used together with :ref:`api_fluid_layers_array_read` , :ref:`api_fluid_layers_array_write` , - :ref:`api_fluid_layers_While` OP to traverse, read and write LoDTensorArray. - - Args: - array (LoDTensorArray): The input array that will be used to compute the length. - - Returns: - Variable: 1-D Tensor with shape [1], which is the length of array. Datatype: int64. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - tmp = fluid.layers.zeros(shape=[10], dtype='int32') - i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) - # tmp is 1-D Tensor with shape [10]. We write tmp into arr on subscript 10, - # then the length of arr becomes 11. - arr = fluid.layers.array_write(tmp, i=i) - # return the length of arr - arr_len = fluid.layers.array_length(arr) - - # You can use executor to print out the length of LoDTensorArray. - input = fluid.layers.Print(arr_len, message="The length of LoDTensorArray:") - main_program = fluid.default_main_program() - exe = fluid.Executor(fluid.CPUPlace()) - exe.run(main_program) - - # The printed result is: - - # 1569576542 The length of LoDTensorArray: The place is:CPUPlace - # Tensor[array_length_0.tmp_0] - # shape: [1,] - # dtype: l - # data: 11, - - # 1-D Tensor with shape [1], whose value is 11. It means that the length of LoDTensorArray - # is 11. - # dtype is the corresponding C++ data type, which may vary in different environments. - # Eg: if the data type of tensor is int64, then the corresponding C++ data type is int64_t, - # so the dtype value is typeid(int64_t).Name(), which is 'x' on MacOS, 'l' on Linux, - # and '__int64' on Windows. They both represent 64-bit integer variables. - """ - - if _non_static_mode(): - assert isinstance( - array, list - ), "The 'array' in array_write must be a list in dygraph mode" - return len(array) - - if ( - not isinstance(array, Variable) - or array.type != core.VarDesc.VarType.LOD_TENSOR_ARRAY - ): - raise TypeError( - "array should be tensor array vairable in array_length Op" - ) - - helper = LayerHelper('array_length', **locals()) - tmp = helper.create_variable_for_type_inference(dtype='int64') - tmp.stop_gradient = True - helper.append_op( - type='lod_array_length', inputs={'X': [array]}, outputs={'Out': [tmp]} - ) - return tmp - - class ConditionalBlockGuard(BlockGuard): """ ConditionalBlockGuard is derived from BlockGuard. It is dedicated for diff --git a/python/paddle/fluid/layers/math_op_patch.py b/python/paddle/fluid/layers/math_op_patch.py index 5b5be84ac6b0ac..4829db24ce9863 100644 --- a/python/paddle/fluid/layers/math_op_patch.py +++ b/python/paddle/fluid/layers/math_op_patch.py @@ -18,7 +18,7 @@ from .. 
import core from ..framework import Variable, unique_name, static_only from .layer_function_generator import OpProtoHolder -from .control_flow import array_write, array_length +from .control_flow import array_write from paddle.fluid.dygraph.base import in_declarative_mode _supported_int_dtype_ = [ @@ -246,6 +246,8 @@ def append(self, var): self.type ) ) + from paddle.tensor.array import array_length + array_write(x=var, i=array_length(self), array=self) @static_only diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 1914c38f5423b0..c56b99465f194d 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -1638,7 +1638,7 @@ def _create_array_out_of_while(dtype): default_main_program().current_block_idx = ( default_main_program().current_block().parent_idx ) - tensor_array = control_flow.create_array(dtype) + tensor_array = paddle.tensor.create_array(dtype) default_main_program().current_block_idx = current_block_idx return tensor_array diff --git a/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op_array.py b/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op_array.py index 0617c2f5499282..a0ed3b2efe3d37 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op_array.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op_array.py @@ -49,7 +49,7 @@ def get_model(self, main_prog, startup_program): data2 = fluid.layers.assign( np.array([[0, 1, 2]], dtype='float32') ) - tensor_array = fluid.layers.create_array(dtype='float32') + tensor_array = paddle.tensor.create_array(dtype='float32') i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0) fluid.layers.array_write(data1, i, tensor_array) fluid.layers.array_write(data2, i + 1, tensor_array) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_inference_helper.py b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_inference_helper.py index 569fa777822afe..2cb6f3326e91e6 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_inference_helper.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_inference_helper.py @@ -127,12 +127,12 @@ def test_hybrid_parallel_inference_helper_mp1pp2(self): layers.assign(layers.cast(cond_int, dtype='bool'), cond) with paddle.fluid.device_guard(f'{device}:all'): - out = layers.create_array(data.dtype) + out = paddle.tensor.create_array(data.dtype) layers.assign(data, out) with paddle.fluid.device_guard(f'{device}:all'): # use a empty lod_tensor_array to clear lod_tensor_array - layers.assign(layers.create_array(data.dtype), data) + layers.assign(paddle.tensor.create_array(data.dtype), data) helper = HybridParallelInferenceHelper( startup_program, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index 39a04d7a78548f..5f22b03cc9ff09 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -220,8 +220,8 @@ def forward(self, inputs): np.zeros((self.batch_size, self.hidden_size), dtype='float32') ) zero = fluid.layers.zeros(shape=[1], dtype="int64") - enc_hidden = fluid.layers.create_array(dtype="float32") - enc_cell = fluid.layers.create_array(dtype="float32") + enc_hidden = 
paddle.tensor.create_array(dtype="float32") + enc_cell = paddle.tensor.create_array(dtype="float32") for i in range(self.num_layers): index = zero + i enc_hidden = fluid.layers.array_write( @@ -322,8 +322,8 @@ def beam_search(self, inputs): np.zeros((self.batch_size, self.hidden_size), dtype='float32') ) zero = fluid.layers.zeros(shape=[1], dtype="int64") - enc_hidden = fluid.layers.create_array(dtype="float32") - enc_cell = fluid.layers.create_array(dtype="float32") + enc_hidden = paddle.tensor.create_array(dtype="float32") + enc_cell = paddle.tensor.create_array(dtype="float32") for j in range(self.num_layers): index = zero + j enc_hidden = fluid.layers.array_write( @@ -735,8 +735,8 @@ def forward(self, inputs): ) enc_hidden_0.stop_gradient = True zero = fluid.layers.zeros(shape=[1], dtype="int64") - enc_hidden = fluid.layers.create_array(dtype="float32") - enc_cell = fluid.layers.create_array(dtype="float32") + enc_hidden = paddle.tensor.create_array(dtype="float32") + enc_cell = paddle.tensor.create_array(dtype="float32") for i in range(self.num_layers): index = zero + i enc_hidden = fluid.layers.array_write( diff --git a/python/paddle/fluid/tests/unittests/npu/test_concat_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_concat_op_npu.py index 301ce9c47736f1..102a95c4cdc49b 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_concat_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_concat_op_npu.py @@ -170,7 +170,7 @@ def set_program(self, use_fluid_api): self.program = fluid.Program() with fluid.program_guard(self.program): input = fluid.layers.assign(self.x) - tensor_array = fluid.layers.create_array(dtype='float32') + tensor_array = paddle.tensor.create_array(dtype='float32') zero = fluid.layers.fill_constant( shape=[1], value=0, dtype="int64" ) @@ -183,7 +183,7 @@ def set_program(self, use_fluid_api): self.program = paddle.static.Program() with paddle.static.program_guard(self.program): input = paddle.assign(self.x) - tensor_array = fluid.layers.create_array( + tensor_array = paddle.tensor.create_array( dtype='float32' ) # Api create_array is not supported in paddle 2.0 yet. 
zero = paddle.zeros(shape=[1], dtype="int64") diff --git a/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py index e2509e12b27053..ed10465c295fb6 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py @@ -138,7 +138,7 @@ def set_program(self): self.program = fluid.Program() with fluid.program_guard(self.program): input = fluid.layers.assign(self.x) - tensor_array = fluid.layers.create_array(dtype='float32') + tensor_array = paddle.tensor.create_array(dtype='float32') zero = fluid.layers.fill_constant(shape=[1], value=0, dtype="int64") for i in range(self.iter_num): @@ -176,7 +176,7 @@ def set_program(self): self.program = fluid.Program() with fluid.program_guard(self.program): input = fluid.layers.assign(self.x) - tensor_array = fluid.layers.create_array(dtype='float32') + tensor_array = paddle.tensor.create_array(dtype='float32') zero = fluid.layers.fill_constant(shape=[1], value=0, dtype="int64") for i in range(self.iter_num): diff --git a/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py b/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py index c69d8fac11f3c9..062c00a03b59f7 100644 --- a/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py +++ b/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py @@ -16,6 +16,7 @@ import numpy as np +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.framework import Program, program_guard @@ -124,7 +125,7 @@ def test_errors(self): def test_id_Variable(): # the input pre_ids must be Variable test_ids = np.random.randint(1, 5, [5, 1]).astype("int64") - scores = fluid.layers.create_array(dtype='float32') + scores = paddle.tensor.create_array(dtype='float32') fluid.layers.beam_search_decode( test_ids, scores, beam_size=5, end_id=0 ) @@ -133,7 +134,7 @@ def test_id_Variable(): def test_score_Variable(): # the input pre_scores must be Variable - ids = fluid.layers.create_array(dtype='int64') + ids = paddle.tensor.create_array(dtype='int64') test_scores = np.random.uniform(1, 5, [5, 1]).astype("float32") fluid.layers.beam_search_decode( ids, test_scores, beam_size=5, end_id=0 @@ -143,8 +144,8 @@ def test_score_Variable(): def test_id_dtype(): # the dtype of input pre_ids must be int64 - type_ids = fluid.layers.create_array(dtype='float32') - scores = fluid.layers.create_array(dtype='float32') + type_ids = paddle.tensor.create_array(dtype='float32') + scores = paddle.tensor.create_array(dtype='float32') fluid.layers.beam_search_decode( type_ids, scores, beam_size=5, end_id=0 ) @@ -153,8 +154,8 @@ def test_id_dtype(): def test_score_dtype(): # the dtype of input pre_scores must be float32 - ids = fluid.layers.create_array(dtype='int64') - type_scores = fluid.layers.create_array(dtype='int64') + ids = paddle.tensor.create_array(dtype='int64') + type_scores = paddle.tensor.create_array(dtype='int64') fluid.layers.beam_search_decode( ids, type_scores, beam_size=5, end_id=0 ) diff --git a/python/paddle/fluid/tests/unittests/test_concat_op.py b/python/paddle/fluid/tests/unittests/test_concat_op.py index 79984c4a96da46..2cf45e271b8c32 100644 --- a/python/paddle/fluid/tests/unittests/test_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_concat_op.py @@ -414,7 +414,7 @@ def set_program(self, use_fluid_api): self.program = fluid.Program() with fluid.program_guard(self.program): input = 
fluid.layers.assign(self.x) - tensor_array = fluid.layers.create_array(dtype='float32') + tensor_array = paddle.tensor.create_array(dtype='float32') zero = fluid.layers.fill_constant( shape=[1], value=0, dtype="int64" ) @@ -427,7 +427,7 @@ def set_program(self, use_fluid_api): self.program = paddle.static.Program() with paddle.static.program_guard(self.program): input = paddle.assign(self.x) - tensor_array = fluid.layers.create_array( + tensor_array = paddle.tensor.create_array( dtype='float32' ) # Api create_array is not supported in paddle 2.0 yet. zero = paddle.zeros(shape=[1], dtype="int64") diff --git a/python/paddle/fluid/tests/unittests/test_lod_array_length_op.py b/python/paddle/fluid/tests/unittests/test_lod_array_length_op.py index 4b18c2a9fd81fc..b649856978c98c 100644 --- a/python/paddle/fluid/tests/unittests/test_lod_array_length_op.py +++ b/python/paddle/fluid/tests/unittests/test_lod_array_length_op.py @@ -17,7 +17,6 @@ import numpy import paddle -import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.layers as layers from paddle.fluid import Program, program_guard @@ -29,7 +28,7 @@ def test_array_length(self): tmp = layers.zeros(shape=[10], dtype='int32') i = layers.fill_constant(shape=[1], dtype='int64', value=10) arr = layers.array_write(tmp, i=i) - arr_len = layers.array_length(arr) + arr_len = paddle.tensor.array_length(arr) cpu = core.CPUPlace() exe = Executor(cpu) result = exe.run(fetch_list=[arr_len])[0] @@ -42,7 +41,7 @@ def test_errors(self): # for ci coverage x1 = numpy.random.randn(2, 4).astype('int32') - self.assertRaises(TypeError, fluid.layers.array_length, array=x1) + self.assertRaises(TypeError, paddle.tensor.array_length, array=x1) class TestArrayLengthApi(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py b/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py deleted file mode 100644 index 24c7e44afcb2ab..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np - -import paddle -import paddle.fluid.core as core -import paddle.fluid.layers as layers -from paddle.fluid.backward import append_backward -from paddle.fluid.executor import Executor -from paddle.fluid.framework import Program, program_guard, switch_main_program -from paddle.fluid.layers.control_flow import lod_rank_table, shrink_memory - - -class TestShrinkRNNMemoryBase(unittest.TestCase): - def setUp(self): - self.main_program = Program() - switch_main_program(self.main_program) - x = layers.data('x', shape=[100], dtype='float32') - x.stop_gradient = False - rank_table_tensor = layers.data( - 'rank_table_tensor', shape=[1], dtype='float32', lod_level=1 - ) - table = lod_rank_table(x=rank_table_tensor) - i = layers.zeros(dtype='int64', shape=[1]) - self.mem1 = shrink_memory(x=x, i=i, table=table) - i = layers.increment(x=i) - i.stop_gradient = True - self.mem2 = shrink_memory(x=self.mem1, i=i, table=table) - i = layers.increment(x=i) - i.stop_gradient = True - self.mem3 = shrink_memory(x=self.mem2, i=i, table=table) - mem3_mean = paddle.mean(self.mem3) - append_backward(loss=mem3_mean) - self.x_grad = self.main_program.global_block().var('x@GRAD') - - def sum_lodtensor(self, tensor): - sum_res = 0.0 - for i in range(np.product(tensor.shape())): - sum_res += tensor._get_float_element(i) - return sum_res - - -class TestShrinkRNNMemoryReferLoD(TestShrinkRNNMemoryBase): - def test_refer_lod(self): - cpu = core.CPUPlace() - x_tensor = core.LoDTensor() - x_tensor.set_recursive_sequence_lengths([[2, 3, 1]]) - tensor_np = np.random.random(size=(6, 100)).astype('float32') - x_tensor.set(tensor_np, cpu) - - rank_table_tensor = core.LoDTensor() - rank_table_tensor.set_recursive_sequence_lengths([[1, 2, 3]]) - rank_table_tensor.set( - np.random.random(size=(6, 1)).astype('float32'), cpu - ) - - exe = Executor(cpu) - outs = exe.run( - feed={'x': x_tensor, 'rank_table_tensor': rank_table_tensor}, - fetch_list=[self.mem1, self.mem2, self.mem3, self.x_grad], - return_numpy=False, - ) - np.testing.assert_allclose(tensor_np[0:6], outs[0], rtol=1e-05) - np.testing.assert_allclose(tensor_np[0:5], outs[1], rtol=1e-05) - np.testing.assert_allclose(tensor_np[0:2], outs[2], rtol=1e-05) - self.assertAlmostEqual(1.0, self.sum_lodtensor(outs[3]), delta=0.01) - - -class TestShrinkRNNMemoryNoLoD(TestShrinkRNNMemoryBase): - def test_no_lod(self): - cpu = core.CPUPlace() - x_tensor = core.LoDTensor() - tensor_np = np.random.random(size=(3, 100)).astype('float32') - x_tensor.set(tensor_np, cpu) - - rank_table_tensor = core.LoDTensor() - rank_table_tensor.set_recursive_sequence_lengths([[1, 2, 3]]) - rank_table_tensor.set( - np.random.random(size=(6, 1)).astype('float32'), cpu - ) - - exe = Executor(cpu) - outs = exe.run( - feed={'x': x_tensor, 'rank_table_tensor': rank_table_tensor}, - fetch_list=[self.mem1, self.mem2, self.mem3, self.x_grad], - return_numpy=False, - ) - np.testing.assert_allclose(tensor_np[0:3], outs[0], rtol=1e-05) - np.testing.assert_allclose(tensor_np[0:2], outs[1], rtol=1e-05) - np.testing.assert_allclose(tensor_np[0:1], outs[2], rtol=1e-05) - self.assertAlmostEqual(1.0, self.sum_lodtensor(outs[3]), delta=0.01) - - -class TestShrinkRNNMemoryOpError(unittest.TestCase): - def test_erroes(self): - with program_guard(Program(), Program()): - x = layers.zeros(dtype='int64', shape=[3, 100]) - i = layers.zeros(dtype='int64', shape=[1]) - rank_table_tensor = core.LoDTensor() - rank_table_tensor.set_recursive_sequence_lengths([[1, 2, 3]]) - rank_table_tensor.set( - 
np.random.random(size=(6, 1)).astype('float32'), core.CPUPlace() - ) - rank_table = np.random.random(size=(6, 1)).astype('float32') - - # The type of x in shrink_rnn_memory must be Variable. - def test_x_type(): - out = shrink_memory(x=1, i=i, table=rank_table_tensor) - - self.assertRaises(TypeError, test_x_type) - - # The type of i in shrink_rnn_memory must be Variable. - def test_i_type(): - out = shrink_memory(x=x, i=0, table=rank_table_tensor) - - self.assertRaises(TypeError, test_i_type) - - # The type of table in shrink_rnn_memory must be Variable. - def test_table_type(): - out = shrink_memory(x=x, i=i, table=rank_table) - - self.assertRaises(TypeError, test_table_type) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py index 616fc33a743400..371db6edd75cb8 100644 --- a/python/paddle/fluid/tests/unittests/test_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_slice_op.py @@ -692,9 +692,9 @@ def set_program_and_run(self, main_program, case_num): for each_x in x: each_x.stop_gradient = False - arr = layers.create_array(dtype="float32") + arr = paddle.tensor.create_array(dtype="float32") for i in range(3): - idx = layers.array_length(arr) + idx = paddle.tensor.array_length(arr) arr = layers.array_write(x=x[i], i=idx, array=arr) if case_num == 1: @@ -702,7 +702,7 @@ def set_program_and_run(self, main_program, case_num): elif case_num == 2: end = ( - fluid.layers.array_length(arr) - 1 + paddle.tensor.array_length(arr) - 1 ) # dtype of end is int64 self.sliced_arr = slice_arr = arr[self.start : end] output, _ = fluid.layers.tensor_array_to_tensor( diff --git a/python/paddle/fluid/tests/unittests/test_stack_op.py b/python/paddle/fluid/tests/unittests/test_stack_op.py index 95f589672c6e84..a28dc6ed582340 100644 --- a/python/paddle/fluid/tests/unittests/test_stack_op.py +++ b/python/paddle/fluid/tests/unittests/test_stack_op.py @@ -168,7 +168,7 @@ def set_program(self): self.program = fluid.Program() with fluid.program_guard(self.program): input = fluid.layers.assign(self.x) - tensor_array = fluid.layers.create_array(dtype='float32') + tensor_array = paddle.tensor.create_array(dtype='float32') zero = fluid.layers.fill_constant(shape=[1], value=0, dtype="int64") for i in range(self.iter_num): @@ -206,7 +206,7 @@ def set_program(self): self.program = fluid.Program() with fluid.program_guard(self.program): input = fluid.layers.assign(self.x) - tensor_array = fluid.layers.create_array(dtype='float32') + tensor_array = paddle.tensor.create_array(dtype='float32') zero = fluid.layers.fill_constant(shape=[1], value=0, dtype="int64") for i in range(self.iter_num): diff --git a/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py index 67899f150cc26d..123a920af7cc6c 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py @@ -191,7 +191,7 @@ def setUp(self): def set_program(self): self.program = fluid.Program() with fluid.program_guard(self.program): - self.array = array = fluid.layers.create_array(dtype='float32') + self.array = array = paddle.tensor.create_array(dtype='float32') idx = fluid.layers.fill_constant(shape=[1], dtype="int64", value=0) for i, x in enumerate(self.inputs): x = fluid.layers.assign(x) @@ -236,7 +236,7 @@ def _test_case(self, inp1, inp2): x1 = fluid.layers.assign(inp2) 
x1.stop_gradient = False i = fluid.layers.fill_constant(shape=[1], dtype="int64", value=0) - array = fluid.layers.create_array(dtype='float32') + array = paddle.tensor.create_array(dtype='float32') fluid.layers.array_write(x0, i, array) fluid.layers.array_write(x1, i + 1, array) output_stack, output_index_stack = fluid.layers.tensor_array_to_tensor( @@ -275,7 +275,7 @@ def test_while_loop_case(self): zero = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0) i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=1) ten = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) - array = fluid.layers.create_array(dtype='float32') + array = paddle.tensor.create_array(dtype='float32') inp0 = np.random.rand(2, 3, 4).astype("float32") x0 = fluid.layers.assign(inp0) fluid.layers.array_write(x0, zero, array) @@ -290,7 +290,7 @@ def body(i, end, array): _, _, array = fluid.layers.while_loop(cond, body, [i, ten, array]) - self.assertTrue(fluid.layers.array_length(array), 10) + self.assertTrue(paddle.tensor.array_length(array), 10) last = fluid.layers.fill_constant(shape=[1], dtype='int64', value=9) np.testing.assert_array_equal( fluid.layers.array_read(array, last).numpy(), inp0 diff --git a/python/paddle/jit/dy2static/convert_operators.py b/python/paddle/jit/dy2static/convert_operators.py index 3643da7591f4f4..898d1c35f37bf4 100644 --- a/python/paddle/jit/dy2static/convert_operators.py +++ b/python/paddle/jit/dy2static/convert_operators.py @@ -21,10 +21,8 @@ from paddle.fluid.framework import core, Variable from paddle.fluid.layers import Assert, Print from paddle.fluid.layers import ( - array_length, array_read, array_write, - create_array, ) from paddle.fluid.layers import ( assign, @@ -136,7 +134,7 @@ def _convert_tensor_arrray_if_necessary(setterhelper, push_pop_names): def maybe_to_tensor_array(v): if isinstance(v, list): - return create_array("float32", initialized_list=v) + return paddle.tensor.create_array("float32", initialized_list=v) else: return v @@ -531,7 +529,7 @@ def convert_len(var): return var.shape[0] return nn.shape(var)[0] elif var.type == core.VarDesc.VarType.LOD_TENSOR_ARRAY: - return control_flow.array_length(var) + return paddle.tensor.array_length(var) else: raise TypeError( 'len(var) only supports LoDTensor/LoDTensorArray/SelectedRows, but received %s.' @@ -790,11 +788,11 @@ def cond(i, new_array): def body(i, new_array): item = array_read(array=array, i=i) - array_write(item, array_length(new_array), new_array) + array_write(item, paddle.tensor.array_length(new_array), new_array) i = increment(i) return i, new_array - arr_len = array_length(array) + arr_len = paddle.tensor.array_length(array) if idx < 0: idx = idx + arr_len else: @@ -814,7 +812,7 @@ def body(i, new_array): # Maybe support start == end for slice op. 
def _slice_tensor_array(array, start, end): def true_fn(): - null_array = create_array("float32") + null_array = paddle.tensor.create_array("float32") return null_array def false_fn(array, start, end): From 61486bf286d729c201130f3723a154d64a5c5087 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Fri, 2 Dec 2022 10:29:04 +0800 Subject: [PATCH 099/154] polish fusion kernel naming (#48609) --- paddle/phi/kernels/fusion/README.md | 4 +++- .../kernels/fusion/fused_softmax_mask_grad_kernel.h | 8 ++++---- paddle/phi/kernels/fusion/fused_softmax_mask_kernel.h | 8 ++++---- .../fusion/gpu/fused_softmax_mask_grad_kernel.cu | 10 +++++----- .../kernels/fusion/gpu/fused_softmax_mask_kernel.cu | 10 +++++----- 5 files changed, 21 insertions(+), 19 deletions(-) diff --git a/paddle/phi/kernels/fusion/README.md b/paddle/phi/kernels/fusion/README.md index 2080a37dd0fd59..1e9e2bb7e43145 100644 --- a/paddle/phi/kernels/fusion/README.md +++ b/paddle/phi/kernels/fusion/README.md @@ -10,4 +10,6 @@ - Fusion Kernel is generally used to accelerate the combined operation on a certain device. If all devices need to be implemented, the cost is relatively high. - We don't recommend implementing a pseudo kernel that just throws exception, if not required, it can be not implemented. -3. Fusion Kernel needs to be in the `phi/fusion` namespace +3. Fusion Kernel needs to be in the `phi/fusion` namespace. + +4. The file naming of the Fusion Kernel needs to follow the format of `fused_[fusion operation name]_kernel.h/cc/cu`, the kernel function naming of the Fusion Kernel needs to follow the format of `Fused[fusion operation name]Kernel`, and the kernel registration naming of the Fusion Kernel needs to follow the format of `fused_[fusion operation name]`. diff --git a/paddle/phi/kernels/fusion/fused_softmax_mask_grad_kernel.h b/paddle/phi/kernels/fusion/fused_softmax_mask_grad_kernel.h index 391c614801f232..8f4486aa4903d9 100644 --- a/paddle/phi/kernels/fusion/fused_softmax_mask_grad_kernel.h +++ b/paddle/phi/kernels/fusion/fused_softmax_mask_grad_kernel.h @@ -19,9 +19,9 @@ namespace phi { template -void SoftmaxMaskFuseGradKernel(const Context& dev_ctx, - const DenseTensor& out, - const DenseTensor& out_grad, - DenseTensor* x_grad); +void FusedSoftmaxMaskGradKernel(const Context& dev_ctx, + const DenseTensor& out, + const DenseTensor& out_grad, + DenseTensor* x_grad); } // namespace phi diff --git a/paddle/phi/kernels/fusion/fused_softmax_mask_kernel.h b/paddle/phi/kernels/fusion/fused_softmax_mask_kernel.h index dd08373f428889..1263e8c5d64ebd 100644 --- a/paddle/phi/kernels/fusion/fused_softmax_mask_kernel.h +++ b/paddle/phi/kernels/fusion/fused_softmax_mask_kernel.h @@ -19,9 +19,9 @@ namespace phi { template -void SoftmaxMaskFuseKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& mask, - DenseTensor* out); +void FusedSoftmaxMaskKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& mask, + DenseTensor* out); } // namespace phi diff --git a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu index ab731f8f239901..d55802fdb96699 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu @@ -118,10 +118,10 @@ __global__ void SoftmaxMaskFuseGradGPUKernel(const T* grad_input, } template -void SoftmaxMaskFuseGradKernel(const Context& dev_ctx, - const DenseTensor& out, - const DenseTensor& out_grad, - DenseTensor* x_grad) { 
+void FusedSoftmaxMaskGradKernel(const Context& dev_ctx, + const DenseTensor& out, + const DenseTensor& out_grad, + DenseTensor* x_grad) { auto* grad_x_data = dev_ctx.template Alloc(x_grad); auto* grad_y_data = out_grad.data(); auto* softmax_rst_data = out.data(); @@ -196,6 +196,6 @@ void SoftmaxMaskFuseGradKernel(const Context& dev_ctx, PD_REGISTER_KERNEL(fused_softmax_mask_grad, GPU, ALL_LAYOUT, - phi::fusion::SoftmaxMaskFuseGradKernel, + phi::fusion::FusedSoftmaxMaskGradKernel, float, phi::dtype::float16) {} diff --git a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_kernel.cu index e86b4841e926a8..0902b9448eca6c 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_kernel.cu @@ -146,10 +146,10 @@ __global__ void SoftmaxMaskFuseGPUKernel(const T* x_data, // T only supports fp16 // leave as template only for future update template -void SoftmaxMaskFuseKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& mask, - DenseTensor* out) { +void FusedSoftmaxMaskKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& mask, + DenseTensor* out) { auto* x_data = x.data(); auto* mask_data = mask.data(); auto* y_data = dev_ctx.template Alloc(out); @@ -275,6 +275,6 @@ void SoftmaxMaskFuseKernel(const Context& dev_ctx, PD_REGISTER_KERNEL(fused_softmax_mask, GPU, ALL_LAYOUT, - phi::fusion::SoftmaxMaskFuseKernel, + phi::fusion::FusedSoftmaxMaskKernel, float, phi::dtype::float16) {} From 5bcf35cd59263f692d73944a6e5ec7406fd46b53 Mon Sep 17 00:00:00 2001 From: Weilong Wu Date: Fri, 2 Dec 2022 10:35:33 +0800 Subject: [PATCH 100/154] rm test_eager_guard (#48623) --- .../paddle/fluid/tests/unittests/test_tensor_to_list.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_tensor_to_list.py b/python/paddle/fluid/tests/unittests/test_tensor_to_list.py index c8f438325ae30b..bdabea5a2bbe6e 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_to_list.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_to_list.py @@ -18,14 +18,13 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.framework import _test_eager_guard class TensorToListTest(unittest.TestCase): def setUp(self): self.shape = [11, 25, 32, 43] - def func_tensor_tolist(self): + def test_tensor_tolist(self): places = [fluid.CPUPlace()] if fluid.core.is_compiled_with_cuda(): places.append(fluid.CUDAPlace(0)) @@ -42,11 +41,6 @@ def func_tensor_tolist(self): self.assertEqual(tensorlist, expectlist) - def test_tensor_tolist(self): - with _test_eager_guard(): - self.func_tensor_tolist() - self.func_tensor_tolist() - if __name__ == '__main__': unittest.main() From 4a66e7cffd519db23106892aed69423d827ade1b Mon Sep 17 00:00:00 2001 From: Yiqun Liu Date: Fri, 2 Dec 2022 10:38:36 +0800 Subject: [PATCH 101/154] Optimize the python overhead of reshape and layer_norm. 
(#48635) --- python/paddle/nn/functional/norm.py | 13 +++------- python/paddle/tensor/manipulation.py | 38 ++++++++++------------------ 2 files changed, 18 insertions(+), 33 deletions(-) diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py index f2546b62442d05..6e248af3333aac 100644 --- a/python/paddle/nn/functional/norm.py +++ b/python/paddle/nn/functional/norm.py @@ -382,16 +382,11 @@ def layer_norm( ) if in_dygraph_mode(): - ( - pre_act, - _, - _, - ) = _C_ops.layer_norm(x, weight, bias, epsilon, begin_norm_axis) - - return dygraph_utils._append_activation_in_dygraph(pre_act, act=None) + out, _, _ = _C_ops.layer_norm(x, weight, bias, epsilon, begin_norm_axis) + return out if _in_legacy_dygraph(): - pre_act, _, _ = _legacy_C_ops.layer_norm( + out, _, _ = _legacy_C_ops.layer_norm( x, weight, bias, @@ -400,7 +395,7 @@ def layer_norm( 'begin_norm_axis', begin_norm_axis, ) - return dygraph_utils._append_activation_in_dygraph(pre_act, act=None) + return out check_variable_and_dtype( x, 'input', ['float16', 'float32', 'float64'], 'LayerNorm' diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index cb4fec4a33bd97..fceae51e14564b 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -14,7 +14,6 @@ # TODO: define functions to manipulate a tensor -import warnings from collections import Counter import numpy as np @@ -22,7 +21,7 @@ import paddle from paddle import _C_ops, _legacy_C_ops -from ..common_ops_import import _varbase_creator, dygraph_utils, fill_constant +from ..common_ops_import import _varbase_creator, fill_constant from ..fluid.data_feeder import ( check_dtype, check_type, @@ -3564,16 +3563,9 @@ def reshape(x, shape, name=None): """ actual_shape = None - act = None - inplace = False if in_dygraph_mode(): tmp_tensor_type = core.eager.Tensor - # TODO(zhiqiu): enable inplace in dygraph mode. - if inplace: - warnings.warn( - "Inplace on reshape is not allowed and will be discarded in dygraph mode currently." - ) if isinstance(shape, (list, tuple)): shape = [ item.numpy().item(0) @@ -3581,8 +3573,11 @@ def reshape(x, shape, name=None): else item for item in shape ] - out = _C_ops.reshape(x, shape) - elif isinstance(shape, tmp_tensor_type): + if shape == x.shape: + out = x + else: + out = _C_ops.reshape(x, shape) + elif isinstance(shape, core.eager.Tensor): shape.stop_gradient = True out = _C_ops.reshape(x, shape) else: @@ -3591,14 +3586,10 @@ def reshape(x, shape, name=None): " got '{}.'".format(type(shape)) ) - return dygraph_utils._append_activation_in_dygraph(out, act) + return out else: if _in_legacy_dygraph(): tmp_tensor_type = Variable - if inplace: - warnings.warn( - "Inplace on reshape is not allowed and will be discarded in dygraph mode currently." 
- ) if isinstance(shape, (list, tuple)): shape = [ item.numpy().item(0) if isinstance(item, Variable) else item @@ -3614,7 +3605,7 @@ def reshape(x, shape, name=None): " got '{}.'".format(type(shape)) ) - return dygraph_utils._append_activation_in_dygraph(out, act) + return out check_variable_and_dtype( x, @@ -3690,11 +3681,7 @@ def get_attr_shape(list_shape): actual_shape.stop_gradient = True inputs["Shape"] = actual_shape - out = ( - x - if inplace - else helper.create_variable_for_type_inference(dtype=x.dtype) - ) + out = helper.create_variable_for_type_inference(dtype=x.dtype) x_shape = helper.create_variable_for_type_inference(dtype=x.dtype) helper.append_op( type="reshape2", @@ -3703,7 +3690,7 @@ def get_attr_shape(list_shape): outputs={"Out": out, "XShape": x_shape}, ) - return helper.append_activation(out) + return out @inplace_apis_in_dygraph_only @@ -3721,7 +3708,10 @@ def reshape_(x, shape, name=None): else item for item in shape ] - out = _C_ops.reshape_(x, shape) + if shape == x.shape: + out = x + else: + out = _C_ops.reshape_(x, shape) elif isinstance(shape, tmp_tensor_type): shape.stop_gradient = True out = _C_ops.reshape_(x, shape) From c34812acf2fbe89c1c173bdb9b842c7c2643b9e1 Mon Sep 17 00:00:00 2001 From: Infinity_lee Date: Fri, 2 Dec 2022 10:43:18 +0800 Subject: [PATCH 102/154] fix boardcasting superlink (#48434) * fix boardcasting superlink * Update bitwise_op.cc * fix typo errors(from 48186) * Update python/paddle/distribution/uniform.py Co-authored-by: Ligoml <39876205+Ligoml@users.noreply.github.com> * Update math.py * Update math.py * refix * Update logic.py * BaseTransform api doc; test=docs_preview * Update python/paddle/vision/transforms/transforms.py * for text block; test=docs_preview * Update transforms.py Co-authored-by: Ligoml <39876205+Ligoml@users.noreply.github.com> --- .../fluid/operators/controlflow/bitwise_op.cc | 4 +- python/paddle/distribution/uniform.py | 7 ++- python/paddle/tensor/creation.py | 6 ++- python/paddle/tensor/logic.py | 17 ++++-- python/paddle/tensor/math.py | 52 ++++++++++++++----- python/paddle/vision/image.py | 6 +-- python/paddle/vision/transforms/functional.py | 15 +++++- python/paddle/vision/transforms/transforms.py | 47 ++++++++--------- 8 files changed, 106 insertions(+), 48 deletions(-) diff --git a/paddle/fluid/operators/controlflow/bitwise_op.cc b/paddle/fluid/operators/controlflow/bitwise_op.cc index 90faea31532de7..4b339f4bd58627 100644 --- a/paddle/fluid/operators/controlflow/bitwise_op.cc +++ b/paddle/fluid/operators/controlflow/bitwise_op.cc @@ -48,7 +48,9 @@ It operates ``%s`` on Tensor ``X`` and ``Y`` . %s .. note:: - ``paddle.%s`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting`. + ``paddle.%s`` supports broadcasting. If you want know more about broadcasting, please refer to please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor. )DOC", comment.type, comment.equation, diff --git a/python/paddle/distribution/uniform.py b/python/paddle/distribution/uniform.py index 1b1ef5906d9643..9b41dd026709f4 100644 --- a/python/paddle/distribution/uniform.py +++ b/python/paddle/distribution/uniform.py @@ -49,7 +49,12 @@ class Uniform(distribution.Distribution): * :math:`Z`: is the normalizing constant. The parameters `low` and `high` must be shaped in a way that supports - :ref:`user_guide_broadcasting` (e.g., `high - low` is a valid operation). 
+ `Boardcasting` (e.g., `high - low` is a valid operation). + + Note: + If you want know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor Args: low(int|float|list|tuple|numpy.ndarray|Tensor): The lower boundary of diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index e44582c0f3a11b..e5005dbe16f206 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -2005,8 +2005,10 @@ def complex(real, imag, name=None): Returns: Tensor: The output tensor. The data type is 'complex64' or 'complex128', with the same precision as ``real`` and ``imag``. - **Note**: - ``paddle.complex`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting` . + Note: + ``paddle.complex`` supports broadcasting. If you want know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor Examples: .. code-block:: python diff --git a/python/paddle/tensor/logic.py b/python/paddle/tensor/logic.py index a4cc95edfc7c35..271aa7240115f1 100755 --- a/python/paddle/tensor/logic.py +++ b/python/paddle/tensor/logic.py @@ -96,7 +96,9 @@ def logical_and(x, y, out=None, name=None): out = x \&\& y Note: - ``paddle.logical_and`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting`. + ``paddle.logical_and`` supports broadcasting. If you want know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor Args: x (Tensor): the input tensor, it's data type should be one of bool, int8, int16, in32, in64, float32, float64. @@ -136,7 +138,9 @@ def logical_or(x, y, out=None, name=None): out = x || y Note: - ``paddle.logical_or`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting`. + ``paddle.logical_or`` supports broadcasting. If you want know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor Args: x (Tensor): the input tensor, it's data type should be one of bool, int8, int16, in32, in64, float32, float64. @@ -178,7 +182,9 @@ def logical_xor(x, y, out=None, name=None): out = (x || y) \&\& !(x \&\& y) Note: - ``paddle.logical_xor`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting`. + ``paddle.logical_xor`` supports broadcasting. If you want know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor Args: x (Tensor): the input tensor, it's data type should be one of bool, int8, int16, in32, in64, float32, float64. @@ -221,6 +227,11 @@ def logical_not(x, out=None, name=None): out = !x + Note: + ``paddle.logical_not`` supports broadcasting. If you want know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor + Args: x(Tensor): Operand of logical_not operator. Must be a Tensor of type bool, int8, int16, in32, in64, float32, or float64. 
out(Tensor): The ``Tensor`` that specifies the output of the operator, which can be any ``Tensor`` that has been created in the program. The default value is None, and a new ``Tensor` will be created to save the output. diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 40dd2291e4c0a6..857f99c8f6bce2 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -420,7 +420,9 @@ def pow(x, y, name=None): out = x^{y} Note: - ``paddle.pow`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting` . + ``paddle.pow`` supports broadcasting. If you want know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensors Args: @@ -679,7 +681,9 @@ def subtract(x, y, name=None): out = x - y Note: - ``paddle.subtract`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting` . + ``paddle.subtract`` supports broadcasting. If you want know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor Args: x (Tensor): the input tensor, it's data type should be float32, float64, int32, int64. @@ -773,7 +777,9 @@ def divide(x, y, name=None): out = x / y Note: - ``paddle.divide`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting` . + ``paddle.divide`` supports broadcasting. If you want know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor Args: x (Tensor): the input tensor, it's data type should be float32, float64, int32, int64. @@ -817,7 +823,10 @@ def floor_divide(x, y, name=None): out = trunc(x / y) Note: - ``paddle.floor_divide`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting` . + ``paddle.floor_divide`` supports broadcasting. If you want know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor + Also note that the name ``floor_divide`` can be misleading, as the quotinents are actually rounded toward zero, not toward negative infinite. Args: @@ -859,7 +868,9 @@ def remainder(x, y, name=None): out = x \% y Note: - ``paddle.remainder`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting` . + ``paddle.remainder`` supports broadcasting. If you want know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor Args: x (Tensor): the input tensor, it's data type should be float16, float32, float64, int32, int64. @@ -924,7 +935,9 @@ def multiply(x, y, name=None): out = x * y Note: - ``paddle.multiply`` supports broadcasting. If you would like to know more about broadcasting, please refer to :ref:`user_guide_broadcasting`. + ``paddle.multiply`` supports broadcasting. If you would like to know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. 
_Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor Args: x (Tensor): the input tensor, its data type should be one of float32, float64, int32, int64, bool. @@ -980,7 +993,9 @@ def maximum(x, y, name=None): out = max(x, y) Note: - ``paddle.maximum`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting` . + ``paddle.maximum`` supports broadcasting. If you want to know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor Args: x (Tensor): the input tensor, it's data type should be float32, float64, int32, int64. @@ -1046,7 +1061,9 @@ def minimum(x, y, name=None): out = min(x, y) Note: - ``paddle.minimum`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting` . + ``paddle.minimum`` supports broadcasting. If you want to know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor Args: x (Tensor): the input tensor, it's data type should be float32, float64, int32, int64. @@ -1114,7 +1131,9 @@ def fmax(x, y, name=None): out = fmax(x, y) Note: - ``paddle.fmax`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting` . + ``paddle.fmax`` supports broadcasting. If you want to know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor Args: x (Tensor): the input tensor, it's data type should be float16, float32, float64, int32, int64. @@ -1182,7 +1201,9 @@ def fmin(x, y, name=None): out = fmin(x, y) Note: - ``paddle.fmin`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting` . + ``paddle.fmin`` supports broadcasting. If you want to know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor Args: x (Tensor): the input tensor, it's data type should be float16, float32, float64, int32, int64. @@ -3962,7 +3983,12 @@ def any(x, axis=None, keepdim=False, name=None): def broadcast_shape(x_shape, y_shape): """ - The function returns the shape of doing operation with broadcasting on tensors of x_shape and y_shape, please refer to :ref:`user_guide_broadcasting` for more details. + The function returns the shape of the result of a broadcasting operation on tensors with shapes x_shape and y_shape. + + Note: + If you want to know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor Args: x_shape (list[int]|tuple[int]): A shape of tensor. @@ -4991,7 +5017,9 @@ def heaviside(x, y, name=None): \right. Note: - ``paddle.heaviside`` supports broadcasting. If you want know more about broadcasting, please refer to :ref:`user_guide_broadcasting`. + ``paddle.heaviside`` supports broadcasting. If you want to know more about broadcasting, please refer to `Introduction to Tensor`_ . + + .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor Args: x (Tensor): The input tensor of Heaviside step function, it's data type should be float16, float32, float64, int32 or int64.
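Editor's aside (not part of the patch): all of the notes rewritten above point to the same broadcasting rule. A minimal illustrative sketch of that behaviour, assuming a recent Paddle build where ``paddle.broadcast_shape`` and the elementwise ops above are available:

.. code-block:: python

    import paddle

    x = paddle.ones([2, 3, 4])   # shape (2, 3, 4)
    y = paddle.ones([3, 1])      # shape (3, 1) broadcasts against (2, 3, 4)

    # broadcast_shape only computes the resulting shape; no op is executed.
    print(paddle.broadcast_shape(x.shape, y.shape))  # [2, 3, 4]

    # Elementwise ops such as subtract/multiply follow the same rule.
    print(paddle.subtract(x, y).shape)  # [2, 3, 4]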
diff --git a/python/paddle/vision/image.py b/python/paddle/vision/image.py index 8285132e4ea3e4..7a512176c1efe4 100644 --- a/python/paddle/vision/image.py +++ b/python/paddle/vision/image.py @@ -23,8 +23,8 @@ def set_image_backend(backend): """ - Specifies the backend used to load images in class ``paddle.vision.datasets.ImageFolder`` - and ``paddle.vision.datasets.DatasetFolder`` . Now support backends are pillow and opencv. + Specifies the backend used to load images in class :ref:`api_paddle_datasets_ImageFolder` + and :ref:`api_paddle_datasets_DatasetFolder` . Now support backends are pillow and opencv. If backend not set, will use 'pil' as default. Args: @@ -117,7 +117,7 @@ def image_load(path, backend=None): path (str): Path of the image. backend (str, optional): The image decoding backend type. Options are `cv2`, `pil`, `None`. If backend is None, the global _imread_backend - specified by ``paddle.vision.set_image_backend`` will be used. Default: None. + specified by :ref:`api_paddle_vision_set_image_backend` will be used. Default: None. Returns: PIL.Image or np.array: Loaded image. diff --git a/python/paddle/vision/transforms/functional.py b/python/paddle/vision/transforms/functional.py index b5889981d24870..d58c0f610edb37 100644 --- a/python/paddle/vision/transforms/functional.py +++ b/python/paddle/vision/transforms/functional.py @@ -42,7 +42,18 @@ def _is_numpy_image(img): def to_tensor(pic, data_format='CHW'): """Converts a ``PIL.Image`` or ``numpy.ndarray`` to paddle.Tensor. - See ``ToTensor`` for more details. + Converts a PIL.Image or numpy.ndarray (H x W x C) to a paddle.Tensor of shape (C x H x W). + + If input is a grayscale image (H x W), it will be converted to an image of shape (H x W x 1). + And the shape of output tensor will be (1 x H x W). + + If you want to keep the shape of output tensor as (H x W x C), you can set data_format = ``HWC`` . + + Converts a PIL.Image or numpy.ndarray in the range [0, 255] to a paddle.Tensor in the + range [0.0, 1.0] if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, + RGBA, CMYK, 1) or if the numpy.ndarray has dtype = np.uint8. + + In the other cases, tensors are returned without scaling. Args: pic (PIL.Image|np.ndarray): Image to be converted to tensor. @@ -764,7 +775,7 @@ def rotate( center (2-list|2-tuple, optional): Optional center of rotation. Origin is the upper left corner. Default is the center of the image. - fill (3-list|3-tuple or int): RGB pixel fill value for area outside the rotated image. + fill (3-list|3-tuple or int, optional): RGB pixel fill value for area outside the rotated image. If int, it is used for all channels respectively. Default value is 0. diff --git a/python/paddle/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py index 9b14c775982a3b..cb48598c8fd141 100644 --- a/python/paddle/vision/transforms/transforms.py +++ b/python/paddle/vision/transforms/transforms.py @@ -136,6 +136,8 @@ class BaseTransform: calling logic: + .. code-block:: text + if keys is None: _get_params -> _apply_image() else: @@ -153,14 +155,11 @@ class BaseTransform: Current available strings & data type are describe below: - - "image": input image, with shape of (H, W, C) - - "coords": coordinates, with shape of (N, 2) - - "boxes": bounding boxes, with shape of (N, 4), "xyxy" format, - - the 1st "xy" represents top left point of a box, - the 2nd "xy" represents right bottom point. 
- - - "mask": map used for segmentation, with shape of (H, W, 1) + - "image": input image, with shape of (H, W, C) + - "coords": coordinates, with shape of (N, 2) + - "boxes": bounding boxes, with shape of (N, 4), "xyxy" format,the 1st "xy" represents + top left point of a box,the 2nd "xy" represents right bottom point. + - "mask": map used for segmentation, with shape of (H, W, 1) You can also customize your data types only if you implement the corresponding _apply_*() methods, otherwise ``NotImplementedError`` will be raised. @@ -433,9 +432,9 @@ class RandomResizedCrop(BaseTransform): Args: size (int|list|tuple): Target size of output image, with (height, width) shape. - scale (list|tuple): Scale range of the cropped image before resizing, relatively to the origin - image. Default: (0.08, 1.0) - ratio (list|tuple): Range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33) + scale (list|tuple, optional): Scale range of the cropped image before resizing, relatively to the origin + image. Default: (0.08, 1.0). + ratio (list|tuple, optional): Range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33) interpolation (int|str, optional): Interpolation method. Default: 'bilinear'. when use pil backend, support method are as following: - "nearest": Image.NEAREST, @@ -778,7 +777,7 @@ class BrightnessTransform(BaseTransform): Args: value (float): How much to adjust the brightness. Can be any - non negative number. 0 gives the original image + non negative number. 0 gives the original image. keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None. Shape: @@ -821,7 +820,7 @@ class ContrastTransform(BaseTransform): Args: value (float): How much to adjust the contrast. Can be any - non negative number. 0 gives the original image + non negative number. 0 gives the original image. keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None. Shape: @@ -866,7 +865,7 @@ class SaturationTransform(BaseTransform): Args: value (float): How much to adjust the saturation. Can be any - non negative number. 0 gives the original image + non negative number. 0 gives the original image. keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None. Shape: @@ -909,7 +908,7 @@ class HueTransform(BaseTransform): Args: value (float): How much to adjust the hue. Can be any number - between 0 and 0.5, 0 gives the original image + between 0 and 0.5, 0 gives the original image. keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None. Shape: @@ -953,14 +952,14 @@ class ColorJitter(BaseTransform): """Randomly change the brightness, contrast, saturation and hue of an image. Args: - brightness (float): How much to jitter brightness. - Chosen uniformly from [max(0, 1 - brightness), 1 + brightness]. Should be non negative numbers. - contrast (float): How much to jitter contrast. - Chosen uniformly from [max(0, 1 - contrast), 1 + contrast]. Should be non negative numbers. - saturation (float): How much to jitter saturation. - Chosen uniformly from [max(0, 1 - saturation), 1 + saturation]. Should be non negative numbers. - hue (float): How much to jitter hue. - Chosen uniformly from [-hue, hue]. Should have 0<= hue <= 0.5. + brightness (float, optional): How much to jitter brightness. + Chosen uniformly from [max(0, 1 - brightness), 1 + brightness]. Should be non negative numbers. Default: 0. + contrast (float, optional): How much to jitter contrast. 
+ Chosen uniformly from [max(0, 1 - contrast), 1 + contrast]. Should be non negative numbers. Default: 0. + saturation (float, optional): How much to jitter saturation. + Chosen uniformly from [max(0, 1 - saturation), 1 + saturation]. Should be non negative numbers. Default: 0. + hue (float, optional): How much to jitter hue. + Chosen uniformly from [-hue, hue]. Should have 0<= hue <= 0.5. Default: 0. keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None. Shape: @@ -1665,7 +1664,7 @@ class Grayscale(BaseTransform): """Converts image to grayscale. Args: - num_output_channels (int): (1 or 3) number of channels desired for output image + num_output_channels (int, optional): (1 or 3) number of channels desired for output image. Default: 1. keys (list[str]|tuple[str], optional): Same as ``BaseTransform``. Default: None. Shape: From 518f9d81411f16fe1cfd84ed8b3c2d4ea5c5232b Mon Sep 17 00:00:00 2001 From: cyber-pioneer <116002591+cyber-pioneer@users.noreply.github.com> Date: Fri, 2 Dec 2022 11:03:05 +0800 Subject: [PATCH 103/154] move fluid.layer.py_func to paddle.static.nn.common.py_func (#48482) --- python/paddle/fluid/layers/nn.py | 322 ----------------- .../fluid/tests/unittests/test_py_func_op.py | 12 +- .../tests/unittests/test_rnn_decode_api.py | 2 +- python/paddle/static/__init__.py | 5 +- python/paddle/static/nn/__init__.py | 2 +- python/paddle/static/nn/common.py | 324 ++++++++++++++++++ 6 files changed, 335 insertions(+), 332 deletions(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index c2599454c1c2f1..7ff74cd37cbfbd 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -113,7 +113,6 @@ 'merge_selected_rows', 'get_tensor_from_selected_rows', 'temporal_shift', - 'py_func', 'continuous_value_model', 'unfold', 'deformable_roi_pooling', @@ -6635,327 +6634,6 @@ def temporal_shift(x, seg_num, shift_ratio=0.25, name=None, data_format="NCHW"): ) -class PyFuncRegistry: - _register_funcs = [] - - def __init__(self, func): - if func is None or not callable(func): - raise TypeError('func must be a Python function') - - self._func = func - # find named args using reflection - args = inspect.getfullargspec(self._func) - if len(args[0]) == 0 and args[1] is None and args[2] is None: - # Function with no inputs - self._named_args = None - else: - self._named_args = args[0] - self._id = core._append_python_callable_object_and_return_id(self) - ''' - Why record self here? - - 1. For debug usage. Users can call - :code:`py_func.registered_func(idx)` method - to find the registered function corresponding - to :code:`idx`. - - 2. For increasing reference count of self. - It seems that to release Python object - whose reference count is 1 would cause - segmentation fault error in C++ side. - May be lack of Python GC in C++ side? 
- ''' - PyFuncRegistry._register_funcs.append(self) - - @classmethod - def registered_func(cls, idx): - return cls._register_funcs[idx]._func - - @classmethod - def registered_func_num(cls): - return len(cls._register_funcs) - - @property - def id(self): - return self._id - - def __call__(self, *args): - if self._named_args is None: - func_ret = self._func() - else: - kwargs = dict() - idx = 0 - for arg in self._named_args: - kwargs[arg] = args[idx] - idx += 1 - func_ret = self._func(*args[idx:], **kwargs) - - if not isinstance(func_ret, (list, tuple)): - func_ret = (func_ret,) - - ret = [] - for each_ret in func_ret: - if each_ret is None or isinstance(each_ret, core.LoDTensor): - ret.append(each_ret) - continue - - if not isinstance(each_ret, np.ndarray): - each_ret = np.array(each_ret) - - tensor = core.LoDTensor() - tensor.set(each_ret, core.CPUPlace()) - ret.append(tensor) - - return tuple(ret) - - -@static_only -@templatedoc() -def py_func(func, x, out, backward_func=None, skip_vars_in_backward_input=None): - """ - :api_attr: Static Graph - - This OP is used to register customized Python OP to Paddle. The design - principe of py_func is that Tensor and numpy array can be converted to each - other easily. So you can use Python and numpy API to register a python OP. - - The forward function of the registered OP is ``func`` and the backward function - of that is ``backward_func``. Paddle will call ``func`` at forward runtime and - call ``backward_func`` at backward runtime(if ``backward_func`` is not None). - ``x`` is the input of ``func``, whose type must be Tensor; ``out`` is - the output of ``func``, whose type can be either Tensor or numpy array. - - The input of the backward function ``backward_func`` is ``x``, ``out`` and - the gradient of ``out``. If ``out`` have no gradient, the relevant input of - ``backward_func`` is None. If ``x`` do not have a gradient, the user should - return None in ``backward_func``. - - The data type and shape of ``out`` should also be set correctly before this - API is called, and the data type and shape of the gradient of ``out`` and - ``x`` will be inferred automatically. - - This API can also be used to debug the neural network by setting the ``func`` - as a function that only print variables. - - Args: - func (callable): The forward function of the registered OP. When the network - is running, the forward output ``out`` will be calculated according to this - function and the forward input ``x``. In ``func`` , it's suggested that we - actively convert Tensor into a numpy array, so that we can use Python and - numpy API arbitrarily. If not, some operations of numpy may not be compatible. - x (Tensor|tuple(Tensor)|list[Tensor]): The input of the forward function ``func``. - It can be Tensor|tuple(Tensor)|list[Tensor]. In addition, Multiple Tensor - should be passed in the form of tuple(Tensor) or list[Tensor]. - out (T|tuple(T)|list[T]): The output of the forward function ``func``, it can be - T|tuple(T)|list[T], where T can be either Tensor or numpy array. Since Paddle - cannot automatically infer the shape and type of ``out``, you must create - ``out`` in advance. - backward_func (callable, optional): The backward function of the registered OP. - Its default value is None, which means there is no reverse calculation. If - it is not None, ``backward_func`` is called to calculate the gradient of - ``x`` when the network is at backward runtime. 
- skip_vars_in_backward_input (Tensor, optional): It's used to limit the input - list of ``backward_func``, and it can be Tensor|tuple(Tensor)|list[Tensor]. - It must belong to either ``x`` or ``out``. The default value is None, which means - that no tensors need to be removed from ``x`` and ``out``. If it is not None, - these tensors will not be the input of ``backward_func``. This parameter is only - useful when ``backward_func`` is not None. - - Returns: - Tensor|tuple(Tensor)|list[Tensor]: The output ``out`` of the forward function ``func``. - - Examples: - .. code-block:: python - - # example 1: - import paddle - import numpy as np - - paddle.enable_static() - - # Creates a forward function, Tensor can be input directly without - # being converted into numpy array. - def tanh(x): - return np.tanh(x) - - # Skip x in backward function and return the gradient of x - # Tensor must be actively converted to numpy array, otherwise, - # operations such as +/- can't be used. - def tanh_grad(y, dy): - return np.array(dy) * (1 - np.square(np.array(y))) - - # Creates a forward function for debugging running networks(print value) - def debug_func(x): - print(x) - - def create_tmp_var(name, dtype, shape): - return paddle.static.default_main_program().current_block().create_var( - name=name, dtype=dtype, shape=shape) - - def simple_net(img, label): - hidden = img - for idx in range(4): - hidden = paddle.static.nn.fc(hidden, size=200) - new_hidden = create_tmp_var(name='hidden_{}'.format(idx), - dtype=hidden.dtype, shape=hidden.shape) - - # User-defined forward and backward - hidden = paddle.static.py_func(func=tanh, x=hidden, - out=new_hidden, backward_func=tanh_grad, - skip_vars_in_backward_input=hidden) - - # User-defined debug functions that print out the input Tensor - paddle.static.py_func(func=debug_func, x=hidden, out=None) - - prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax') - ce_loss = paddle.nn.loss.CrossEntropyLoss() - return ce_loss(prediction, label) - - x = paddle.static.data(name='x', shape=[1,4], dtype='float32') - y = paddle.static.data(name='y', shape=[1], dtype='int64') - res = simple_net(x, y) - - exe = paddle.static.Executor(paddle.CPUPlace()) - exe.run(paddle.static.default_startup_program()) - input1 = np.random.random(size=[1,4]).astype('float32') - input2 = np.random.randint(1, 10, size=[1], dtype='int64') - out = exe.run(paddle.static.default_main_program(), - feed={'x':input1, 'y':input2}, - fetch_list=[res.name]) - print(out) - - .. code-block:: python - - # example 2: - # This example shows how to turn Tensor into numpy array and - # use numpy API to register an Python OP - import paddle - import numpy as np - - paddle.enable_static() - - def element_wise_add(x, y): - # Tensor must be actively converted to numpy array, otherwise, - # numpy.shape can't be used. 
- x = np.array(x) - y = np.array(y) - - if x.shape != y.shape: - raise AssertionError("the shape of inputs must be the same!") - - result = np.zeros(x.shape, dtype='int32') - for i in range(len(x)): - for j in range(len(x[0])): - result[i][j] = x[i][j] + y[i][j] - - return result - - def create_tmp_var(name, dtype, shape): - return paddle.static.default_main_program().current_block().create_var( - name=name, dtype=dtype, shape=shape) - - def py_func_demo(): - start_program = paddle.static.default_startup_program() - main_program = paddle.static.default_main_program() - - # Input of the forward function - x = paddle.static.data(name='x', shape=[2,3], dtype='int32') - y = paddle.static.data(name='y', shape=[2,3], dtype='int32') - - # Output of the forward function, name/dtype/shape must be specified - output = create_tmp_var('output','int32', [3,1]) - - # Multiple Variable should be passed in the form of tuple(Variale) or list[Variale] - paddle.static.py_func(func=element_wise_add, x=[x,y], out=output) - - exe=paddle.static.Executor(paddle.CPUPlace()) - exe.run(start_program) - - # Feed numpy array to main_program - input1 = np.random.randint(1, 10, size=[2,3], dtype='int32') - input2 = np.random.randint(1, 10, size=[2,3], dtype='int32') - out = exe.run(main_program, - feed={'x':input1, 'y':input2}, - fetch_list=[output.name]) - print("{0} + {1} = {2}".format(input1, input2, out)) - - py_func_demo() - - # Reference output: - # [[5, 9, 9] + [[7, 8, 4] = [array([[12, 17, 13] - # [7, 5, 2]] [1, 3, 3]] [8, 8, 5]], dtype=int32)] - """ - helper = LayerHelper('py_func', **locals()) - check_type(x, 'X', (list, tuple, Variable, type(None)), 'py_func') - if x is None: - x = [] - elif isinstance(x, Variable): - x = [x] - elif isinstance(x, tuple): - x = list(x) - elif not isinstance(x, (list, tuple, Variable)): - raise TypeError('Input must be Variable/list(Variable)/tuple(Variable)') - check_type(out, 'Out', (list, tuple, Variable, type(None)), 'py_func') - if out is None: - out_list = [] - elif isinstance(out, Variable): - out_list = [out] - elif isinstance(out, tuple): - out_list = list(out) - elif isinstance(out, list): - out_list = out - else: - raise TypeError( - 'Output must be Variable/list(Variable)/tuple(Variable)' - ) - - fwd_func_id = PyFuncRegistry(func).id - bwd_func_id = ( - PyFuncRegistry(backward_func).id if backward_func is not None else -1 - ) - - for each_out in out_list: - if len(each_out.shape) == 0: - raise ValueError( - 'Output shapes of py_func op should be provided by users manually' - ) - - backward_skip_vars = set() - if backward_func is not None and skip_vars_in_backward_input is not None: - if isinstance(skip_vars_in_backward_input, Variable): - skip_vars_in_backward_input = [skip_vars_in_backward_input] - - fwd_in_out = [v.name for v in x] - fwd_in_out.extend([v.name for v in out_list]) - fwd_in_out = set(fwd_in_out) - backward_skip_vars = set() - for v in skip_vars_in_backward_input: - if not v.name in fwd_in_out: - raise ValueError( - 'Variable {} is not found in forward inputs and outputs'.format( - v.name - ) - ) - backward_skip_vars.add(v.name) - - helper.append_op( - type='py_func', - inputs={'X': x}, - outputs={'Out': out_list}, - attrs={ - 'forward_callable_id': fwd_func_id, - 'backward_callable_id': bwd_func_id, - 'backward_skip_vars': list(backward_skip_vars), - }, - ) - return out - - -# For debug usage -py_func.registered_func = PyFuncRegistry.registered_func -py_func.registered_func_num = PyFuncRegistry.registered_func_num - - def 
continuous_value_model(input, cvm, use_cvm=True): r""" diff --git a/python/paddle/fluid/tests/unittests/test_py_func_op.py b/python/paddle/fluid/tests/unittests/test_py_func_op.py index 51d7af4993009c..0f2f9ea1e31708 100644 --- a/python/paddle/fluid/tests/unittests/test_py_func_op.py +++ b/python/paddle/fluid/tests/unittests/test_py_func_op.py @@ -94,7 +94,7 @@ def simple_fc_net(img, label, use_py_func_op): shape=hidden.shape, ) ) - hidden = fluid.layers.py_func( + hidden = paddle.static.py_func( func=tanh, x=hidden, out=new_hidden, @@ -111,7 +111,7 @@ def simple_fc_net(img, label, use_py_func_op): .current_block() .create_var(name='loss', dtype='float32', shape=[-1, 1]) ) - loss = fluid.layers.py_func( + loss = paddle.static.py_func( func=cross_entropy, x=[prediction, label], out=loss, @@ -124,11 +124,11 @@ def simple_fc_net(img, label, use_py_func_op): .current_block() .create_var(name='test_tmp_var', dtype='float32', shape=[1]) ) - fluid.layers.py_func( + paddle.static.py_func( func=dummy_func_with_no_input, x=None, out=dummy_var ) loss += dummy_var - fluid.layers.py_func(func=dummy_func_with_no_output, x=loss, out=None) + paddle.static.py_func(func=dummy_func_with_no_output, x=loss, out=None) loss_out = ( fluid.default_main_program() @@ -140,7 +140,7 @@ def simple_fc_net(img, label, use_py_func_op): .current_block() .create_var(dtype='float32', shape=[1]) ) - fluid.layers.py_func( + paddle.static.py_func( func=dummy_func_with_multi_input_output, x=(loss, dummy_var), out=(loss_out, dummy_var_out), @@ -149,7 +149,7 @@ def simple_fc_net(img, label, use_py_func_op): loss == loss_out and dummy_var == dummy_var_out ), "py_func failed with multi input and output" - fluid.layers.py_func( + paddle.static.py_func( func=dummy_func_with_multi_input_output, x=[loss, dummy_var], out=[loss_out, dummy_var_out], diff --git a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py index 410708a105bfd9..a557fb9df00131 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py @@ -309,7 +309,7 @@ def learn(self, act_prob, action, reward, length=None): """ update policy model self.model with policy gradient algorithm """ - self.reward = fluid.layers.py_func( + self.reward = paddle.static.py_func( func=reward_func, x=[action, length], out=reward ) neg_log_prob = layers.cross_entropy(act_prob, action) diff --git a/python/paddle/static/__init__.py b/python/paddle/static/__init__.py index f527b5a1c358db..118fe0b58bfdae 100644 --- a/python/paddle/static/__init__.py +++ b/python/paddle/static/__init__.py @@ -16,6 +16,9 @@ from . import amp # noqa: F401 from . import sparsity # noqa: F401 from . 
import nn # noqa: F401 + +from .nn.common import py_func # noqa: F401 + from .io import save_inference_model # noqa: F401 from .io import load_inference_model # noqa: F401 from .io import deserialize_persistables # noqa: F401 @@ -53,7 +56,6 @@ from ..fluid.framework import ipu_shard_guard # noqa: F401 from ..fluid.framework import set_ipu_shard # noqa: F401 from ..fluid.layers.control_flow import Print # noqa: F401 -from ..fluid.layers.nn import py_func # noqa: F401 from ..fluid.parallel_executor import ParallelExecutor # noqa: F401 from ..fluid.param_attr import WeightNormParamAttr # noqa: F401 from ..fluid.optimizer import ExponentialMovingAverage # noqa: F401 @@ -61,7 +63,6 @@ from ..fluid.io import load # noqa: F401 from ..fluid.io import load_program_state # noqa: F401 from ..fluid.io import set_program_state # noqa: F401 - from ..fluid.io import load_vars # noqa: F401 from ..fluid.io import save_vars # noqa: F401 from ..fluid.io import batch # noqa: F401 diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py index 449cd478a2c3b9..8e3048b21c5cb4 100755 --- a/python/paddle/static/nn/__init__.py +++ b/python/paddle/static/nn/__init__.py @@ -20,6 +20,7 @@ from .common import conv3d # noqa: F401 from .common import conv2d_transpose # noqa: F401 from .common import conv3d_transpose # noqa: F401 +from .common import py_func # noqa: F401 from ...fluid.layers import batch_norm # noqa: F401 from ...fluid.layers import bilinear_tensor_product # noqa: F401 @@ -32,7 +33,6 @@ from ...fluid.layers import multi_box_head # noqa: F401 from .loss import nce # noqa: F401 from .common import prelu # noqa: F401 -from ...fluid.layers import py_func # noqa: F401 from ...fluid.layers import row_conv # noqa: F401 from ...fluid.layers import spectral_norm # noqa: F401 from ...fluid.layers import switch_case # noqa: F401 diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index 9fc2bbd975d5fb..a8dec018ff14ab 100755 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import inspect + +import numpy as np + import paddle from paddle.common_ops_import import ( LayerHelper, @@ -19,6 +23,7 @@ check_variable_and_dtype, utils, ) +from paddle.fluid import core from paddle.fluid.data_feeder import check_dtype from paddle.fluid.framework import Variable, _non_static_mode, static_only from paddle.fluid.initializer import Constant, Normal @@ -2083,6 +2088,325 @@ def deform_conv2d( ) +class PyFuncRegistry: + _register_funcs = [] + + def __init__(self, func): + if func is None or not callable(func): + raise TypeError('func must be a Python function') + + self._func = func + # find named args using reflection + args = inspect.getfullargspec(self._func) + if len(args[0]) == 0 and args[1] is None and args[2] is None: + # Function with no inputs + self._named_args = None + else: + self._named_args = args[0] + self._id = core._append_python_callable_object_and_return_id(self) + ''' + Why record self here? + + 1. For debug usage. Users can call + :code:`py_func.registered_func(idx)` method + to find the registered function corresponding + to :code:`idx`. + + 2. For increasing reference count of self. + It seems that to release Python object + whose reference count is 1 would cause + segmentation fault error in C++ side. + May be lack of Python GC in C++ side? 
+ ''' + PyFuncRegistry._register_funcs.append(self) + + @classmethod + def registered_func(cls, idx): + return cls._register_funcs[idx]._func + + @classmethod + def registered_func_num(cls): + return len(cls._register_funcs) + + @property + def id(self): + return self._id + + def __call__(self, *args): + if self._named_args is None: + func_ret = self._func() + else: + kwargs = dict() + idx = 0 + for arg in self._named_args: + kwargs[arg] = args[idx] + idx += 1 + func_ret = self._func(*args[idx:], **kwargs) + + if not isinstance(func_ret, (list, tuple)): + func_ret = (func_ret,) + + ret = [] + for each_ret in func_ret: + if each_ret is None or isinstance(each_ret, core.LoDTensor): + ret.append(each_ret) + continue + + if not isinstance(each_ret, np.ndarray): + each_ret = np.array(each_ret) + + tensor = core.LoDTensor() + tensor.set(each_ret, core.CPUPlace()) + ret.append(tensor) + + return tuple(ret) + + +@static_only +@templatedoc() +def py_func(func, x, out, backward_func=None, skip_vars_in_backward_input=None): + """ + This is used to register customized Python OP to Paddle. The design + principe of py_func is that Tensor and numpy array can be converted to each + other easily. So you can use Python and numpy API to register a python OP. + + The forward function of the registered OP is ``func`` and the backward function + of that is ``backward_func``. Paddle will call ``func`` at forward runtime and + call ``backward_func`` at backward runtime(if ``backward_func`` is not None). + ``x`` is the input of ``func``, whose type must be Tensor; ``out`` is + the output of ``func``, whose type can be either Tensor or numpy array. + + The input of the backward function ``backward_func`` is ``x``, ``out`` and + the gradient of ``out``. If ``out`` have no gradient, the relevant input of + ``backward_func`` is None. If ``x`` do not have a gradient, the user should + return None in ``backward_func``. + + The data type and shape of ``out`` should also be set correctly before this + API is called, and the data type and shape of the gradient of ``out`` and + ``x`` will be inferred automatically. + + This API can also be used to debug the neural network by setting the ``func`` + as a function that only print variables. + + Args: + func (callable): The forward function of the registered OP. When the network + is running, the forward output ``out`` will be calculated according to this + function and the forward input ``x``. In ``func`` , it's suggested that we + actively convert Tensor into a numpy array, so that we can use Python and + numpy API arbitrarily. If not, some operations of numpy may not be compatible. + x (Tensor|tuple(Tensor)|list[Tensor]): The input of the forward function ``func``. + It can be Tensor|tuple(Tensor)|list[Tensor]. In addition, Multiple Tensor + should be passed in the form of tuple(Tensor) or list[Tensor]. + out (T|tuple(T)|list[T]): The output of the forward function ``func``, it can be + T|tuple(T)|list[T], where T can be either Tensor or numpy array. Since Paddle + cannot automatically infer the shape and type of ``out``, you must create + ``out`` in advance. + backward_func (callable, optional): The backward function of the registered OP. + Its default value is None, which means there is no reverse calculation. If + it is not None, ``backward_func`` is called to calculate the gradient of + ``x`` when the network is at backward runtime. 
+ skip_vars_in_backward_input (Tensor, optional): It's used to limit the input + list of ``backward_func``, and it can be Tensor|tuple(Tensor)|list[Tensor]. + It must belong to either ``x`` or ``out``. The default value is None, which means + that no tensors need to be removed from ``x`` and ``out``. If it is not None, + these tensors will not be the input of ``backward_func``. This parameter is only + useful when ``backward_func`` is not None. + + Returns: + Tensor|tuple(Tensor)|list[Tensor]: The output ``out`` of the forward function ``func``. + + Examples: + .. code-block:: python + + # example 1: + import paddle + import numpy as np + + paddle.enable_static() + + # Creates a forward function, Tensor can be input directly without + # being converted into numpy array. + def tanh(x): + return np.tanh(x) + + # Skip x in backward function and return the gradient of x + # Tensor must be actively converted to numpy array, otherwise, + # operations such as +/- can't be used. + def tanh_grad(y, dy): + return np.array(dy) * (1 - np.square(np.array(y))) + + # Creates a forward function for debugging running networks(print value) + def debug_func(x): + print(x) + + def create_tmp_var(name, dtype, shape): + return paddle.static.default_main_program().current_block().create_var( + name=name, dtype=dtype, shape=shape) + + def simple_net(img, label): + hidden = img + for idx in range(4): + hidden = paddle.static.nn.fc(hidden, size=200) + new_hidden = create_tmp_var(name='hidden_{}'.format(idx), + dtype=hidden.dtype, shape=hidden.shape) + + # User-defined forward and backward + hidden = paddle.static.py_func(func=tanh, x=hidden, + out=new_hidden, backward_func=tanh_grad, + skip_vars_in_backward_input=hidden) + + # User-defined debug functions that print out the input Tensor + paddle.static.py_func(func=debug_func, x=hidden, out=None) + + prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax') + ce_loss = paddle.nn.loss.CrossEntropyLoss() + return ce_loss(prediction, label) + + x = paddle.static.data(name='x', shape=[1,4], dtype='float32') + y = paddle.static.data(name='y', shape=[1], dtype='int64') + res = simple_net(x, y) + + exe = paddle.static.Executor(paddle.CPUPlace()) + exe.run(paddle.static.default_startup_program()) + input1 = np.random.random(size=[1,4]).astype('float32') + input2 = np.random.randint(1, 10, size=[1], dtype='int64') + out = exe.run(paddle.static.default_main_program(), + feed={'x':input1, 'y':input2}, + fetch_list=[res.name]) + print(out) + + .. code-block:: python + + # example 2: + # This example shows how to turn Tensor into numpy array and + # use numpy API to register an Python OP + import paddle + import numpy as np + + paddle.enable_static() + + def element_wise_add(x, y): + # Tensor must be actively converted to numpy array, otherwise, + # numpy.shape can't be used. 
+ x = np.array(x) + y = np.array(y) + + if x.shape != y.shape: + raise AssertionError("the shape of inputs must be the same!") + + result = np.zeros(x.shape, dtype='int32') + for i in range(len(x)): + for j in range(len(x[0])): + result[i][j] = x[i][j] + y[i][j] + + return result + + def create_tmp_var(name, dtype, shape): + return paddle.static.default_main_program().current_block().create_var( + name=name, dtype=dtype, shape=shape) + + def py_func_demo(): + start_program = paddle.static.default_startup_program() + main_program = paddle.static.default_main_program() + + # Input of the forward function + x = paddle.static.data(name='x', shape=[2,3], dtype='int32') + y = paddle.static.data(name='y', shape=[2,3], dtype='int32') + + # Output of the forward function, name/dtype/shape must be specified + output = create_tmp_var('output','int32', [3,1]) + + # Multiple Variable should be passed in the form of tuple(Variale) or list[Variale] + paddle.static.py_func(func=element_wise_add, x=[x,y], out=output) + + exe=paddle.static.Executor(paddle.CPUPlace()) + exe.run(start_program) + + # Feed numpy array to main_program + input1 = np.random.randint(1, 10, size=[2,3], dtype='int32') + input2 = np.random.randint(1, 10, size=[2,3], dtype='int32') + out = exe.run(main_program, + feed={'x':input1, 'y':input2}, + fetch_list=[output.name]) + print("{0} + {1} = {2}".format(input1, input2, out)) + + py_func_demo() + + # Reference output: + # [[5, 9, 9] + [[7, 8, 4] = [array([[12, 17, 13] + # [7, 5, 2]] [1, 3, 3]] [8, 8, 5]], dtype=int32)] + """ + helper = LayerHelper('py_func', **locals()) + check_type(x, 'X', (list, tuple, Variable, type(None)), 'py_func') + if x is None: + x = [] + elif isinstance(x, Variable): + x = [x] + elif isinstance(x, tuple): + x = list(x) + elif not isinstance(x, (list, tuple, Variable)): + raise TypeError('Input must be Variable/list(Variable)/tuple(Variable)') + check_type(out, 'Out', (list, tuple, Variable, type(None)), 'py_func') + if out is None: + out_list = [] + elif isinstance(out, Variable): + out_list = [out] + elif isinstance(out, tuple): + out_list = list(out) + elif isinstance(out, list): + out_list = out + else: + raise TypeError( + 'Output must be Variable/list(Variable)/tuple(Variable)' + ) + + fwd_func_id = PyFuncRegistry(func).id + bwd_func_id = ( + PyFuncRegistry(backward_func).id if backward_func is not None else -1 + ) + + for each_out in out_list: + if len(each_out.shape) == 0: + raise ValueError( + 'Output shapes of py_func should be provided by users manually' + ) + + backward_skip_vars = set() + if backward_func is not None and skip_vars_in_backward_input is not None: + if isinstance(skip_vars_in_backward_input, Variable): + skip_vars_in_backward_input = [skip_vars_in_backward_input] + + fwd_in_out = [v.name for v in x] + fwd_in_out.extend([v.name for v in out_list]) + fwd_in_out = set(fwd_in_out) + backward_skip_vars = set() + for v in skip_vars_in_backward_input: + if v.name not in fwd_in_out: + raise ValueError( + 'Variable {} is not found in forward inputs and outputs'.format( + v.name + ) + ) + backward_skip_vars.add(v.name) + + helper.append_op( + type='py_func', + inputs={'X': x}, + outputs={'Out': out_list}, + attrs={ + 'forward_callable_id': fwd_func_id, + 'backward_callable_id': bwd_func_id, + 'backward_skip_vars': list(backward_skip_vars), + }, + ) + return out + + +# For debug usage +py_func.registered_func = PyFuncRegistry.registered_func +py_func.registered_func_num = PyFuncRegistry.registered_func_num + + @static_only def prelu(x, mode, 
param_attr=None, data_format="NCHW", name=None): r""" From f71de37800557152c0fc91eab8c64e42a54c90a9 Mon Sep 17 00:00:00 2001 From: ykkk2333 <77383312+ykkk2333@users.noreply.github.com> Date: Fri, 2 Dec 2022 11:08:08 +0800 Subject: [PATCH 104/154] add silu, silu_grad, unfold and unfold_grad xpu kernels (#48325) * add stat tool * add roll and roll_grad kernels and strided_slice and strided_slice_grad kernels, test=kunlun * add silu, unfold and their grads,test=kunlun --- .../fluid/platform/device/xpu/xpu2_op_list.h | 15 ++ .../phi/kernels/xpu/activation_grad_kernel.cc | 27 +++ paddle/phi/kernels/xpu/activation_kernel.cc | 21 ++ paddle/phi/kernels/xpu/conv_grad_kernel.cc | 127 ++++++++++++ paddle/phi/kernels/xpu/unfold_grad_kernel.cc | 94 +++++++++ paddle/phi/kernels/xpu/unfold_kernel.cc | 85 +++++++++ .../unittests/xpu/test_activation_op_xpu.py | 84 ++++++++ .../tests/unittests/xpu/test_conv3d_op_xpu.py | 58 ++++++ .../tests/unittests/xpu/test_unfold_op_xpu.py | 180 ++++++++++++++++++ 9 files changed, 691 insertions(+) create mode 100644 paddle/phi/kernels/xpu/unfold_grad_kernel.cc create mode 100644 paddle/phi/kernels/xpu/unfold_kernel.cc create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_unfold_op_xpu.py diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h index cdd86479f44b99..d1c1c361a9b3ba 100644 --- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h +++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h @@ -131,6 +131,9 @@ XPUOpMap& get_kl2_ops() { {"conv2d", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), pOpKernelType(vartype::FP16, XPUPlace())})}, + {"conv3d_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), + pOpKernelType(vartype::FP16, XPUPlace())})}, {"conv3d", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), pOpKernelType(vartype::FP16, XPUPlace())})}, @@ -282,6 +285,12 @@ XPUOpMap& get_kl2_ops() { pOpKernelType(vartype::INT32, XPUPlace()), pOpKernelType(vartype::INT8, XPUPlace()), pOpKernelType(vartype::FP32, XPUPlace())})}, + {"unfold", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), + pOpKernelType(vartype::FP16, XPUPlace())})}, + {"unfold_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), + pOpKernelType(vartype::FP16, XPUPlace())})}, {"floor", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"gather_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), @@ -523,6 +532,12 @@ XPUOpMap& get_kl2_ops() { {"sgd_dense_param_sparse_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), pOpKernelType(vartype::FP16, XPUPlace())})}, + {"silu_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), + pOpKernelType(vartype::FP16, XPUPlace())})}, + {"silu", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), + pOpKernelType(vartype::FP16, XPUPlace())})}, {"sigmoid_cross_entropy_with_logits_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"sigmoid_cross_entropy_with_logits", diff --git a/paddle/phi/kernels/xpu/activation_grad_kernel.cc b/paddle/phi/kernels/xpu/activation_grad_kernel.cc index 9585e2264db676..4ab540a570577a 100644 --- a/paddle/phi/kernels/xpu/activation_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/activation_grad_kernel.cc @@ -367,6 +367,26 @@ struct XPURelu6GradFunctor : public funcs::BaseActivationFunctor { } }; +template +struct XPUSiluGradFunctor : public funcs::BaseActivationFunctor { + using XPUType = typename XPUTypeTrait::Type; + template + void operator()(const Context& dev_ctx, + const 
DenseTensor* x, + const DenseTensor* out, + const DenseTensor* dout, + DenseTensor* dx) const { + dev_ctx.template Alloc(dx); + const XPUType* x_data = reinterpret_cast(x->data()); + const XPUType* y_grad = reinterpret_cast(dout->data()); + XPUType* x_grad = reinterpret_cast(dx->data()); + + int r = xpu::swish_grad( + dev_ctx.x_context(), x_data, y_grad, x_grad, dx->numel()); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "swish_grad"); + } +}; + template struct XPUSigmoidGradFunctor : public funcs::BaseActivationFunctor { using XPUType = typename XPUTypeTrait::Type; @@ -552,6 +572,7 @@ DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt, XPUSqrtGradFunctor); DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, XPUTanhGradFunctor); DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, XPUReluGradFunctor); +DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, XPUSiluGradFunctor); DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Log, XPULogGradFunctor); DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Square, XPUSquareGradFunctor); @@ -603,6 +624,12 @@ PD_REGISTER_KERNEL(relu_grad, phi::ReluGradKernel, float, phi::dtype::float16) {} +PD_REGISTER_KERNEL(silu_grad, + XPU, + ALL_LAYOUT, + phi::SiluGradKernel, + float, + phi::dtype::float16) {} #define PD_REGISTER_ACTIVATION_GRAD_KERNEL(name, func) \ PD_REGISTER_KERNEL(name, XPU, ALL_LAYOUT, phi::func, float) {} diff --git a/paddle/phi/kernels/xpu/activation_kernel.cc b/paddle/phi/kernels/xpu/activation_kernel.cc index 39f928eb114733..def5fbb65b84df 100644 --- a/paddle/phi/kernels/xpu/activation_kernel.cc +++ b/paddle/phi/kernels/xpu/activation_kernel.cc @@ -322,6 +322,24 @@ struct XPURelu6Functor : public funcs::BaseActivationFunctor { } }; +template +struct XPUSiluFunctor : public funcs::BaseActivationFunctor { + using XPUType = typename XPUTypeTrait::Type; + template + void operator()(const Context& dev_ctx, + const DenseTensor& x, + DenseTensor* out) const { + dev_ctx.template Alloc(out); + const XPUType* x_data = reinterpret_cast(x.data()); + XPUType* y_data = reinterpret_cast(out->data()); + + auto xpu_context = dev_ctx.x_context(); + int r = + xpu::swish(xpu_context, x_data, y_data, x.numel(), nullptr, nullptr); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "swish"); + } +}; + template struct XPUSigmoidFunctor : public funcs::BaseActivationFunctor { using XPUType = typename XPUTypeTrait::Type; @@ -448,6 +466,7 @@ DEFINE_XPU_ACTIVATION_KERNEL(Sigmoid, XPUSigmoidFunctor) DEFINE_XPU_ACTIVATION_KERNEL(Square, XPUSquareFunctor) DEFINE_XPU_ACTIVATION_KERNEL(Sqrt, XPUSqrtFunctor) DEFINE_XPU_ACTIVATION_KERNEL(Tanh, XPUTanhFunctor) +DEFINE_XPU_ACTIVATION_KERNEL(Silu, XPUSiluFunctor) DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Mish, XPUMishFunctor, threshold) DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu, @@ -486,6 +505,8 @@ void HardSwishRawKernel(const Context& dev_ctx, PD_REGISTER_KERNEL( relu, XPU, ALL_LAYOUT, phi::ReluKernel, float, phi::dtype::float16) {} +PD_REGISTER_KERNEL( + silu, XPU, ALL_LAYOUT, phi::SiluKernel, float, phi::dtype::float16) {} #define PD_REGISTER_ACTIVATION_KERNEL(name, func) \ PD_REGISTER_KERNEL(name, XPU, ALL_LAYOUT, phi::func, float) {} diff --git a/paddle/phi/kernels/xpu/conv_grad_kernel.cc b/paddle/phi/kernels/xpu/conv_grad_kernel.cc index de4c573b375f68..8ce6103d47e2c1 100644 --- a/paddle/phi/kernels/xpu/conv_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/conv_grad_kernel.cc @@ -168,6 +168,127 @@ void DepthwiseConvGradKernel(const Context& dev_ctx, filter_grad); } +template +void Conv3DGradKernel(const Context& dev_ctx, + const DenseTensor& input, + const DenseTensor& filter, + const 
DenseTensor& out_grad, + const std::vector& strides, + const std::vector& paddings_t, + const std::string& padding_algorithm, + int groups, + const std::vector& dilations_t, + const std::string& data_format, + DenseTensor* input_grad, + DenseTensor* filter_grad) { + using XPUT = typename XPUTypeTrait::Type; + std::vector paddings = paddings_t; + std::vector dilations = dilations_t; + // The filter and filter_grad will be reshaped in the calculations, + // so here use an assignment operation, + // that avoids modifying the variable in the Scope. + if (!input_grad && !filter_grad) return; + + phi::DDim in_data_dims = + phi::slice_ddim(input.dims(), 2, input.dims().size()); + phi::DDim filter_data_dims = + phi::slice_ddim(filter.dims(), 2, filter.dims().size()); + std::vector ksize = phi::vectorize(filter_data_dims); + std::vector filter_shape = phi::vectorize(filter.dims()); + UpdatePaddingAndDilation( + &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); + + int batch_size = static_cast(input.dims()[0]); + int img_c = static_cast(input.dims()[1]); + int img_d = static_cast(input.dims()[2]); + int img_h = static_cast(input.dims()[3]); + int img_w = static_cast(input.dims()[4]); + int f = static_cast(filter.dims()[0]); + bool is_ncdhw = true; + if (data_format == "NDHWC") { + img_c = static_cast(input.dims()[4]); + img_d = static_cast(input.dims()[1]); + img_h = static_cast(input.dims()[2]); + img_w = static_cast(input.dims()[3]); + is_ncdhw = false; + } + + const XPUT* input_data = reinterpret_cast(input.data()); + const XPUT* filter_data = reinterpret_cast(filter.data()); + const XPUT* output_grad_data = + reinterpret_cast(out_grad.data()); + XPUT* input_grad_data = nullptr; + if (input_grad) { + dev_ctx.template Alloc(input_grad); + input_grad_data = reinterpret_cast(input_grad->data()); + } + XPUT* filter_grad_data = nullptr; + if (filter_grad) { + dev_ctx.template Alloc(filter_grad); + filter_grad_data = reinterpret_cast(filter_grad->data()); + } + xpu::ctx_guard RAII_GUARD(dev_ctx.x_context()); + + XPUT* filter_data_tmp; + XPUT* filter_grad_data_tmp; + const XPUT* filter_data_ptr = filter_data; + XPUT* filter_grad_data_ptr = filter_grad_data; + if (data_format == "NDHWC") { + filter_data_tmp = RAII_GUARD.alloc(filter.numel()); + PADDLE_ENFORCE_XDNN_NOT_NULL(filter_data_tmp); + int r = xpu::transpose(dev_ctx.x_context(), + filter_data, + filter_data_tmp, + filter_shape, + {0, 2, 3, 4, 1}); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose"); + filter_data_ptr = reinterpret_cast(filter_data_tmp); + + if (filter_grad_data != nullptr) { + filter_grad_data_tmp = RAII_GUARD.alloc(filter.numel()); + PADDLE_ENFORCE_XDNN_NOT_NULL(filter_grad_data_tmp); + filter_grad_data_ptr = filter_grad_data_tmp; + } + } + int r = xpu::conv3d_grad(dev_ctx.x_context(), + input_data, + filter_data_ptr, + output_grad_data, + input_grad_data, + filter_grad_data_ptr, + batch_size, + img_c, + img_d, + img_h, + img_w, + f, + ksize, + strides, + paddings, + dilations, + groups, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + is_ncdhw); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv3d_grad"); + + if ((filter_grad_data_ptr != nullptr) && (data_format == "NDHWC")) { + std::vector filter_shape_fhwc = {filter_shape[0], + filter_shape[2], + filter_shape[3], + filter_shape[4], + filter_shape[1]}; + int r = xpu::transpose(dev_ctx.x_context(), + filter_grad_data_ptr, + filter_grad_data, + filter_shape_fhwc, + {0, 4, 1, 2, 3}); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose"); + } +} } // namespace phi 
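Editor's aside (not part of the patch): the XPU kernels added in this commit (silu/silu_grad, unfold/unfold_grad, conv3d_grad) back the existing Python-level APIs. A rough usage sketch, assuming a Paddle build with XPU support; on other builds the same code simply runs on the default device:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.rand([2, 3, 8, 8])
    x.stop_gradient = False

    y = F.silu(x)       # forward: silu kernel
    y.sum().backward()  # backward: silu_grad kernel

    # Forward path of the new unfold kernel (im2col + transpose).
    cols = F.unfold(paddle.rand([2, 3, 8, 8]), kernel_sizes=3, paddings=1)

    vol = paddle.rand([2, 3, 4, 8, 8])
    vol.stop_gradient = False
    w = paddle.rand([6, 3, 3, 3, 3])
    out = F.conv3d(vol, w, padding=1)
    out.mean().backward()  # backward: conv3d_grad kernel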
PD_REGISTER_KERNEL(conv2d_grad, @@ -182,3 +303,9 @@ PD_REGISTER_KERNEL(depthwise_conv2d_grad, ALL_LAYOUT, phi::DepthwiseConvGradKernel, float) {} +PD_REGISTER_KERNEL(conv3d_grad, + XPU, + ALL_LAYOUT, + phi::Conv3DGradKernel, + float, + phi::dtype::float16) {} diff --git a/paddle/phi/kernels/xpu/unfold_grad_kernel.cc b/paddle/phi/kernels/xpu/unfold_grad_kernel.cc new file mode 100644 index 00000000000000..298d6655331da0 --- /dev/null +++ b/paddle/phi/kernels/xpu/unfold_grad_kernel.cc @@ -0,0 +1,94 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/unfold_grad_kernel.h" + +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/unfold_functor.h" + +namespace phi { + +template +void UnfoldGradKernel(const Context& ctx, + const DenseTensor& x, + const DenseTensor& out_grad, + const std::vector& kernel_sizes, + const std::vector& strides, + const std::vector& paddings, + const std::vector& dilations, + DenseTensor* x_grad) { + using XPUType = typename XPUTypeTrait::Type; + ctx.template Alloc(x_grad); + const std::string data_format = phi::DataLayoutToString(x.layout()); + bool is_nchw = data_format == "NCHW"; + PADDLE_ENFORCE_EQ(is_nchw, + true, + phi::errors::PreconditionNotMet( + "Unfold grad op only supports datalayout == NCHW")); + + auto x_dims = x_grad->dims(); + int n = static_cast(x_dims[0]); + int c = static_cast(x_dims[1]); + int h = static_cast(x_dims[2]); + int w = static_cast(x_dims[3]); + + int out_height = phi::funcs::CalcOutputSize(x_dims[2], + kernel_sizes[0], + dilations[0], + paddings[0], + paddings[2], + strides[0]); + int out_width = phi::funcs::CalcOutputSize(x_dims[3], + kernel_sizes[1], + dilations[1], + paddings[1], + paddings[3], + strides[1]); + + xpu::ctx_guard RAII_GUARD(ctx.x_context()); + XPUType* out_grad_trans = + RAII_GUARD.alloc_l3_or_gm(out_grad.numel()); + + int r = xpu::transpose( + ctx.x_context(), + reinterpret_cast(out_grad.data()), + out_grad_trans, + {n, c, kernel_sizes[0], kernel_sizes[1], out_height, out_width}, + {0, 4, 5, 1, 2, 3}); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose"); + + r = xpu::col2im(ctx.x_context(), + out_grad_trans, + reinterpret_cast(x_grad->data()), + n, + c, + h, + w, + kernel_sizes, + strides, + paddings, + dilations, + is_nchw); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "col2im"); +} + +} // namespace phi + +PD_REGISTER_KERNEL(unfold_grad, + XPU, + ALL_LAYOUT, + phi::UnfoldGradKernel, + float, + phi::dtype::float16) {} diff --git a/paddle/phi/kernels/xpu/unfold_kernel.cc b/paddle/phi/kernels/xpu/unfold_kernel.cc new file mode 100644 index 00000000000000..64a12b2881296e --- /dev/null +++ b/paddle/phi/kernels/xpu/unfold_kernel.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/unfold_kernel.h" + +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/unfold_functor.h" + +namespace phi { + +template +void UnfoldKernel(const Context& ctx, + const DenseTensor& x, + const std::vector& kernel_sizes, + const std::vector& strides, + const std::vector& paddings, + const std::vector& dilations, + DenseTensor* out) { + using XPUType = typename XPUTypeTrait::Type; + ctx.template Alloc(out); + const std::string data_format = phi::DataLayoutToString(x.layout()); + bool is_nchw = data_format == "NCHW"; + PADDLE_ENFORCE_EQ(is_nchw, + true, + phi::errors::PreconditionNotMet( + "Unfold op only supports datalayout == NCHW")); + auto x_dims = x.dims(); + int n = static_cast(x_dims[0]); + int c = static_cast(x_dims[1]); + int h = static_cast(x_dims[2]); + int w = static_cast(x_dims[3]); + + int out_height = phi::funcs::CalcOutputSize(x_dims[2], + kernel_sizes[0], + dilations[0], + paddings[0], + paddings[2], + strides[0]); + int out_width = phi::funcs::CalcOutputSize(x_dims[3], + kernel_sizes[1], + dilations[1], + paddings[1], + paddings[3], + strides[1]); + + xpu::ctx_guard RAII_GUARD(ctx.x_context()); + XPUType* out_pre_trans = RAII_GUARD.alloc_l3_or_gm(out->numel()); + int r = xpu::im2col(ctx.x_context(), + reinterpret_cast(x.data()), + out_pre_trans, + n, + c, + h, + w, + kernel_sizes, + strides, + paddings, + dilations, + is_nchw); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "im2col"); + + r = xpu::transpose( + ctx.x_context(), + out_pre_trans, + reinterpret_cast(out->data()), + {n, out_height, out_width, c, kernel_sizes[0], kernel_sizes[1]}, + {0, 3, 4, 5, 1, 2}); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose"); +} +} // namespace phi + +PD_REGISTER_KERNEL( + unfold, XPU, ALL_LAYOUT, phi::UnfoldKernel, float, phi::dtype::float16) {} diff --git a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py index d5afa4a1b130db..bb2cddf04b13aa 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py @@ -19,6 +19,9 @@ sys.path.append("..") +import paddle +import paddle.nn.functional as F + from op_test import OpTest from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( @@ -86,6 +89,87 @@ def set_shape(self): create_test_class(globals(), XPUTestExpOP, stype) +class XPUTestSiluOP(XPUOpTestWrapper): + def __init__(self): + self.op_name = 'silu' + self.use_dynamic_create_class = False + + class XPUTestSilu(TestActivationOPBase): + def set_case(self): + self.op_type = "silu" + self.dtype = self.in_type + self.init_shape() + + np.random.seed(1024) + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) + out = x / (np.exp(-x) + 1) + + self.inputs = {'X': x} + self.outputs = {'Out': out} + self.attrs = {'use_xpu': True} + + def test_check_grad(self): + self.check_grad_with_place(self.place, ['X'], 'Out') + + def init_shape(self): + self.shape = [11, 17] + + class 
TestSilu_ZeroDim(XPUTestSilu): + def init_shape(self): + self.shape = [] + + +class TestSiluAPI(unittest.TestCase): + # test paddle.nn.Silu, paddle.nn.functional.silu + def setUp(self): + self.x_np = np.random.uniform(-1, 1, [11, 17]).astype('float32') + self.place = paddle.XPUPlace(0) + + def test_static_api(self): + paddle.enable_static() + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.fluid.data('X', [11, 17]) + out1 = F.silu(x) + m = paddle.nn.Silu() + out2 = m(x) + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2]) + out_ref = self.x_np / (1 + np.exp(-self.x_np)) + for r in res: + np.testing.assert_allclose(out_ref, r, rtol=1e-05) + + def test_dygraph_api(self): + paddle.disable_static(self.place) + x = paddle.to_tensor(self.x_np) + out1 = F.silu(x) + m = paddle.nn.Silu() + out2 = m(x) + out_ref = self.x_np / (1 + np.exp(-self.x_np)) + for r in [out1, out2]: + np.testing.assert_allclose(out_ref, r.numpy(), rtol=1e-05) + paddle.enable_static() + + def test_errors(self): + with paddle.static.program_guard(paddle.static.Program()): + # The input type must be Variable. + self.assertRaises(TypeError, F.silu, 1) + # The input dtype must be float16, float32, float64. + x_int32 = paddle.fluid.data( + name='x_int32', shape=[11, 17], dtype='int32' + ) + self.assertRaises(TypeError, F.silu, x_int32) + # support the input dtype is float16 + x_fp16 = paddle.fluid.data( + name='x_fp16', shape=[11, 17], dtype='float16' + ) + F.silu(x_fp16) + + +support_types = get_xpu_op_support_types('silu') +for stype in support_types: + create_test_class(globals(), XPUTestSiluOP, stype) + + class XPUTestSigmoidOP(XPUOpTestWrapper): def __init__(self): self.op_name = 'sigmoid' diff --git a/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py index 46dcd0c1302220..915fb249514a96 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py @@ -258,6 +258,38 @@ def test_check_output(self): place = paddle.XPUPlace(0) self.check_output_with_place(place) + def test_check_grad(self): + place = paddle.XPUPlace(0) + # TODO(wangzhongpu): support mkldnn op in dygraph mode + self.check_grad_with_place( + place, + {'Input', 'Filter'}, + 'Output', + max_relative_error=0.03, + ) + + def test_check_grad_no_filter(self): + place = paddle.XPUPlace(0) + # TODO(wangzhongpu): support mkldnn op in dygraph mode + self.check_grad_with_place( + place, + ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter']), + ) + + def test_check_grad_no_input(self): + place = paddle.XPUPlace(0) + # TODO(wangzhongpu): support mkldnn op in dygraph mode + self.check_grad_with_place( + place, + ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input']), + ) + def init_test_case(self): self.pad = [0, 0, 0] self.stride = [1, 1, 1] @@ -401,6 +433,32 @@ def test_check_output(self): place = paddle.XPUPlace(0) self.check_output_with_place(place) + def test_check_grad(self): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, {'Input', 'Filter'}, 'Output', max_relative_error=0.03 + ) + + def test_check_grad_no_filter(self): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, + ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter']), + ) + + def test_check_grad_no_input(self): + place = paddle.XPUPlace(0) + self.check_grad_with_place( 
+ place, + ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input']), + ) + def init_test_case(self): self.stride = [1, 1, 1] self.input_size = [2, 3, 4, 4, 4] # NCDHW diff --git a/python/paddle/fluid/tests/unittests/xpu/test_unfold_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_unfold_op_xpu.py new file mode 100644 index 00000000000000..cce01f1aebf3be --- /dev/null +++ b/python/paddle/fluid/tests/unittests/xpu/test_unfold_op_xpu.py @@ -0,0 +1,180 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import paddle.fluid as fluid +import numpy as np +import sys +import unittest + +sys.path.append("..") +from op_test_xpu import XPUOpTest +from xpu.get_test_cover_info import ( + create_test_class, + get_xpu_op_support_types, + XPUOpTestWrapper, +) + +paddle.enable_static() + + +class XPUTestUnfoldOp(XPUOpTestWrapper): + def __init__(self): + self.op_name = 'unfold' + self.use_dynamic_create_class = False + + class TestUnfoldOp(XPUOpTest): + """ + This is for test on unfold Op + """ + + def init_data(self): + self.batch_size = 3 + self.input_channels = 3 + self.input_height = 20 + self.input_width = 20 + + self.kernel_sizes = [2, 2] + self.strides = [1, 1] + self.paddings = [1, 1, 1, 1] + self.dilations = [1, 1] + input_shape = [ + self.batch_size, + self.input_channels, + self.input_height, + self.input_width, + ] + self.x = np.random.rand(*input_shape).astype(self.dtype) + + def calc_unfold(self): + output_shape = [0] * 3 + output_shape[0] = self.batch_size + output_shape[1] = ( + self.input_channels + * self.kernel_sizes[0] + * self.kernel_sizes[1] + ) + dkernel_h = self.dilations[0] * (self.kernel_sizes[0] - 1) + 1 + dkernel_w = self.dilations[1] * (self.kernel_sizes[1] - 1) + 1 + out_height = ( + int( + ( + self.input_height + + self.paddings[0] + + self.paddings[2] + - dkernel_h + ) + / self.strides[0] + ) + + 1 + ) + out_width = ( + int( + ( + self.input_width + + self.paddings[1] + + self.paddings[3] + - dkernel_w + ) + / self.strides[1] + ) + + 1 + ) + output_shape[2] = out_height * out_width + output = np.zeros(output_shape).astype(np.float64) + # ------------ calculate output -------------- # + for i in range(output_shape[0]): + for j in range(output_shape[1]): + for k in range(output_shape[2]): + h_out = int(k / out_width) + w_out = k % out_width + w_offset = j % self.kernel_sizes[1] + h_offset = ( + int(j / self.kernel_sizes[1]) % self.kernel_sizes[0] + ) + c_in = int( + j / (self.kernel_sizes[0] * self.kernel_sizes[1]) + ) + h_in = ( + h_offset * self.dilations[0] + + h_out * self.strides[0] + - self.paddings[0] + ) + w_in = ( + w_offset * self.dilations[1] + + w_out * self.strides[1] + - self.paddings[1] + ) + if (h_in >= 0 and h_in < self.input_height) and ( + w_in >= 0 and w_in < self.input_width + ): + output[i, j, k] = self.x[i, c_in, h_in, w_in] + + self.outputs = output + + def set_data(self): + self.init_data() + self.calc_unfold() + + self.inputs = {'X': self.x} + 
self.attrs = { + 'kernel_sizes': self.kernel_sizes, + 'paddings': self.paddings, + 'dilations': self.dilations, + 'strides': self.strides, + } + self.outputs = {'Y': self.outputs} + + def setUp(self): + self.op_type = 'unfold' + self.dtype = self.in_type + self.set_data() + + def test_check_output(self): + self.check_output_with_place(paddle.XPUPlace(0)) + + def test_check_grad(self): + self.check_grad_with_place(paddle.XPUPlace(0), ['X'], 'Y') + + class TestUnfoldAPI(TestUnfoldOp): + """ + This is for test on paddle.nn.Unfold + """ + + def setUp(self): + self.op_type = 'unfold' + self.set_data() + self.places = [paddle.XPUPlace(0)] + + def test_dygraph(self): + for place in self.places: + with fluid.dygraph.guard(place): + input = fluid.dygraph.to_variable(self.inputs['X']) + m = paddle.nn.Unfold(**self.attrs) + m.eval() + result = m(input) + np.testing.assert_allclose( + result.numpy(), self.outputs['Y'], rtol=1e-05 + ) + + def test_info(self): + str(paddle.nn.Unfold(**self.attrs)) + + +support_types = get_xpu_op_support_types('unfold') +for stype in support_types: + create_test_class(globals(), XPUTestUnfoldOp, stype) + +if __name__ == "__main__": + unittest.main() From 33173ab4ba623900e7cae51f8373ec7fb260c521 Mon Sep 17 00:00:00 2001 From: yuehuayingxueluo <867460659@qq.com> Date: Fri, 2 Dec 2022 11:14:11 +0800 Subject: [PATCH 105/154] clear fluid apis: square_error_cost (#48029) * clear fluid apis in fleet and passes * fix model.py * fix model.py * fix cpp_pass.py * clear loss.py * change test file * fix some test_*.py * fix adaround.py * fix evaluator.py * fix CI bug * fix CI bug * fix decode.py * fix detection.py * fix ci bug * rm test_sigmoid_cross_entropy_with_logits_op_ipu.py and fix __init__.py * fix ci bug * fix ci BUG --- python/paddle/fluid/contrib/optimizer.py | 1 - .../contrib/slim/quantization/adaround.py | 3 +- .../test_moving_average_abs_max_scale_op.py | 2 +- .../tests/test_image_classification_fp16.py | 4 +- .../tests/test_multi_precision_fp16_train.py | 2 +- python/paddle/fluid/layers/detection.py | 2 +- python/paddle/fluid/layers/loss.py | 57 ------------------- python/paddle/fluid/optimizer.py | 19 +++++-- .../fluid/tests/book/test_fit_a_line.py | 10 +++- .../tests/book/test_recommender_system.py | 4 +- .../tests/unittests/auto_checkpoint_utils.py | 2 +- .../tests/unittests/check_nan_inf_base.py | 2 +- .../fleet/hybrid_parallel_pp_embedding.py | 4 +- .../fleet/hybrid_parallel_shared_weight.py | 4 +- .../fleet/parallel_dygraph_transformer.py | 2 +- .../fleet/test_communicator_half_async.py | 2 +- .../fleet/test_communicator_sync.py | 2 +- .../fleet/test_distributed_strategy.py | 2 +- .../fleet/test_fleet_rolemaker_new.py | 2 +- .../fluid/tests/unittests/dist_transformer.py | 2 +- .../dygraph_to_static/bert_dygraph_model.py | 4 +- .../seq2seq_dygraph_model.py | 4 +- .../unittests/dygraph_to_static/test_bmn.py | 2 +- .../dygraph_to_static/test_ptb_lm.py | 2 +- .../transformer_dygraph_model.py | 2 +- .../tests/unittests/ipu/test_dy2static_ipu.py | 2 +- .../unittests/mlu/test_huber_loss_op_mlu.py | 2 +- .../unittests/mlu/test_momentum_op_mlu.py | 4 +- .../test_softmax_with_cross_entropy_op_mlu.py | 2 +- .../unittests/npu/test_momentum_op_npu.py | 4 +- .../tests/unittests/npu/test_slice_op_npu.py | 2 +- .../test_softmax_with_cross_entropy_op_npu.py | 2 +- .../parallel_dygraph_sparse_embedding.py | 2 +- .../fluid/tests/unittests/test_adadelta_op.py | 4 +- .../fluid/tests/unittests/test_adam_op.py | 4 +- .../test_adam_optimizer_fp32_fp64.py | 4 +- 
.../fluid/tests/unittests/test_adamw_op.py | 4 +- .../fluid/tests/unittests/test_backward.py | 6 +- .../fluid/tests/unittests/test_bpr_loss_op.py | 3 + .../unittests/test_communicator_async.py | 2 +- .../tests/unittests/test_communicator_geo.py | 2 +- .../unittests/test_communicator_ps_gpu.py | 2 +- .../test_dist_fleet_a_sync_optimizer_async.py | 4 +- .../test_dist_fleet_a_sync_optimizer_sync.py | 2 +- .../test_dist_fleet_trainer_desc_config.py | 2 +- .../tests/unittests/test_dist_transpiler.py | 21 +++---- .../fluid/tests/unittests/test_downpoursgd.py | 12 +++- .../test_eager_deletion_padding_rnn.py | 2 +- .../fluid/tests/unittests/test_exception.py | 2 +- .../unittests/test_executor_check_feed.py | 2 +- .../test_executor_feed_non_tensor.py | 2 +- .../tests/unittests/test_imperative_gnn.py | 8 ++- ..._imperative_lod_tensor_to_selected_rows.py | 2 +- .../unittests/test_imperative_ptb_rnn.py | 2 +- .../unittests/test_imperative_save_load.py | 2 +- .../unittests/test_imperative_save_load_v2.py | 2 +- ..._imperative_selected_rows_to_lod_tensor.py | 2 +- ..._imperative_transformer_sorted_gradient.py | 2 +- .../unittests/test_inference_model_io.py | 24 ++++++-- ...test_inplace_softmax_with_cross_entropy.py | 3 +- .../fluid/tests/unittests/test_lambv2_op.py | 4 +- .../fluid/tests/unittests/test_layers.py | 26 ++++++--- .../unittests/test_listen_and_serv_op.py | 4 +- .../unittests/test_lookup_table_v2_op.py | 2 +- .../tests/unittests/test_memory_usage.py | 2 +- .../fluid/tests/unittests/test_momentum_op.py | 8 ++- .../unittests/test_network_with_dtype.py | 4 +- .../test_optimizer_in_control_flow.py | 6 +- .../unittests/test_program_prune_backward.py | 8 ++- .../fluid/tests/unittests/test_rmsprop_op.py | 4 +- .../tests/unittests/test_rnn_cell_api.py | 2 +- .../fluid/tests/unittests/test_sgd_op.py | 2 +- .../tests/unittests/test_square_error_cost.py | 9 ++- .../tests/unittests/test_static_save_load.py | 2 +- .../unittests/xpu/test_adadelta_op_xpu.py | 4 +- .../tests/unittests/xpu/test_sgd_op_xpu.py | 2 +- .../fluid/transpiler/distribute_transpiler.py | 7 ++- 77 files changed, 205 insertions(+), 180 deletions(-) diff --git a/python/paddle/fluid/contrib/optimizer.py b/python/paddle/fluid/contrib/optimizer.py index 31b175ba62fe1e..57df115b96ee2c 100644 --- a/python/paddle/fluid/contrib/optimizer.py +++ b/python/paddle/fluid/contrib/optimizer.py @@ -78,7 +78,6 @@ class Momentum(Optimizer): import numpy as np paddle.enable_static() - place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): diff --git a/python/paddle/fluid/contrib/slim/quantization/adaround.py b/python/paddle/fluid/contrib/slim/quantization/adaround.py index 2003380fa1a7d4..d6aff8d41c69b1 100644 --- a/python/paddle/fluid/contrib/slim/quantization/adaround.py +++ b/python/paddle/fluid/contrib/slim/quantization/adaround.py @@ -16,6 +16,7 @@ import time import sys import logging +import paddle import paddle import paddle.fluid as fluid @@ -61,7 +62,7 @@ def __init__(self, reg_param=0.01, default_beta_range=(20, 2)): self.default_beta_range = default_beta_range def compute_recon_loss(self, ada_quantized_output, orig_output): - square_cost = fluid.layers.square_error_cost( + square_cost = paddle.nn.functional.square_error_cost( ada_quantized_output, orig_output ) recon_loss = paddle.mean(paddle.sum(square_cost, axis=-1)) diff --git a/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py index 
8ddca1b354c709..4184166806d4fc 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py +++ b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py @@ -50,7 +50,7 @@ def check_backward(self, use_cuda): name=fc_tmp.name, dtype=fc_tmp.dtype ) fc_tmp_1 = out_scale(fc_tmp) - cross_entropy = fluid.layers.softmax_with_cross_entropy( + cross_entropy = paddle.nn.functional.softmax_with_cross_entropy( fc_tmp, label ) loss = paddle.mean(cross_entropy) diff --git a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py index ab9ebfa71929b7..362dde4d4816f1 100644 --- a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py +++ b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py @@ -127,7 +127,7 @@ def train(net_type, use_cuda, save_dirname, is_local): raise ValueError("%s network is not supported" % net_type) logits = fluid.layers.fc(input=net, size=classdim, act="softmax") - cost, predict = fluid.layers.softmax_with_cross_entropy( + cost, predict = paddle.nn.functional.softmax_with_cross_entropy( logits, label, return_softmax=True ) avg_cost = paddle.mean(cost) @@ -509,7 +509,7 @@ def decorate_with_data_loader(self): net = vgg16_bn_drop(image) logits = fluid.layers.fc(input=net, size=10, act="softmax") - cost, predict = fluid.layers.softmax_with_cross_entropy( + cost, predict = paddle.nn.functional.softmax_with_cross_entropy( logits, label, return_softmax=True ) avg_cost = paddle.mean(cost) diff --git a/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py b/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py index ba0f6534adfa55..4265594f71f56f 100644 --- a/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py +++ b/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py @@ -110,7 +110,7 @@ def train(use_pure_fp16=True, use_nesterov=False, optimizer=""): label = fluid.layers.data(name='label', shape=[1], dtype='int64') net = resnet_cifar10(images) logits = fluid.layers.fc(input=net, size=classdim, act="softmax") - cost = fluid.layers.softmax_with_cross_entropy( + cost = paddle.nn.functional.softmax_with_cross_entropy( logits, label, return_softmax=False ) sum_cost = paddle.sum(cost) diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index fa00813146862f..a3db0f70a6cc46 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -21,7 +21,7 @@ from ..layer_helper import LayerHelper from ..framework import Variable, _non_static_mode, static_only, in_dygraph_mode from .. import core -from .loss import softmax_with_cross_entropy +from paddle.fluid.layers import softmax_with_cross_entropy from . import tensor from . import nn from ..data_feeder import check_variable_and_dtype, check_type, check_dtype diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py index 65a52415f15c6b..1d24489638096f 100644 --- a/python/paddle/fluid/layers/loss.py +++ b/python/paddle/fluid/layers/loss.py @@ -36,7 +36,6 @@ __all__ = [ 'cross_entropy', - 'square_error_cost', 'softmax_with_cross_entropy', ] @@ -144,41 +143,6 @@ def cross_entropy2(input, label, ignore_index=kIgnoreIndex): return out -def square_error_cost(input, label): - r""" - - Accept input predictions and target label and returns the - squared error cost. 
- - For predictions label, and target label, the equation is: - - .. math:: - - Out = (input - label)^2 - - Parameters: - input (Tensor): Input tensor, the data type should be float32. - label (Tensor): Label tensor, the data type should be float32. - - Returns: - Tensor, The tensor storing the element-wise squared - error difference between input and label. - - Examples: - - .. code-block:: python - - import paddle - input = paddle.to_tensor([1.1, 1.9]) - label = paddle.to_tensor([1.0, 2.0]) - output = paddle.nn.functional.square_error_cost(input, label) - print(output) - # [0.01, 0.01] - - """ - return paddle.nn.functional.square_error_cost(input, label) - - def softmax_with_cross_entropy( logits, label, @@ -189,49 +153,32 @@ def softmax_with_cross_entropy( axis=-1, ): r""" - This operator implements the cross entropy loss function with softmax. This function combines the calculation of the softmax operation and the cross entropy loss function to provide a more numerically stable gradient. - Because this operator performs a softmax on logits internally, it expects unscaled logits. This operator should not be used with the output of softmax operator since that would produce incorrect results. - When the attribute :attr:`soft_label` is set :attr:`False`, this operators expects mutually exclusive hard labels, each sample in a batch is in exactly one class with a probability of 1.0. Each sample in the batch will have a single label. - The equation is as follows: - 1) Hard label (one-hot label, so every sample has exactly one class) - .. math:: - loss_j = -\\text{logits}_{label_j} + \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{logits}_i)\\right), j = 1,..., K - 2) Soft label (each sample can have a distribution over all classes) - .. math:: - loss_j = -\\sum_{i=0}^{K}\\text{label}_i \\left(\\text{logits}_i - \\log\\left(\\sum_{i=0}^{K} \\exp(\\text{logits}_i)\\right)\\right), j = 1,...,K - 3) If :attr:`numeric_stable_mode` is :attr:`True`, softmax is calculated first by: - .. math:: - max_j &= \\max_{i=0}^{K}{\\text{logits}_i} - log\\_max\\_sum_j &= \\log\\sum_{i=0}^{K}\\exp(logits_i - max_j) - softmax_j &= \\exp(logits_j - max_j - {log\\_max\\_sum}_j) - and then cross entropy loss is calculated by softmax and label. - Args: logits (Tensor): A multi-dimension ``Tensor`` , and the data type is float32 or float64. The input tensor of unscaled log probabilities. label (Tensor): The ground truth ``Tensor`` , data type is the same @@ -258,7 +205,6 @@ def softmax_with_cross_entropy( axis (int, optional): The index of dimension to perform softmax calculations. It should be in range :math:`[-1, rank - 1]`, while :math:`rank` is the rank of input :attr:`logits`. Default: -1. - Returns: ``Tensor`` or Tuple of two ``Tensor`` : Return the cross entropy loss if \ `return_softmax` is False, otherwise the tuple \ @@ -266,13 +212,10 @@ def softmax_with_cross_entropy( with input logits and cross entropy loss is in \ the same shape with input logits except shape \ in dimension :attr:`axis` as 1. - Examples: .. 
code-block:: python - import paddle import numpy as np - data = np.random.rand(128).astype("float32") label = np.random.rand(1).astype("int64") data = paddle.to_tensor(data) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 8c9a940d846be9..ea10b49e9cc6f4 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -1441,13 +1441,14 @@ class SGDOptimizer(Optimizer): import paddle.fluid as fluid import numpy as np + paddle.enable_static() place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) @@ -1642,13 +1643,14 @@ class MomentumOptimizer(Optimizer): import paddle.fluid as fluid import numpy as np + paddle.enable_static() place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) moment_optimizer = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9) @@ -2219,13 +2221,14 @@ class AdamOptimizer(Optimizer): import paddle import paddle.fluid as fluid + paddle.enable_static() place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): x = fluid.data(name='x', shape=[None, 13], dtype='float32') y = fluid.data(name='y', shape=[None, 1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) adam_optimizer = fluid.optimizer.AdamOptimizer(0.01) @@ -2247,13 +2250,14 @@ class AdamOptimizer(Optimizer): import paddle.fluid as fluid import paddle.fluid.layers.learning_rate_scheduler as lr_scheduler + paddle.enable_static() place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): x = fluid.data(name='x', shape=[None, 13], dtype='float32') y = fluid.data(name='y', shape=[None, 1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) # define beta decay variable @@ -3276,13 +3280,14 @@ class RMSPropOptimizer(Optimizer): import paddle.fluid as fluid import numpy as np + paddle.enable_static() place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) rms_optimizer = fluid.optimizer.RMSProp(learning_rate=0.1) @@ -3493,13 +3498,15 @@ class 
FtrlOptimizer(Optimizer): import paddle.fluid as fluid import numpy as np + paddle.enable_static() + place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) ftrl_optimizer = fluid.optimizer.Ftrl(learning_rate=0.1) diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py index a6cca9ceebd6bd..558ce9febe4e5c 100644 --- a/python/paddle/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/test_fit_a_line.py @@ -56,16 +56,20 @@ def train(use_cuda, save_dirname, is_local, use_bf16, pure_bf16): if not pure_bf16: with amp.bf16.bf16_guard(): y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) else: y_predict = fluid.layers.fc(input=x, size=1, act=None) with amp.bf16.bf16_guard(): - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) else: y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) lr = 5e-3 if use_bf16 else 1e-3 diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index 3aacd377dc0c7e..89da6135a806d3 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -167,7 +167,9 @@ def model(): scale_infer = paddle.scale(x=inference, scale=5.0) label = layers.data(name='score', shape=[1], dtype='float32') - square_cost = layers.square_error_cost(input=scale_infer, label=label) + square_cost = paddle.nn.functional.square_error_cost( + input=scale_infer, label=label + ) avg_cost = paddle.mean(square_cost) return scale_infer, avg_cost diff --git a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py index 15d62544d217a3..cb6f8a0a29f983 100644 --- a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py +++ b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py @@ -69,7 +69,7 @@ def simple_net(): label = fluid.data(name='label', shape=[-1, 1], dtype='int64') fc_tmp = fluid.layers.fc(image, size=CLASS_NUM) - cross_entropy = fluid.layers.softmax_with_cross_entropy( + cross_entropy = paddle.nn.functional.softmax_with_cross_entropy( fc_tmp, label ) loss = paddle.mean(cross_entropy) diff --git a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py index c6e9a36ddfe2cc..e9812d11ba7631 100644 --- a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py +++ b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py @@ -63,7 +63,7 @@ def net(): hidden = fluid.layers.fc(input=hidden, size=400, act="sigmoid") hidden = fluid.layers.fc(input=hidden, size=3, act=None) - cost, y_predict = 
fluid.layers.softmax_with_cross_entropy( + cost, y_predict = paddle.nn.functional.softmax_with_cross_entropy( hidden, y, return_softmax=True ) acc_top1 = paddle.static.accuracy(input=y_predict, label=y, k=1) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py index dfcdd5b130f731..104aa658ec3319 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py @@ -57,7 +57,7 @@ def forward(self, x1, x2, y1): fc = fluid.layers.matmul(x_emb, self.softmax_weight) fc = fluid.layers.elementwise_add(fc, self.softmax_bias) projection = paddle.reshape(fc, shape=[-1, vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=y1, soft_label=False ) return loss.mean() @@ -106,7 +106,7 @@ def __init__(self): def forward(self, args, y1): projection, x2 = args - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=y1[0], soft_label=False ) return loss.mean() diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py index f63d7c9ad330ee..58c0fe7465c918 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py @@ -67,7 +67,7 @@ def forward(self, x1, x2, y1): projection = paddle.matmul(projection, self.word_embeddings.weight) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=y1, soft_label=False ) return loss.mean() @@ -120,7 +120,7 @@ def __init__(self): def forward(self, args, y1): projection = args - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=y1[0], soft_label=False ) return loss.mean() diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py index ae747cb465e7e8..6792cf2877fe53 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py @@ -941,7 +941,7 @@ def forward(self, enc_inputs, dec_inputs, label, weights): epsilon=self._label_smooth_eps, ) - cost = fluid.layers.softmax_with_cross_entropy( + cost = paddle.nn.functional.softmax_with_cross_entropy( logits=predict, label=label_out, soft_label=True if self._label_smooth_eps else False, diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py index cedfe94448b587..209233027c8a73 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py @@ -33,7 +33,7 @@ def net(self): y_predict = fluid.layers.fc(input=x, size=1, act=None) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = 
fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) return avg_cost, x, y diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py index 7427e6a58ff55c..550fc5db902416 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py @@ -29,7 +29,7 @@ class TestCommunicator(unittest.TestCase): def net(self): x = fluid.layers.data(name='x', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=x, label=y) + cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) return avg_cost diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py index 54ee7dbdc9d026..245aa097b6aee1 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py @@ -273,7 +273,7 @@ def test_debug_info(self): x = fluid.layers.data(name='x', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) role = role_maker.UserDefinedRoleMaker( diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py index 16deaa3bbb9ee0..96e84251011feb 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py @@ -449,7 +449,7 @@ def net(): x = paddle.fluid.layers.data(name='x', shape=[13], dtype='float32') y_predict = paddle.fluid.layers.fc(input=x, size=1, act=None) y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = paddle.fluid.layers.square_error_cost( + cost = paddle.nn.functional.square_error_cost( input=y_predict, label=y ) avg_cost = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index 8a8b013b6b053a..52b6f674e5c19e 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -1585,7 +1585,7 @@ def transformer( epsilon=label_smooth_eps, ) - cost = layers.softmax_with_cross_entropy( + cost = paddle.nn.functional.softmax_with_cross_entropy( logits=paddle.reshape(predict, shape=[-1, trg_vocab_size]), label=label, soft_label=True if label_smooth_eps else False, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py index 64d0a8dc73aa7c..d45d775829944f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py @@ -410,7 +410,7 @@ def forward( else: fc_out = 
self.out_fc(mask_trans_feat) - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( + mask_lm_loss = paddle.nn.functional.softmax_with_cross_entropy( logits=fc_out, label=mask_label ) mean_mask_lm_loss = paddle.mean(mask_lm_loss) @@ -420,7 +420,7 @@ def forward( ( next_sent_loss, next_sent_softmax, - ) = fluid.layers.softmax_with_cross_entropy( + ) = paddle.nn.functional.softmax_with_cross_entropy( logits=next_sent_fc_out, label=labels, return_softmax=True ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index 5f22b03cc9ff09..bf1dfdcad23887 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -294,7 +294,7 @@ def forward(self, inputs): dec_output = paddle.stack(dec_output) dec_output = self.fc(self._transpose_batch_time(dec_output)) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=dec_output, label=label, soft_label=False ) loss = paddle.squeeze(loss, axis=[2]) @@ -828,7 +828,7 @@ def forward(self, inputs): dec_output = paddle.stack(dec_output) dec_output = self.fc(self._transpose_batch_time(dec_output)) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=dec_output, label=label, soft_label=False ) loss = paddle.squeeze(loss, axis=[2]) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py index f8e657499a4cd7..0cb3e333045f58 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py @@ -379,7 +379,7 @@ def pem_reg_loss_func(pred_score, gt_iou_map, mask): weights = u_hmask + u_smmask + u_slmask weights.stop_gradient = True - loss = fluid.layers.square_error_cost(pred_score, gt_iou_map) + loss = paddle.nn.functional.square_error_cost(pred_score, gt_iou_map) loss = paddle.multiply(loss, weights) loss = 0.5 * paddle.sum(loss) / paddle.sum(weights) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py index c7135a8ff781cc..62c6c18346885e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py @@ -216,7 +216,7 @@ def forward(self, input, label, init_hidden, init_cell): projection = fluid.layers.matmul(rnn_out, self.softmax_weight) projection = paddle.add(projection, self.softmax_bias) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index 209127104bd4fd..accf36ff179197 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -576,7 +576,7 @@ def __call__(self, predict, label, weights): epsilon=self.label_smooth_eps, ) - cost = layers.softmax_with_cross_entropy( + cost = 
paddle.nn.functional.softmax_with_cross_entropy( logits=predict, label=label_out, soft_label=True if self.label_smooth_eps else False, diff --git a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py index 73ddadc0ac4174..dbdfab28825036 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py @@ -220,7 +220,7 @@ def create_model(self, use_ipu=False): class TestWithoutIdentityLoss2(TestBase): def set_op_attrs(self): - self.loss_op = paddle.fluid.layers.softmax_with_cross_entropy + self.loss_op = paddle.paddle.nn.functional.softmax_with_cross_entropy def set_data_feed(self): self.data = paddle.uniform((8, 3, 10, 10), dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/mlu/test_huber_loss_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_huber_loss_op_mlu.py index 5e83c7e57daa29..a286dbf5857e1e 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_huber_loss_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_huber_loss_op_mlu.py @@ -103,4 +103,4 @@ def set_shape(self): if __name__ == '__main__': - unittest.main() + unittest.main() \ No newline at end of file diff --git a/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py index 8efc129b68885b..cb393cbd373112 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py @@ -143,7 +143,7 @@ def test_momentum(self): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost =paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) rms_optimizer = paddle.optimizer.Momentum( @@ -268,7 +268,7 @@ def test_momentum_static(self): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost =paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) momentum_optimizer = paddle.fluid.contrib.optimizer.Momentum( diff --git a/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py index f210ea0b633b21..c924bdc6918700 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py @@ -126,7 +126,7 @@ def _test(self, run_mlu=True): fc_1 = fluid.layers.fc(input=z, size=128) prediction = fluid.layers.fc(input=fc_1, size=2) - cost = fluid.layers.softmax_with_cross_entropy(prediction, label) + cost = paddle.nn.functional.softmax_with_cross_entropy(prediction, label) loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py index 9719c5582bc8c6..fe0882c7330fbf 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py +++ 
b/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py @@ -111,7 +111,7 @@ def test_momentum(self): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost =paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) rms_optimizer = paddle.optimizer.Momentum( @@ -239,7 +239,7 @@ def test_momentum_static(self): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost =paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) momentum_optimizer = paddle.fluid.contrib.optimizer.Momentum( diff --git a/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py index 5bc1700cc16f46..adb1b1b269b276 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py @@ -275,7 +275,7 @@ def _test(self, run_npu=True): prediction = paddle.static.nn.fc(z, size=2, activation='softmax') - cost = paddle.fluid.layers.softmax_with_cross_entropy( + cost = paddle.paddle.nn.functional.softmax_with_cross_entropy( logits=prediction, label=label ) loss = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py index 487ca61320e402..f47a0275af0fd9 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py @@ -124,7 +124,7 @@ def _test(self, run_npu=True): fc_1 = fluid.layers.fc(input=z, size=128) prediction = fluid.layers.fc(input=fc_1, size=2) - cost = fluid.layers.softmax_with_cross_entropy(prediction, label) + cost = paddle.nn.functional.softmax_with_cross_entropy(prediction, label) loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py index 9e0ed71d03598f..7c46efe77556f8 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py @@ -68,7 +68,7 @@ def forward(self, input, label): fc = fluid.layers.matmul(x_emb, self.softmax_weight) fc = paddle.add(fc, self.softmax_bias) projection = paddle.reshape(fc, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) diff --git a/python/paddle/fluid/tests/unittests/test_adadelta_op.py b/python/paddle/fluid/tests/unittests/test_adadelta_op.py index 73a3c1e1cbf7c0..eb0fec336a33ce 100644 --- a/python/paddle/fluid/tests/unittests/test_adadelta_op.py +++ b/python/paddle/fluid/tests/unittests/test_adadelta_op.py @@ -146,7 +146,9 @@ def test_adadelta(self): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = 
fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) rms_optimizer = paddle.optimizer.Adadelta(learning_rate=0.1) diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py index 715b5460ed2f14..6298c923a27e1d 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_adam_op.py @@ -941,7 +941,9 @@ def test_adam_flatten_param_grads_with_regularizer(self): y_predict = fluid.layers.fc( input=x, size=1, act=None, param_attr=weight_attr ) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) adam = fluid.optimizer.AdamOptimizer( diff --git a/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py b/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py index 8e43728fb83da1..79c653cdfb26dc 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py +++ b/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py @@ -33,7 +33,9 @@ def main_test_func(place, dtype): x = fluid.data(name='x', shape=[None, 13], dtype=dtype) y = fluid.data(name='y', shape=[None, 1], dtype=dtype) y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) adam_optimizer = fluid.optimizer.AdamOptimizer(0.01) diff --git a/python/paddle/fluid/tests/unittests/test_adamw_op.py b/python/paddle/fluid/tests/unittests/test_adamw_op.py index 1810a4bea6121b..405a85235cc19a 100644 --- a/python/paddle/fluid/tests/unittests/test_adamw_op.py +++ b/python/paddle/fluid/tests/unittests/test_adamw_op.py @@ -621,7 +621,9 @@ def test_adamw_op(self): fc2_b_mon1 = np.zeros((linear2.bias.shape)).astype("float32") fc2_b_mon2 = np.zeros((linear2.bias.shape)).astype("float32") - cost = fluid.layers.square_error_cost(input=out, label=y) + cost = paddle.nn.functional.square_error_cost( + input=out, label=y + ) avg_cost = paddle.mean(cost) simple_lr_fun = partial( diff --git a/python/paddle/fluid/tests/unittests/test_backward.py b/python/paddle/fluid/tests/unittests/test_backward.py index c98fd7dff5e2b2..a24d49b5ead064 100644 --- a/python/paddle/fluid/tests/unittests/test_backward.py +++ b/python/paddle/fluid/tests/unittests/test_backward.py @@ -262,7 +262,9 @@ def build_model(self): name='fc_no_use', ) # loss - cost = fluid.layers.square_error_cost(input=predict, label=label) + cost = paddle.nn.functional.square_error_cost( + input=predict, label=label + ) loss = paddle.mean(cost, name='mean_loss') return loss @@ -330,7 +332,7 @@ def build_net(self): y = fluid.data(name='y', shape=[None, 1], dtype='float32') x_emb = fluid.embedding(x, size=[100, 256]) y_predict = fluid.layers.fc(input=x_emb, size=1, name='my_fc') - loss = fluid.layers.square_error_cost(input=y_predict, label=y) + loss = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_loss = paddle.mean(loss) param_names = [ param.name diff --git a/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py b/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py index 
5bd6dbb2fda5b6..7cd3c98a68634b 100644 --- a/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py @@ -17,6 +17,8 @@ import numpy as np from op_test import OpTest, randomize_probability +import paddle + class TestBprLossOp1(OpTest): """Test BprLoss with discrete one-hot labels.""" @@ -47,4 +49,5 @@ def test_check_grad(self): if __name__ == "__main__": + paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_communicator_async.py b/python/paddle/fluid/tests/unittests/test_communicator_async.py index 978e0d644c7271..ebc65cd5ac240b 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_async.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_async.py @@ -30,7 +30,7 @@ def net(self): x = fluid.layers.data(name='x', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=x, label=y) + cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) return avg_cost diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py b/python/paddle/fluid/tests/unittests/test_communicator_geo.py index 9363e2fe4727c7..073a9018c19009 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py @@ -49,7 +49,7 @@ def net(self): y_predict = fluid.layers.fc(input=z, size=1, act=None) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) return avg_cost, x, x1, y diff --git a/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py b/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py index fbea8b003ceda4..f0c1e9c8850d85 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py @@ -55,7 +55,7 @@ def test_communicator_ps_gpu(self): y = fluid.layers.data(name='y', shape=[1], dtype='float32') slots_vars = [x, y] - cost = fluid.layers.square_error_cost(input=x, label=y) + cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) optimizer = fluid.optimizer.Adam(0.01) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py index 4aaf596d570903..c80e1a68fddadd 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py @@ -47,7 +47,7 @@ def test_a_sync_optimizer_trainer(self): x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = paddle.fluid.layers.square_error_cost(input=x, label=y) + cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) strategy = paddle.distributed.fleet.DistributedStrategy() @@ -85,7 +85,7 @@ def test_a_sync_optimizer_pserver(self): x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = paddle.fluid.layers.square_error_cost(input=x, label=y) + cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = 
paddle.mean(cost) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py index 19c0f48e7c1a2c..fdaa0a69c8d208 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py @@ -40,7 +40,7 @@ def test_gradient_merge_optimizer(self): x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = paddle.fluid.layers.square_error_cost(input=x, label=y) + cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py index e64b7d8010ef59..b13e2b8171c8cf 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py @@ -41,7 +41,7 @@ def test_trainer_desc_config(self): x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = paddle.fluid.layers.square_error_cost(input=x, label=y) + cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index 00a47420210eb1..45cdf972368749 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -47,7 +47,7 @@ def net_conf(self): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1) sgd_optimizer.minimize(avg_cost) @@ -302,7 +302,7 @@ def net_conf(self): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD( learning_rate=fluid.layers.exponential_decay( @@ -471,7 +471,7 @@ def net_conf(self): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) opt = fluid.optimizer.DecayedAdagrad(learning_rate=0.1) opt.minimize(avg_cost) @@ -492,7 +492,7 @@ def net_conf(self): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) opt = fluid.optimizer.Ftrl(learning_rate=0.1) opt.minimize(avg_cost) @@ -513,7 +513,7 @@ def net_conf(self): 
bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD( learning_rate=fluid.layers.piecewise_decay( @@ -579,7 +579,7 @@ def net_conf(self): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1) @@ -616,7 +616,7 @@ def net_conf(self): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) base_lr = 1.0 bd = [1, 10, 20, 30] @@ -692,7 +692,7 @@ def net_conf(self): bias_attr=False, ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=1.0) sgd_optimizer.minimize(avg_cost) @@ -1134,7 +1134,7 @@ def net_conf(self): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) optimizer = fluid.optimizer.RMSProp(learning_rate=0.1) optimizer.minimize(avg_cost) @@ -1167,7 +1167,7 @@ def net_conf(self): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) optimizer = fluid.optimizer.RMSProp(learning_rate=0.1) optimizer.minimize(avg_cost) @@ -1452,6 +1452,7 @@ def network_with_table(self, is_sparse, is_distributed): path_table=path_table, path_code=path_code, ) + avg_cost = paddle.mean(cost) # optimizer optimizer = fluid.optimizer.SGD(learning_rate=0.003) diff --git a/python/paddle/fluid/tests/unittests/test_downpoursgd.py b/python/paddle/fluid/tests/unittests/test_downpoursgd.py index 556bdda232a2bf..2e15d059db5f30 100644 --- a/python/paddle/fluid/tests/unittests/test_downpoursgd.py +++ b/python/paddle/fluid/tests/unittests/test_downpoursgd.py @@ -58,7 +58,9 @@ def test_device_work_use_cvm(self): ) y_predict = fluid.layers.fc(input=x_emb, size=1, act=None) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) ps_param = pslib.PSParameter() @@ -120,7 +122,9 @@ def test_device_work(self): ) y_predict = fluid.layers.fc(input=x_emb, size=1, act=None) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) ps_param = pslib.PSParameter() @@ -180,7 +184,9 @@ def 
test_downpour_opt_work(self): ) y_predict = fluid.layers.fc(input=x_emb, size=1, act=None) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) ps_param = pslib.PSParameter() diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py index ccdf56e64f4900..8ba799e84bc4c1 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -463,7 +463,7 @@ def encoder_static( projection = paddle.add(projection, softmax_bias) projection = paddle.reshape(projection, shape=[-1, vocab_size]) - loss = layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=y, soft_label=False ) diff --git a/python/paddle/fluid/tests/unittests/test_exception.py b/python/paddle/fluid/tests/unittests/test_exception.py index c627f8688a1580..45a11656ccf8e5 100644 --- a/python/paddle/fluid/tests/unittests/test_exception.py +++ b/python/paddle/fluid/tests/unittests/test_exception.py @@ -43,7 +43,7 @@ def test_exception_in_static_mode(self): x = fluid.layers.data(name='X', shape=[-1, 13], dtype='float32') y = fluid.layers.data(name='Y', shape=[-1, 1], dtype='float32') predict = fluid.layers.fc(input=x, size=1, act=None) - loss = fluid.layers.square_error_cost(input=predict, label=y) + loss = paddle.nn.functional.square_error_cost(input=predict, label=y) avg_loss = paddle.mean(loss) fluid.optimizer.SGD(learning_rate=0.01).minimize(avg_loss) diff --git a/python/paddle/fluid/tests/unittests/test_executor_check_feed.py b/python/paddle/fluid/tests/unittests/test_executor_check_feed.py index 77122f46bdcf5d..9696ebcc4412eb 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_check_feed.py +++ b/python/paddle/fluid/tests/unittests/test_executor_check_feed.py @@ -25,7 +25,7 @@ def net(self): y = fluid.data(name="y", shape=[None, 1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) opt = fluid.optimizer.Adam(learning_rate=lr) diff --git a/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py b/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py index e2c52d99fd2d80..3d8371177705da 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py @@ -27,7 +27,7 @@ def net(self): y = fluid.data(name="y", shape=[None, 1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) opt = fluid.optimizer.Adam(learning_rate=lr) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py index bff393f38d1332..f62dfe436a7998 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py @@ -96,7 +96,9 @@ def func_gnn_float32(self): logits = paddle.reshape(logits, 
logits.shape[1:]) # In other example, it's nll with log_softmax. However, paddle's # log_loss only supports binary classification now. - loss = fluid.layers.softmax_with_cross_entropy(logits, labels) + loss = paddle.nn.functional.softmax_with_cross_entropy( + logits, labels + ) loss = paddle.sum(loss) adam = AdamOptimizer(learning_rate=1e-3) @@ -134,7 +136,7 @@ def func_gnn_float32(self): logits = paddle.reshape(logits, logits.shape[1:]) # In other example, it's nll with log_softmax. However, paddle's # log_loss only supports binary classification now. - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits, to_variable(labels) ) loss = paddle.sum(loss) @@ -162,7 +164,7 @@ def func_gnn_float32(self): logits2 = paddle.reshape(logits2, logits2.shape[1:]) # In other example, it's nll with log_softmax. However, paddle's # log_loss only supports binary classification now. - loss2 = fluid.layers.softmax_with_cross_entropy( + loss2 = paddle.nn.functional.softmax_with_cross_entropy( logits2, to_variable(labels2) ) loss2 = paddle.sum(loss2) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py index ed5d93961d1ae7..76733836ddc565 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py @@ -69,7 +69,7 @@ def forward(self, input, label): ) projection = paddle.add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index c86a802a0a4006..3980b0dbb27e66 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -228,7 +228,7 @@ def forward(self, input, label, init_hidden, init_cell): projection = fluid.layers.matmul(rnn_out, self.softmax_weight) projection = paddle.add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index f9b618cedf1c2a..a386e2113fa992 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -224,7 +224,7 @@ def forward(self, input, label, init_hidden, init_cell): projection = fluid.layers.matmul(rnn_out, self.softmax_weight) projection = paddle.add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) diff --git 
a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index bd6a6ca22f5633..19f4616d927059 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -225,7 +225,7 @@ def forward(self, input, label, init_hidden, init_cell): projection = fluid.layers.matmul(rnn_out, self.softmax_weight) projection = paddle.add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py index 5b533319019b83..bfba325046ea3f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py @@ -78,7 +78,7 @@ def forward(self, input, label): fc, paddle.transpose(self.embedding.weight, perm=[1, 0]) ) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py index 654ebf198b7d09..c99fbcf4e9a84a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py @@ -1099,7 +1099,7 @@ def forward(self, enc_inputs, dec_inputs, label, weights): epsilon=self._label_smooth_eps, ) - cost = fluid.layers.softmax_with_cross_entropy( + cost = paddle.nn.functional.softmax_with_cross_entropy( logits=predict, label=label_out, soft_label=True if self._label_smooth_eps else False, diff --git a/python/paddle/fluid/tests/unittests/test_inference_model_io.py b/python/paddle/fluid/tests/unittests/test_inference_model_io.py index 9cf82e16f742f0..daeae8e472fe26 100644 --- a/python/paddle/fluid/tests/unittests/test_inference_model_io.py +++ b/python/paddle/fluid/tests/unittests/test_inference_model_io.py @@ -59,7 +59,9 @@ def test_fit_line_inference_model(self): y_predict = layers.fc(input=x, size=1, act=None) - cost = layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) @@ -153,7 +155,9 @@ def test_save_inference_model(self): y_predict = layers.fc(input=x, size=1, act=None) - cost = layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) place = core.CPUPlace() @@ -209,7 +213,9 @@ def test_save_inference_model(self): y_predict = layers.fc(input=x, size=1, act=None) - cost = layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) 
place = core.CPUPlace() @@ -245,7 +251,9 @@ def test_save_and_load_inference_model(self): y_predict = layers.fc(input=x, size=1, act=None) - cost = layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) @@ -422,7 +430,9 @@ def test_serialize_program_and_persistables(self): y_predict = layers.fc(input=x, size=1, act=None) - cost = layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) @@ -469,7 +479,9 @@ def test_normalize_program(self): y_predict = layers.fc(input=x, size=1, act=None) - cost = layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) diff --git a/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py b/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py index fb6f04d48f04a6..16477e086daf11 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py @@ -16,6 +16,7 @@ import numpy as np +import paddle import paddle.fluid as fluid @@ -48,7 +49,7 @@ def softmax_with_xe( dtype='int64' if not self.soft_label else self.dtype, append_batch_size=False, ) - z_d, s_d = fluid.layers.softmax_with_cross_entropy( + z_d, s_d = paddle.nn.functional.softmax_with_cross_entropy( x_d, y_d, soft_label=self.soft_label, diff --git a/python/paddle/fluid/tests/unittests/test_lambv2_op.py b/python/paddle/fluid/tests/unittests/test_lambv2_op.py index 6b513008109ec6..d3abf54a00beed 100644 --- a/python/paddle/fluid/tests/unittests/test_lambv2_op.py +++ b/python/paddle/fluid/tests/unittests/test_lambv2_op.py @@ -126,7 +126,9 @@ def _build_static_model(main, startup, seed=100): x = fluid.layers.data(name='X', shape=[13], dtype='float32') y = fluid.layers.data(name='Y', shape=[1], dtype='float32') prediction = fluid.layers.fc(input=x, size=1, act=None) - loss = fluid.layers.square_error_cost(input=prediction, label=y) + loss = paddle.nn.functional.square_error_cost( + input=prediction, label=y + ) avg_loss = paddle.mean(loss) return avg_loss diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 6079f7636f3af9..1597269b29adc1 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -3037,7 +3037,9 @@ def make_fit_a_line(self): x = self._get_data(name='x', shape=[13], dtype='float32') y_predict = layers.fc(input=x, size=1, act=None) y = self._get_data(name='y', shape=[1], dtype='float32') - cost = layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) return avg_cost @@ -3256,23 +3258,31 @@ def make_softmax_with_cross_entropy(self): ): x = self._get_data(name='x', shape=[16], dtype='float32') y = self._get_data(name='label', shape=[1], dtype='int64') - loss, softmax = layers.softmax_with_cross_entropy( + loss, softmax = paddle.nn.functional.softmax_with_cross_entropy( x, y, return_softmax=True ) self.assertIsNotNone(loss) 
self.assertIsNotNone(softmax) - loss = layers.softmax_with_cross_entropy(x, y) + loss = paddle.nn.functional.softmax_with_cross_entropy(x, y) self.assertIsNotNone(loss) x1 = self._get_data(name='x1', shape=[16, 32, 64], dtype='float32') y1 = self._get_data(name='label1', shape=[1, 32, 64], dtype='int64') y2 = self._get_data(name='label2', shape=[16, 1, 64], dtype='int64') y3 = self._get_data(name='label3', shape=[16, 32, 1], dtype='int64') - loss1 = layers.softmax_with_cross_entropy(x1, y1, axis=1) - loss2 = layers.softmax_with_cross_entropy(x1, y2, axis=2) - loss3 = layers.softmax_with_cross_entropy(x1, y3, axis=3) - loss4 = layers.softmax_with_cross_entropy(x1, y3, axis=-1) + loss1 = paddle.nn.functional.softmax_with_cross_entropy( + x1, y1, axis=1 + ) + loss2 = paddle.nn.functional.softmax_with_cross_entropy( + x1, y2, axis=2 + ) + loss3 = paddle.nn.functional.softmax_with_cross_entropy( + x1, y3, axis=3 + ) + loss4 = paddle.nn.functional.softmax_with_cross_entropy( + x1, y3, axis=-1 + ) self.assertIsNotNone(loss1) self.assertIsNotNone(loss2) self.assertIsNotNone(loss3) @@ -3694,7 +3704,7 @@ def make_square_error_cost(self): ): x = self._get_data(name="X", shape=[1], dtype="float32") y = self._get_data(name="Y", shape=[1], dtype="float32") - out = layers.square_error_cost(input=x, label=y) + out = paddle.nn.functional.square_error_cost(input=x, label=y) return out def test_dynamic_lstmp(self): diff --git a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py index 4c63a4f2a9e4c1..66557b84079692 100644 --- a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py +++ b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py @@ -36,7 +36,7 @@ def run_pserver(use_cuda, sync_mode, ip, port, trainers, trainer_id): y = fluid.layers.data(name='y', shape=[1], dtype='float32') # loss function - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) # optimizer @@ -73,7 +73,7 @@ def run_pserver_with_empty_block( y = fluid.layers.data(name='y', shape=[1], dtype='float32') # loss function - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) # optimizer diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py index 2a74fff41d734f..cea6858e0d4d31 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py @@ -216,7 +216,7 @@ def get_w_grad(self, is_sparse): ) y = paddle.sum(emb, axis=-1) - loss = fluid.layers.square_error_cost(input=y, label=y_) + loss = paddle.nn.functional.square_error_cost(input=y, label=y_) loss = paddle.mean(loss) sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-4) diff --git a/python/paddle/fluid/tests/unittests/test_memory_usage.py b/python/paddle/fluid/tests/unittests/test_memory_usage.py index 973dba893d4404..f1293ea7a765b8 100644 --- a/python/paddle/fluid/tests/unittests/test_memory_usage.py +++ b/python/paddle/fluid/tests/unittests/test_memory_usage.py @@ -30,7 +30,7 @@ def train_simulator(test_batch_size=10): y_predict = fluid.layers.fc(input=x, size=1, act=None) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + 
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) diff --git a/python/paddle/fluid/tests/unittests/test_momentum_op.py b/python/paddle/fluid/tests/unittests/test_momentum_op.py index 102ef0a5fc5fde..8c9ec6d4295e08 100644 --- a/python/paddle/fluid/tests/unittests/test_momentum_op.py +++ b/python/paddle/fluid/tests/unittests/test_momentum_op.py @@ -532,7 +532,9 @@ def test_momentum(self): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) rms_optimizer = paddle.optimizer.Momentum( @@ -673,7 +675,9 @@ def test_momentum_static(self): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) momentum_optimizer = paddle.fluid.contrib.optimizer.Momentum( diff --git a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py index f230cc66c20a1f..af4ff64c894485 100644 --- a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py +++ b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py @@ -33,7 +33,9 @@ def run_net_on_place(self, place): x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype) y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype) y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) sgd_optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py index 3294b6f37067ce..df07543fa7acfe 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py @@ -82,7 +82,7 @@ def fn_1(opt, avg_loss=None, pred=None, label=None): def fn_2(opt, avg_loss=None, pred=None, label=None): if avg_loss is None: - loss = layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=pred, label=label ) avg_loss = paddle.mean(loss, name='mean_softmax_loss') @@ -108,7 +108,7 @@ def fn_2(opt, avg_loss=None, pred=None, label=None): else: loss_1 = layers.cross_entropy(input=prediction, label=label) avg_loss_1 = paddle.mean(loss_1) - loss_2 = layers.softmax_with_cross_entropy( + loss_2 = paddle.nn.functional.softmax_with_cross_entropy( logits=prediction, label=label ) avg_loss_2 = paddle.mean(loss_2) @@ -193,7 +193,7 @@ def dynamic(train_data, use_cuda=False, use_parallel_exe=False): loss.backward() adam.minimize(loss) else: - softmax_loss = layers.softmax_with_cross_entropy( + softmax_loss = paddle.nn.functional.softmax_with_cross_entropy( prediction, var_label ) loss = paddle.mean(softmax_loss) diff --git 
a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py index d751fd4b90d862..056afc5ead8339 100755 --- a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py +++ b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py @@ -92,7 +92,9 @@ def loss1(pred, label): return avg_loss def loss2(pred, label): - loss = fluid.layers.softmax_with_cross_entropy(logits=pred, label=label) + loss = paddle.nn.functional.softmax_with_cross_entropy( + logits=pred, label=label + ) avg_loss = paddle.mean(loss, name='mean_softmax_loss') return avg_loss @@ -119,7 +121,9 @@ def loss1(opt, pred, label, with_optimize): return avg_loss def loss2(opt, pred, label, with_optimize): - loss = fluid.layers.softmax_with_cross_entropy(logits=pred, label=label) + loss = paddle.nn.functional.softmax_with_cross_entropy( + logits=pred, label=label + ) avg_loss = paddle.mean(loss, name='mean_softmax_loss') if with_optimize: opt.minimize(avg_loss) diff --git a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py index 5e3e899eb67bdf..81b75a1513eb89 100644 --- a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py +++ b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py @@ -280,7 +280,9 @@ def test_rmsprop(self): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) rms_optimizer = paddle.optimizer.RMSProp(learning_rate=0.1) diff --git a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py index 0d3ccae5bfcb44..6b2383ed56933e 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py @@ -631,7 +631,7 @@ def def_seq2seq_model( ) # loss - loss = layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=logits, label=label, soft_label=False ) loss = layers.unsqueeze(loss, axes=[2]) diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op.py b/python/paddle/fluid/tests/unittests/test_sgd_op.py index 26c4dd18c13a1d..b87d67c7130476 100644 --- a/python/paddle/fluid/tests/unittests/test_sgd_op.py +++ b/python/paddle/fluid/tests/unittests/test_sgd_op.py @@ -204,7 +204,7 @@ def runTest(self): emb = fluid.embedding(input=data, size=(10000000, 150), dtype='float32') out = fluid.layers.l2_normalize(x=emb, axis=-1) - cost = fluid.layers.square_error_cost(input=out, label=label) + cost = paddle.nn.functional.square_error_cost(input=out, label=label) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) sgd_optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_square_error_cost.py b/python/paddle/fluid/tests/unittests/test_square_error_cost.py index 1fd516c0504ad8..7828f01b02fe61 100644 --- a/python/paddle/fluid/tests/unittests/test_square_error_cost.py +++ b/python/paddle/fluid/tests/unittests/test_square_error_cost.py @@ -16,6 +16,7 @@ import numpy as np +import paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.layers as layers @@ -32,7 +33,9 @@ def test_square_error_cost(self): input_var = 
layers.create_tensor(dtype="float32", name="input") label_var = layers.create_tensor(dtype="float32", name="label") - output = layers.square_error_cost(input=input_var, label=label_var) + output = paddle.nn.functional.square_error_cost( + input=input_var, label=label_var + ) for use_cuda in ( [False, True] if core.is_compiled_with_cuda() else [False] @@ -54,14 +57,14 @@ def test_error(self): def test_invalid_input(): input = [256, 3] label = fluid.data(name='label1', shape=[None, 3], dtype='float32') - loss = fluid.layers.square_error_cost(input, label) + loss = paddle.nn.functional.square_error_cost(input, label) self.assertRaises(TypeError, test_invalid_input) def test_invalid_label(): input = fluid.data(name='input2', shape=[None, 3], dtype='float32') label = [256, 3] - loss = fluid.layers.square_error_cost(input, label) + loss = paddle.nn.functional.square_error_cost(input, label) self.assertRaises(TypeError, test_invalid_label) diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index f417667a82a024..a2c44c5fae8fae 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -237,7 +237,7 @@ def forward(self, input, label, init_hidden, init_cell): projection = fluid.layers.matmul(rnn_out, self.softmax_weight) projection = paddle.add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py index afd7a57c367006..f42ccf12c581af 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py @@ -171,7 +171,9 @@ def test_adadelta(self): x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype) y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype) y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) rms_optimizer = paddle.optimizer.Adadelta(learning_rate=0.1) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py index e9cc501a87770a..46ecff205f79b6 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py @@ -75,7 +75,7 @@ def runTest(self): emb = fluid.embedding(input=data, size=(10000, 150), dtype='float32') out = fluid.layers.l2_normalize(x=emb, axis=-1) - cost = fluid.layers.square_error_cost(input=out, label=label) + cost = paddle.nn.functional.square_error_cost(input=out, label=label) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) sgd_optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index ee98dc94a6f1ef..ebb249903b228c 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -283,11 
+283,16 @@ class DistributeTranspiler: Examples: .. code-block:: python + import paddle + import paddle.fluid as fluid + + paddle.enable_static() + x = fluid.data(name='x', shape=[13], dtype='float32') y = fluid.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost =paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_loss = fluid.layers.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) From a0b91c7b3d58c0b20fb37a60b090fccb953d37f0 Mon Sep 17 00:00:00 2001 From: Roc <30228238+sljlp@users.noreply.github.com> Date: Fri, 2 Dec 2022 11:19:32 +0800 Subject: [PATCH 106/154] [Clean Fluid]Remove py_reader/double_buffer/create_py_reader_by_data/load from fluid.layer.io (#48589) rm py_reader/double_buffer/create_py_reader_by_data/load rm test_load_xpu --- python/paddle/fluid/layers/io.py | 543 ------------------ python/paddle/fluid/reader.py | 16 +- .../fluid/tests/unittests/CMakeLists.txt | 1 - .../fluid/tests/unittests/test_load_op.py | 73 --- .../fluid/tests/unittests/test_load_op_xpu.py | 73 --- .../fluid/tests/unittests/test_program.py | 65 --- .../unittests/test_py_reader_error_msg.py | 60 -- .../test_py_reader_lod_level_share.py | 45 -- .../unittests/test_py_reader_pin_memory.py | 138 ----- .../unittests/test_py_reader_push_pop.py | 111 ---- .../test_py_reader_using_executor.py | 338 ----------- 11 files changed, 5 insertions(+), 1458 deletions(-) delete mode 100644 python/paddle/fluid/tests/unittests/test_load_op.py delete mode 100644 python/paddle/fluid/tests/unittests/test_load_op_xpu.py delete mode 100644 python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py delete mode 100644 python/paddle/fluid/tests/unittests/test_py_reader_lod_level_share.py delete mode 100644 python/paddle/fluid/tests/unittests/test_py_reader_pin_memory.py delete mode 100644 python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py delete mode 100644 python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index 2259044e14e79e..e33cf23e6c1bc1 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -43,11 +43,6 @@ __all__ = [ 'data', - 'read_file', - 'double_buffer', - 'py_reader', - 'create_py_reader_by_data', - 'load', ] @@ -408,441 +403,6 @@ def _copy_reader_create_op_(block, op): return new_op -def _py_reader( - capacity, - shapes, - dtypes, - lod_levels=None, - name=None, - use_double_buffer=True, - feed_list=None, -): - if feed_list is not None: - if not isinstance(feed_list, list): - raise TypeError( - "feed_list should be a list of Variable" - " instead of " + str(type(feed_list)) - ) - lod_levels = [] - dtypes = [] - shape_concat = [] - ranks = [] - shapes = [] - need_check_feed = [] - - for feed_data in feed_list: - dtypes.append(feed_data.dtype) - shape_concat.extend(feed_data.shape) - ranks.append(len(feed_data.shape)) - shapes.append(feed_data.shape) - lod_levels.append(feed_data.lod_level) - need_check_feed.append(int(feed_data.desc.need_check_feed())) - else: - dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes] - need_check_feed = [0 for dt in dtypes] - shape_concat = [] - ranks = [] - - for shape in shapes: - shape_concat.extend(shape) - ranks.append(len(shape)) - - if lod_levels is None: - lod_levels = [0] * len(shapes) - dtype_int = [int(t) for t in dtypes] - if name is None: - queue_name = 
unique_name('lod_tensor_blocking_queue') - reader_name = unique_name('create_py_reader') - double_buffer_name = unique_name('double_buffer') - else: - queue_name = "_".join([name, "queue"]) - reader_name = "_".join([name, "reader"]) - double_buffer_name = "_".join([name, "double_buffer"]) - - var = global_scope().var(queue_name) - feed_queue = core.init_lod_tensor_blocking_queue(var, capacity, False) - - startup_blk = default_startup_program().current_block() - startup_var = startup_blk.create_var(name=reader_name) - startup_blk.append_op( - type='create_py_reader', - inputs={'blocking_queue': [queue_name]}, - outputs={'Out': [startup_var]}, - attrs={ - 'shape_concat': shape_concat, - 'lod_levels': lod_levels, - 'dtypes': dtype_int, - 'need_check_feed': need_check_feed, - 'ranks': ranks, - }, - ) - - startup_var.desc.set_dtypes(dtypes) - startup_var.persistable = True - - main_prog_var = _copy_reader_var_( - default_main_program().current_block(), startup_var - ) - - reader = monkey_patch_reader_methods(main_prog_var) - if use_double_buffer: - double_buffer_reader = double_buffer(reader, name=double_buffer_name) - # we return a double buffer reader. However, the reset method comes from - # py_reader. - double_buffer_reader.reset = reader.reset - reader = double_buffer_reader - - # monkey patch py_reader special methods - reader.queue = feed_queue - current_reset_method = reader.reset - reader.thread = None - reader.tensor_provider = None - reader.exited = False - - def start_provide_thread(func): - def __provider_thread__(legacy_expected_place): - try: - # See _DataLoaderIterSingleProcess._thread_loop() for why set expected place here. - - _set_expected_place(legacy_expected_place) - - for tensors in func(): - array = core.LoDTensorArray() - for item in tensors: - if not isinstance(item, core.LoDTensor): - tmp = core.LoDTensor() - tmp.set(item, core.CPUPlace()) - item = tmp - - array.append(item) - - if reader.exited: - break - feed_queue.push(array) - if reader.exited: - break - feed_queue.close() - except Exception as e: - feed_queue.kill() - logging.warn('Your decorated reader has raised an exception!') - raise e - - reader.thread = threading.Thread( - target=__provider_thread__, args=(_current_expected_place(),) - ) - reader.thread.daemon = True - reader.thread.start() - - def __set_tensor_provider__(func): - reader.tensor_provider = func - - def __set_paddle_reader__(paddle_reader): - with program_guard(Program(), Program()): - actual_feed_list = feed_list - if actual_feed_list is None: - actual_feed_list = [] - counter = 0 - for dtype, shape, lod_level in zip(dtypes, shapes, lod_levels): - name = str(counter) - actual_feed_list.append( - data( - name=name, - dtype=dtype, - shape=shape, - lod_level=lod_level, - ) - ) - counter += 1 - - data_names = [feed_data.name for feed_data in actual_feed_list] - feeder = DataFeeder( - feed_list=actual_feed_list, place=core.CPUPlace() - ) - paddle_reader = feeder.decorate_reader( - paddle_reader, multi_devices=False - ) - - def __tensor_provider__(): - for slots in paddle_reader(): - yield [slots[data_name] for data_name in data_names] - - __set_tensor_provider__(__tensor_provider__) - - def __reset__(): - current_reset_method() - if reader.thread is not None and reader.tensor_provider is not None: - reader.exited = True - reader.thread.join() - reader.exited = False - - def __start__(): - start_provide_thread(reader.tensor_provider) - - reader.reset = __reset__ - reader.decorate_tensor_provider = __set_tensor_provider__ - 
reader.decorate_paddle_reader = __set_paddle_reader__ - - reader.decorate_batch_generator = __set_tensor_provider__ - reader.decorate_sample_list_generator = __set_paddle_reader__ - reader.start = __start__ - - return reader - - -def py_reader( - capacity, shapes, dtypes, lod_levels=None, name=None, use_double_buffer=True -): - """ - :api_attr: Static Graph - - Create a Python reader for data feeding in Python - - This operator returns a Reader Variable. - The Reader provides :code:`decorate_paddle_reader()` and - :code:`decorate_tensor_provider()` to set a Python generator as the data - source and feed the data from the data source to the Reader Variable. - When :code:`Executor::Run()` is invoked in C++ side, the data from the - generator would be read automatically. Unlike :code:`DataFeeder.feed()`, - the data reading process and :code:`Executor::Run()` process can run in - parallel using :code:`py_reader`. The :code:`start()` method of the Reader - should be called when each pass begins, while the :code:`reset()` method - should be called when the pass ends and :code:`fluid.core.EOFException` raises. - - Note: - :code:`Program.clone()` method cannot clone :code:`py_reader`. You can - refer to :ref:`api_fluid_Program` for more details. - - The :code:`read_file` call needs to be in the program block of :code:`py_reader`. - You can refer to :ref:`api_fluid_layers_read_file` for more details. - - Args: - capacity(int): The buffer capacity maintained by :code:`py_reader`. - shapes(list|tuple): List of tuples which declaring data shapes. shapes[i] - represents the i-th data shape. - dtypes(list|tuple): List of strings which declaring data type. Supported dtype: - bool, float16, float32, float64, int8, int16, int32, int64, uint8. - lod_levels(list|tuple): List of ints which declaring data lod_level. - name(basestring): The default value is None. Normally there is no - need for user to set this property. For more information, please - refer to :ref:`api_guide_Name`. - use_double_buffer(bool): Whether use double buffer or not. The double buffer is - for pre-reading the data of the next batch and copy the data asynchronously - from CPU to GPU. Default is True. - - Returns: - A Reader from which we can get feeding data. - - Return Type: - Variable - - Examples: - 1. The basic usage of :code:`py_reader` is as follows: - - .. code-block:: python - - import paddle - import paddle.fluid as fluid - import paddle.dataset.mnist as mnist - - def network(image, label): - # user defined network, here a softmax regession example - predict = fluid.layers.fc(input=image, size=10, act='softmax') - return fluid.layers.cross_entropy(input=predict, label=label) - - reader = fluid.layers.py_reader(capacity=64, - shapes=[(-1, 1, 28, 28), (-1, 1)], - dtypes=['float32', 'int64']) - reader.decorate_paddle_reader( - paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5), - buf_size=1000)) - - img, label = fluid.layers.read_file(reader) - loss = network(img, label) - - fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program()) - exe = fluid.ParallelExecutor(use_cuda=True) - for epoch_id in range(10): - reader.start() - try: - while True: - exe.run(fetch_list=[loss.name]) - except fluid.core.EOFException: - reader.reset() - - fluid.io.save_inference_model(dirname='./model', - feeded_var_names=[img.name, label.name], - target_vars=[loss], - executor=fluid.Executor(fluid.CUDAPlace(0))) - - 2. 
When training and testing are both performed, two different - :code:`py_reader` should be created with different names, e.g.: - - .. code-block:: python - - import paddle - import paddle.fluid as fluid - import paddle.dataset.mnist as mnist - - def network(reader): - img, label = fluid.layers.read_file(reader) - # User defined network. Here a simple regression as example - predict = fluid.layers.fc(input=img, size=10, act='softmax') - loss = fluid.layers.cross_entropy(input=predict, label=label) - return fluid.layers.mean(loss) - - # Create train_main_prog and train_startup_prog - train_main_prog = fluid.Program() - train_startup_prog = fluid.Program() - with fluid.program_guard(train_main_prog, train_startup_prog): - # Use fluid.unique_name.guard() to share parameters with test program - with fluid.unique_name.guard(): - train_reader = fluid.layers.py_reader(capacity=64, - shapes=[(-1, 1, 28, 28), - (-1, 1)], - dtypes=['float32', 'int64'], - name='train_reader') - train_reader.decorate_paddle_reader( - paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5), - buf_size=500)) - train_loss = network(train_reader) # some network definition - adam = fluid.optimizer.Adam(learning_rate=0.01) - adam.minimize(train_loss) - - # Create test_main_prog and test_startup_prog - test_main_prog = fluid.Program() - test_startup_prog = fluid.Program() - with fluid.program_guard(test_main_prog, test_startup_prog): - # Use fluid.unique_name.guard() to share parameters with train program - with fluid.unique_name.guard(): - test_reader = fluid.layers.py_reader(capacity=32, - shapes=[(-1, 1, 28, 28), (-1, 1)], - dtypes=['float32', 'int64'], - name='test_reader') - test_reader.decorate_paddle_reader(paddle.batch(mnist.test(), 512)) - test_loss = network(test_reader) - - fluid.Executor(fluid.CUDAPlace(0)).run(train_startup_prog) - fluid.Executor(fluid.CUDAPlace(0)).run(test_startup_prog) - - train_exe = fluid.ParallelExecutor(use_cuda=True, - loss_name=train_loss.name, - main_program=train_main_prog) - test_exe = fluid.ParallelExecutor(use_cuda=True, - loss_name=test_loss.name, - main_program=test_main_prog) - for epoch_id in range(10): - train_reader.start() - try: - while True: - train_exe.run(fetch_list=[train_loss.name]) - except fluid.core.EOFException: - train_reader.reset() - - test_reader.start() - try: - while True: - test_exe.run(fetch_list=[test_loss.name]) - except fluid.core.EOFException: - test_reader.reset() - """ - logging.warn( - 'paddle.fluid.layers.py_reader() may be deprecated in the near future. ' - 'Please use paddle.fluid.io.DataLoader.from_generator() instead.' - ) - return _py_reader( - capacity=capacity, - shapes=shapes, - dtypes=dtypes, - lod_levels=lod_levels, - name=name, - use_double_buffer=use_double_buffer, - ) - - -def create_py_reader_by_data( - capacity, feed_list, name=None, use_double_buffer=True -): - """ - :api_attr: Static Graph - - The OP creates a Python reader for data feeding in Python, it is similar - to :ref:`api_fluid_layers_py_reader` except that it can read data from - the list of feed variables. - - Parameters: - capacity (int): The buffer capacity maintained by :code:`py_reader`. Its unit - is batch number. Set larger :attr:`capacity` if the reader is fast. - feed_list (list(Variable)): The feed variables, are usually created by - :code:`fluid.data()`. - name (str, optional): Normally there is no need for user to set this property. - For more information, please refer to :ref:`api_guide_Name`. Default: None. 
- use_double_buffer (bool, optional): Whether use double buffer. If it's True, - the OP would prefetch next batch data asynchronously. Default: True. - - Returns: - Reader: A Reader for data feeding. The data types of read data are the same as the data types of variables of :attr:`feed_list`. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - import paddle.dataset.mnist as mnist - - def network(img, label): - # User defined network. Here a simple regression as example - predict = fluid.layers.fc(input=img, size=10, act='softmax') - loss = fluid.layers.cross_entropy(input=predict, label=label) - return fluid.layers.mean(loss) - - MEMORY_OPT = False - USE_CUDA = False - - image = fluid.data(name='image', shape=[None, 1, 28, 28], dtype='float32') - label = fluid.data(name='label', shape=[None, 1], dtype='int64') - reader = fluid.layers.create_py_reader_by_data(capacity=64, - feed_list=[image, label]) - reader.decorate_paddle_reader( - paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5), buf_size=500)) - img, label = fluid.layers.read_file(reader) - loss = network(img, label) # The definition of custom network and the loss function - - place = fluid.CUDAPlace(0) if USE_CUDA else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - build_strategy = fluid.BuildStrategy() - build_strategy.memory_optimize = True if MEMORY_OPT else False - exec_strategy = fluid.ExecutionStrategy() - compiled_prog = fluid.compiler.CompiledProgram( - fluid.default_main_program()).with_data_parallel( - loss_name=loss.name, - build_strategy=build_strategy, - exec_strategy=exec_strategy) - - for epoch_id in range(2): - reader.start() - try: - while True: - exe.run(compiled_prog, fetch_list=[loss.name]) - except fluid.core.EOFException: - reader.reset() - """ - logging.warn( - 'paddle.fluid.layers.create_py_reader_by_data() may be deprecated in the near future. ' - 'Please use paddle.fluid.io.DataLoader.from_generator() instead.' - ) - return _py_reader( - capacity=capacity, - shapes=None, - dtypes=None, - lod_levels=None, - name=name, - use_double_buffer=use_double_buffer, - feed_list=feed_list, - ) - - def __create_shared_decorated_reader__(op_type, reader, attrs): var_name = unique_name(op_type) startup_blk = default_startup_program().current_block() @@ -871,106 +431,3 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None): attrs=attrs, ) return monkey_patch_reader_methods(new_reader) - - -def double_buffer(reader, place=None, name=None): - """ - Wrap a double buffer reader. The class Reader contains DecoratedReader and FileReader. Moreover, the DecoratedReader is inherited by CustomReader and BufferedReader. This function is related to BufferedReader. The data will copy to target place with a double buffer queue. If the target place is None, the place that executor perform on will be used. - - - Args: - reader (Variable): The Reader Variable need to be wrapped. - place (Place|str, optional): The place of target data, such as CPU, GPU, and if use GPU, it's necessary to point out which card is involved. Default is the sample place of executor perform. - if ``place`` is string, It can be ``cpu``, ``gpu:x``, where ``x`` is the ndex of the GPUs. - name (str, optional): Variable name. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. Default is None. - - Returns: - Variable(Reader): wrapped reader with double buffer. - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - reader = fluid.layers.py_reader(capacity=64, - shapes=[(-1, 1, 28, 28), (-1, 1)], - dtypes=['float32', 'int64'], - use_double_buffer=False) - reader = fluid.layers.double_buffer(reader) - image, label = fluid.layers.read_file(reader) - """ - attrs = dict() - if place is not None: - attrs['place'] = str(_get_paddle_place(place)).upper() - - return __create_unshared_decorated_reader__( - 'create_double_buffer_reader', reader, attrs, name=name - ) - - -def read_file(reader): - """ - :api_attr: Static Graph - - Execute the given reader and get data via it. - - A reader is also a Variable. It can be a raw reader generated by - `fluid.layers.open_files()` or a decorated one generated by - `fluid.layers.double_buffer()` . - - Args: - - reader(Variable): The reader to execute. - - Returns: - Tuple[Variable]: Data read from the given reader. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - reader = fluid.layers.py_reader(capacity=64, - shapes=[(-1, 1, 28, 28), (-1, 1)], - dtypes=['float32', 'int64']) - image, label = fluid.layers.read_file(reader) - """ - helper = LayerHelper('read_file') - out = [ - helper.create_variable_for_type_inference( - stop_gradient=True, dtype='float32' - ) - for _ in range(len(reader.desc.shapes())) - ] - helper.append_op( - type='read', inputs={'Reader': [reader]}, outputs={'Out': out} - ) - if len(out) == 1: - return out[0] - else: - return out - - -def load(out, file_path, load_as_fp16=None): - """ - Load operator will load a LoDTensor / SelectedRows variable from disk file. - - Args: - out(Variable): The LoDTensor / SelectedRows need to be loaded.. - - file_path(STRING): Variable will be loaded from "file_path". - - load_as_fp16(BOOLEAN): If true, the tensor will be first loaded and then converted to float16 data type. Otherwise, the tensor will be directly loaded without data type conversion. Default is false.. - Returns: - None - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - tmp_tensor = fluid.layers.create_tensor(dtype='float32') - fluid.layers.load(tmp_tensor, "./tmp_tensor.bin") - """ - helper = LayerHelper("load", **locals()) - attrs = {"file_path": file_path} - if load_as_fp16 is not None: - attrs['load_as_fp16'] = load_as_fp16 - helper.append_op(type="load", inputs={}, outputs={"Out": out}, attrs=attrs) diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py index b6b774d5ba1a7f..4883d70d97dfca 100644 --- a/python/paddle/fluid/reader.py +++ b/python/paddle/fluid/reader.py @@ -51,7 +51,6 @@ from .layers.io import ( monkey_patch_reader_methods, _copy_reader_var_, - double_buffer, ) from .unique_name import UniqueNameGenerator from .framework import _get_paddle_place, _get_paddle_place_list @@ -1352,6 +1351,11 @@ def __init__( self._use_double_buffer = use_double_buffer self._capacity = capacity if not self._iterable: + # Because layers.io.double_buffer is not supported anymore, and only when iterable and use_double_buffer + # are both True layers.io.double_buffer will be in use, here if itrable is False, use_double_buffer will be + # forcely set False to avoid using layers.io.double_buffer. 
+ # TODO: keep use_double_buffer + self._use_double_buffer = False self._init_non_iterable() def _wait_thread_ends(self): @@ -1406,7 +1410,6 @@ def _init_non_iterable(self): 'lod_tensor_blocking_queue' ) reader_name = data_loader_unique_name_generator('create_py_reader') - double_buffer_name = data_loader_unique_name_generator('double_buffer') var = global_scope().var(queue_name) self._queue = core.init_lod_tensor_blocking_queue( @@ -1452,15 +1455,6 @@ def _init_non_iterable(self): reader = monkey_patch_reader_methods(main_prog_var) - if self._use_double_buffer: - double_buffer_reader = double_buffer( - reader, name=double_buffer_name - ) - # we return a double buffer reader. However, the reset method comes from - # py_reader. - double_buffer_reader.reset = reader.reset - reader = double_buffer_reader - self._reader = reader default_main_program().current_block().append_op( diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 61e9917359b0df..9fff40e1685c13 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -1079,7 +1079,6 @@ set_tests_properties(test_nan_inf PROPERTIES TIMEOUT 120) set_tests_properties(test_deformable_conv_v1_op PROPERTIES TIMEOUT 300) set_tests_properties(test_parallel_executor_transformer_auto_growth PROPERTIES TIMEOUT 120) -set_tests_properties(test_py_reader_using_executor PROPERTIES TIMEOUT 120) set_tests_properties(test_elementwise_add_op PROPERTIES TIMEOUT 120) set_tests_properties(test_weight_decay PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_ptb_rnn_sorted_gradient PROPERTIES TIMEOUT diff --git a/python/paddle/fluid/tests/unittests/test_load_op.py b/python/paddle/fluid/tests/unittests/test_load_op.py deleted file mode 100644 index acf0a810293872..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_load_op.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import tempfile -import unittest - -import numpy as np - -import paddle -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -class TestLoadOp(unittest.TestCase): - """Test load operator.""" - - def setUp(self): - self.temp_dir = tempfile.TemporaryDirectory() - self.ones = np.ones((4, 4)).astype('float32') - main_prog = fluid.Program() - start_prog = fluid.Program() - with fluid.program_guard(main_prog, start_prog): - input = fluid.data('input', shape=[-1, 4], dtype='float32') - output = layers.fc( - input, - 4, - param_attr=fluid.ParamAttr( - name='w', - initializer=fluid.initializer.NumpyArrayInitializer( - self.ones - ), - ), - ) - exe = fluid.Executor(fluid.CPUPlace()) - exe.run(start_prog) - paddle.distributed.io.save_persistables( - exe, - dirname=os.path.join(self.temp_dir.name, "./model"), - main_program=main_prog, - ) - - def tearDown(self): - self.temp_dir.cleanup() - - def test_load(self): - main_prog = fluid.Program() - start_prog = fluid.Program() - with fluid.program_guard(main_prog, start_prog): - var = layers.create_tensor(dtype='float32') - layers.load( - var, file_path=os.path.join(self.temp_dir.name, './model/w') - ) - - exe = fluid.Executor(fluid.CPUPlace()) - exe.run(start_prog) - ret = exe.run(main_prog, fetch_list=[var.name]) - np.testing.assert_array_equal(self.ones, ret[0]) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_load_op_xpu.py b/python/paddle/fluid/tests/unittests/test_load_op_xpu.py deleted file mode 100644 index 3ede3b26914881..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_load_op_xpu.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import tempfile -import unittest - -import numpy as np - -import paddle -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -@unittest.skipIf( - not paddle.is_compiled_with_xpu(), "core is not compiled with XPU" -) -class TestLoadOpXpu(unittest.TestCase): - """Test load operator.""" - - def setUp(self): - self.temp_dir = tempfile.TemporaryDirectory() - self.model_path = os.path.join(self.temp_dir.name, "model") - self.ones = np.ones((4, 4)).astype('float32') - main_prog = fluid.Program() - start_prog = fluid.Program() - with fluid.program_guard(main_prog, start_prog): - input = fluid.data('input', shape=[-1, 4], dtype='float32') - output = layers.fc( - input, - 4, - param_attr=fluid.ParamAttr( - name='w', - initializer=fluid.initializer.NumpyArrayInitializer( - self.ones - ), - ), - ) - exe = fluid.Executor(fluid.XPUPlace(0)) - exe.run(start_prog) - paddle.distributed.io.save_persistables( - exe, dirname=self.model_path, main_program=main_prog - ) - - def tearDown(self): - self.temp_dir.cleanup() - - def test_load_xpu(self): - main_prog = fluid.Program() - start_prog = fluid.Program() - with fluid.program_guard(main_prog, start_prog): - var = layers.create_tensor(dtype='float32') - layers.load(var, file_path=self.model_path + '/w') - - exe = fluid.Executor(fluid.XPUPlace(0)) - exe.run(start_prog) - ret = exe.run(main_prog, fetch_list=[var.name]) - np.testing.assert_array_equal(self.ones, ret[0]) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_program.py b/python/paddle/fluid/tests/unittests/test_program.py index 1b38cf4f5fae9a..54320aee59b2a4 100644 --- a/python/paddle/fluid/tests/unittests/test_program.py +++ b/python/paddle/fluid/tests/unittests/test_program.py @@ -105,41 +105,6 @@ def test_program_clone_with_parameter(self): new_program = main_program.clone() self.assertNotEqual(0, len(new_program.blocks[0].all_parameters())) - def test_program_inference_optimize(self): - def net(): - reader = fluid.layers.py_reader( - capacity=10, - shapes=[[-1, 10], [-1, 1]], - lod_levels=[0, 0], - dtypes=['float32', 'int64'], - use_double_buffer=True, - ) - in_data, label = fluid.layers.read_file(reader) - predict_label = fluid.layers.fc(in_data, size=2, act='softmax') - loss = paddle.mean( - fluid.layers.cross_entropy(input=predict_label, label=label) - ) - - optimizer = fluid.optimizer.Adam() - optimizer.minimize(loss) - - startup_program = fluid.Program() - main_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - net() - no_read_program = main_program._inference_optimize() - keep_read_program = main_program._inference_optimize( - prune_read_op=False - ) - no_read_ops = no_read_program.global_block().ops - keep_read_ops = keep_read_program.global_block().ops - self.assertEqual(len(keep_read_ops) - len(no_read_ops), 2) - self.assertEqual(keep_read_ops[0].type, 'create_double_buffer_reader') - self.assertEqual(keep_read_ops[1].type, 'read') - - for i in range(len(no_read_ops)): - self.assertEqual(no_read_ops[i].type, keep_read_ops[i + 2].type) - def test_program_all_parameters(self): program = fluid.default_main_program() data = fluid.data(name='x', shape=[None, 13], dtype='float32') @@ -172,36 +137,6 @@ def test_copy_info_from_error(self): TypeError, program._copy_dist_param_info_from, "program" ) - def test_remove_training_info(self): - def net(): - reader = fluid.layers.py_reader( - capacity=10, - shapes=[[-1, 10], [-1, 1]], - lod_levels=[0, 0], - dtypes=['float32', 
'int64'], - use_double_buffer=True, - ) - in_data, label = fluid.layers.read_file(reader) - predict_label = fluid.layers.fc(in_data, size=2, act='softmax') - loss = paddle.mean( - fluid.layers.cross_entropy(input=predict_label, label=label) - ) - - optimizer = fluid.optimizer.Adam() - optimizer.minimize(loss) - - main_program = fluid.Program() - with fluid.program_guard(main_program): - net() - - removed_program = main_program._remove_training_info() - - for i in range(removed_program.num_blocks): - block = removed_program.block(i) - for var in block.desc.all_vars(): - self.assertFalse(var.has_is_parameter()) - self.assertFalse(var.has_stop_gradient()) - def build_program(): main_program = paddle.static.Program() diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py b/python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py deleted file mode 100644 index 0a198647dcca1f..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np - -import paddle -import paddle.fluid as fluid - - -class TestPyReaderErrorMsg(unittest.TestCase): - def test_check_input_array(self): - fluid.reader.GeneratorLoader._check_input_array( - [ - np.random.randint(100, size=[2]), - np.random.randint(100, size=[2]), - np.random.randint(100, size=[2]), - ] - ) - self.assertRaises( - TypeError, - fluid.reader.GeneratorLoader._check_input_array, - [ - np.random.randint(100, size=[2]), - np.random.randint(100, size=[1]), - np.random.randint(100, size=[3]), - ], - ) - - -class TestDoubleBufferAPI(unittest.TestCase): - def test_double_buffer(self): - paddle.enable_static() - if fluid.core.is_compiled_with_cuda(): - reader = fluid.layers.py_reader( - capacity=64, - shapes=[(-1, 1, 28, 28), (-1, 1)], - dtypes=['float32', 'int64'], - use_double_buffer=False, - ) - reader = fluid.layers.double_buffer( - reader, place=fluid.core.CUDAPlace(0) - ) - image, label = fluid.layers.read_file(reader) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_lod_level_share.py b/python/paddle/fluid/tests/unittests/test_py_reader_lod_level_share.py deleted file mode 100644 index c7fb6a8df59514..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_py_reader_lod_level_share.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import paddle.fluid as fluid - - -class TestLoDLevelShare(unittest.TestCase): - def setUp(self): - self.use_double_buffer = False - - def test_lod_level_share(self): - reader = fluid.layers.py_reader( - capacity=16, - shapes=([-1, 256], [-1, 512], [-1, 100]), - dtypes=('float32', 'int64', 'double'), - lod_levels=(1, 2, 0), - use_double_buffer=self.use_double_buffer, - ) - - x, y, z = fluid.layers.read_file(reader) - self.assertEqual(x.lod_level, 1) - self.assertEqual(y.lod_level, 2) - self.assertEqual(z.lod_level, 0) - - -class TestLoDLevelShare2(TestLoDLevelShare): - def setUp(self): - self.use_double_buffer = True - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_pin_memory.py b/python/paddle/fluid/tests/unittests/test_py_reader_pin_memory.py deleted file mode 100644 index 509d5f65292b4a..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_py_reader_pin_memory.py +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np - -import paddle -import paddle.fluid as fluid -import paddle.fluid.core as core - - -def user_reader(inputs): - def _reader(): - for d in inputs: - yield d - - return _reader - - -def batch_feeder(batch_reader, pin_memory=False, img_dtype="float32"): - def _feeder(): - for batch_data in batch_reader(): - sample_batch = [] - label_batch = [] - for sample, label in batch_data: - sample_batch.append(sample) - label_batch.append([label]) - tensor = core.LoDTensor() - label = core.LoDTensor() - place = core.CUDAPinnedPlace() if pin_memory else core.CPUPlace() - tensor.set(np.array(sample_batch, dtype=img_dtype), place) - label.set(np.array(label_batch, dtype="int64"), place) - yield [tensor, label] - - return _feeder - - -class TestPyReader(unittest.TestCase): - def setUp(self): - self.capacity = 10 - self.shapes = [(-1, 3, 2, 1), (-1, 1)] - self.lod_levels = [0, 0] - self.dtypes = ['float32', 'int64'] - - def test_pin_memory_pyreader(self): - with fluid.program_guard(fluid.Program(), fluid.Program()): - place = ( - fluid.CUDAPlace(0) - if fluid.core.is_compiled_with_cuda() - else fluid.CPUPlace() - ) - executor = fluid.Executor(place) - - data_file = fluid.layers.py_reader( - capacity=self.capacity, - dtypes=self.dtypes, - lod_levels=self.lod_levels, - shapes=self.shapes, - ) - # feed_queue = data_file.queue - read_out_data = fluid.layers.read_file(data_file) - - self.inputs = [] - for _ in range(10): - sample = np.random.uniform( - low=0, high=1, size=[3, 2, 1] - ).astype("float32") - label = np.random.randint(low=0, high=10, dtype="int64") - self.inputs.append((sample, label)) - - self.input_tensors = [] - for d, l in batch_feeder( - paddle.batch(user_reader(self.inputs), batch_size=2), - pin_memory=True - if fluid.core.is_compiled_with_cuda() - else 
False, - )(): - ta = fluid.LoDTensorArray() - ta.append(d) - ta.append(l) - self.input_tensors.append(ta) - - self.batched_inputs = [] - for batch in paddle.batch(user_reader(self.inputs), batch_size=2)(): - feed_d = [] - feed_l = [] - for d, l in batch: - feed_d.append(d) - feed_l.append([l]) - self.batched_inputs.append([feed_d, feed_l]) - - data_file.decorate_tensor_provider( - batch_feeder( - paddle.batch(user_reader(self.inputs), batch_size=2), - pin_memory=True - if fluid.core.is_compiled_with_cuda() - else False, - ) - ) - - executor.run(fluid.default_startup_program()) - self.outputs = [] - - data_file.start() - for _ in self.input_tensors: - self.outputs.append( - executor.run(fetch_list=list(read_out_data)) - ) - data_file.reset() - self.validate() - - def validate(self): - self.assertEqual(len(self.batched_inputs), len(self.outputs)) - for in_data_list, out_data_list in zip( - self.batched_inputs, self.outputs - ): - self.assertEqual(len(in_data_list), len(out_data_list)) - in_data_list_np = [ - np.array(in_lod_tensor) for in_lod_tensor in in_data_list - ] - for in_data, out_data in zip(in_data_list_np, out_data_list): - self.assertTrue((in_data == out_data).all()) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py b/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py deleted file mode 100644 index 7f2dc7817c8df2..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest -from threading import Thread - -import numpy as np - -import paddle.fluid as fluid - - -def feed_data(feed_queue, inputs): - for in_data in inputs: - feed_queue.push(in_data) - - -class TestPyReader(unittest.TestCase): - def setUp(self): - self.capacity = 10 - self.batch_size_min = 10 - self.batch_size_max = 20 - self.shapes = [(-1, 3, 2, 1), (-1, 1)] - self.lod_levels = [0, 0] - self.dtypes = ['float32', 'int64'] - self.iterations = 20 - - def test_single_thread_main(self): - self.main(use_thread=False) - - def test_multiple_thread_main(self): - self.main(use_thread=True) - - def main(self, use_thread=False): - with fluid.program_guard(fluid.Program(), fluid.Program()): - place = ( - fluid.CUDAPlace(0) - if fluid.core.is_compiled_with_cuda() - else fluid.CPUPlace() - ) - executor = fluid.Executor(place) - - data_file = fluid.layers.py_reader( - capacity=self.capacity, - dtypes=self.dtypes, - lod_levels=self.lod_levels, - shapes=self.shapes, - ) - feed_queue = data_file.queue - read_out_data = fluid.layers.read_file(data_file) - self.inputs = [] - - for i in range(self.iterations): - in_data = fluid.LoDTensorArray() - batch_size = np.random.random_integers( - self.batch_size_min, self.batch_size_max - ) - for shape, dtype in zip(self.shapes, self.dtypes): - next_data = np.random.uniform( - low=0, high=1000, size=(batch_size,) + shape[1:] - ).astype(dtype) - in_data.append( - fluid.executor._as_lodtensor(next_data, place) - ) - - self.inputs.append(in_data) - - executor.run(fluid.default_startup_program()) - self.outputs = [] - if use_thread: - thread = Thread( - target=feed_data, args=(feed_queue, self.inputs) - ) - thread.start() - for in_data in self.inputs: - self.outputs.append( - executor.run(fetch_list=list(read_out_data)) - ) - else: - for in_data in self.inputs: - feed_queue.push(in_data) - self.outputs.append( - executor.run(fetch_list=list(read_out_data)) - ) - - feed_queue.close() - self.validate() - - def validate(self): - self.assertEqual(len(self.inputs), len(self.outputs)) - for in_data_list, out_data_list in zip(self.inputs, self.outputs): - self.assertEqual(len(in_data_list), len(out_data_list)) - in_data_list_np = [ - np.array(in_lod_tensor) for in_lod_tensor in in_data_list - ] - for in_data, out_data in zip(in_data_list_np, out_data_list): - self.assertTrue((in_data == out_data).all()) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py deleted file mode 100644 index 01ab760b6e2c32..00000000000000 --- a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py +++ /dev/null @@ -1,338 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import multiprocessing -import os -import threading -import unittest - -import numpy as np - -import paddle -import paddle.fluid as fluid -import paddle.fluid.core as core -import paddle.fluid.unique_name as unique_name -from paddle.fluid import compiler - -os.environ['CPU_NUM'] = str(4) - - -def as_tensor(np_array_or_tensor, place=None): - if isinstance(np_array_or_tensor, fluid.LoDTensor): - return np_array_or_tensor - - if place is None: - place = fluid.CPUPlace() - - tensor = fluid.LoDTensor() - tensor.set(np_array_or_tensor, place) - return tensor - - -def as_numpy(tensor_or_numpy): - return ( - tensor_or_numpy - if isinstance(tensor_or_numpy, np.ndarray) - else np.array(tensor_or_numpy) - ) - - -def sample_list_to_tensor_array(sample_list): - slot_num = None - slots = None - for sample in sample_list: - if slot_num is None: - slot_num = len(sample) - slots = [None] * len(sample) - else: - assert slot_num == len(sample) - - for slot_id, slot_item in enumerate(sample): - if slots[slot_id] is None: - slots[slot_id] = [] - slots[slot_id].append(slot_item) - - tensor_array = fluid.LoDTensorArray() - for slot in slots: - t = fluid.LoDTensor() - t.set(np.array(slot), fluid.CPUPlace()) - tensor_array.append(t) - - return tensor_array - - -def feed_data(feed_queue, batch_reader): - data_generator = batch_reader() - while True: - data = next(data_generator, None) - if data is None or (len(data) == 1 and data[0] is None): - break - - if not feed_queue.push(sample_list_to_tensor_array(data)): - break - - feed_queue.close() - - -def simple_fc_net( - in_size, - class_num, - hidden_sizes, - batch_size, - queue_capacity, - use_double_buffer=False, - use_feed_list=True, -): - in_data = fluid.layers.data(name="data", dtype='float32', shape=[in_size]) - label = fluid.layers.data(name='label', dtype='int64', shape=[1]) - if use_feed_list: - py_reader = fluid.layers.create_py_reader_by_data( - capacity=queue_capacity, - use_double_buffer=use_double_buffer, - feed_list=[in_data, label], - name=unique_name.generate('py_reader_name'), - ) - else: - py_reader = fluid.layers.py_reader( - capacity=queue_capacity, - shapes=[in_data.shape, label.shape], - dtypes=['float32', 'int64'], - name=unique_name.generate('py_reader_name'), - use_double_buffer=use_double_buffer, - ) - - in_data, label = fluid.layers.read_file(py_reader) - - feed_queue = py_reader.queue - - hidden = in_data - for hidden_size in hidden_sizes: - hidden = fluid.layers.fc( - hidden, - size=hidden_size, - act='tanh', - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) - ), - ) - - predict_label = fluid.layers.fc(hidden, size=class_num, act='softmax') - loss = paddle.mean( - fluid.layers.cross_entropy(input=predict_label, label=label) - ) - - optimizer = fluid.optimizer.Adam() - optimizer.minimize(loss) - return in_data, label, loss, optimizer, feed_queue, py_reader - - -class TestPyReaderUsingExecutor(unittest.TestCase): - def setUp(self): - self.in_size = 1000 - self.hidden_sizes = [50, 30, 20] - self.class_num = 10 - self.batch_size = 32 - self.iterations = 10 - self.queue_capacity = 50 - - def test(self): - for use_cuda in ( - [False, True] if core.is_compiled_with_cuda() else [False] - ): - for use_parallel_executor in [False, True]: - for use_double_buffer in [False, True]: - for use_feed_list in [False, True]: - for use_decorate_paddle_reader in [False, True]: - print('Test Parameters:'), - print( - { - 'use_cuda': use_cuda, - 'use_parallel_executor': use_parallel_executor, - 'use_double_buffer': 
use_double_buffer, - 'use_feed_list': use_feed_list, - 'use_decorate_paddle_reader': use_decorate_paddle_reader, - } - ) - self.main( - use_cuda, - use_parallel_executor, - use_double_buffer, - use_feed_list, - use_decorate_paddle_reader, - ) - - def tensor_reader(self, use_decorate_paddle_reader): - def reader(): - for sample_id in range( - self.batch_size * self.iterations * self.batch_size_times - ): - in_data = np.random.uniform( - low=0, high=1, size=(self.in_size,) - ).astype('float32') - label = np.random.random_integers( - low=0, high=self.class_num - 1, size=(1,) - ).astype('int64') - - reshaped_in_data = np.reshape(in_data, [1, -1]) - reshaped_label = np.reshape(label, [1, -1]) - if sample_id % (self.batch_size * self.batch_size_times) == 0: - self.inputs.append([reshaped_in_data, reshaped_label]) - else: - self.inputs[-1][0] = np.concatenate( - (self.inputs[-1][0], reshaped_in_data), axis=0 - ) - self.inputs[-1][1] = np.concatenate( - (self.inputs[-1][1], reshaped_label), axis=0 - ) - - yield in_data, label - - if not use_decorate_paddle_reader: - yield None - - return reader - - def main( - self, - use_cuda=True, - use_parallel_executor=False, - use_double_buffer=False, - use_feed_list=False, - use_decorate_paddle_reader=False, - ): - assert not use_cuda or use_cuda and core.is_compiled_with_cuda() - - self.use_cuda = use_cuda - self.use_parallel_executor = use_parallel_executor - self.use_double_buffer = use_double_buffer - self.use_feed_list = use_feed_list - self.use_decorate_paddle_reader = use_decorate_paddle_reader - - startup_program = fluid.Program() - main_program = fluid.Program() - - with fluid.program_guard(main_program, startup_program): - ( - in_data, - label, - loss, - optimizer, - feed_queue, - py_reader, - ) = simple_fc_net( - in_size=self.in_size, - class_num=self.class_num, - hidden_sizes=self.hidden_sizes, - batch_size=self.batch_size, - queue_capacity=self.queue_capacity, - use_double_buffer=self.use_double_buffer, - use_feed_list=self.use_feed_list, - ) - - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - - exe = fluid.Executor(place) - exe.run(startup_program) - - train_cp = main_program - if use_parallel_executor: - train_cp = compiler.CompiledProgram( - main_program - ).with_data_parallel(loss_name=loss.name) - if use_cuda: - self.batch_size_times = core.get_cuda_device_count() - else: - self.batch_size_times = int( - os.environ.get('CPU_NUM', multiprocessing.cpu_count()) - ) - else: - self.batch_size_times = 1 - - reader = self.tensor_reader(use_decorate_paddle_reader) - batch_reader = paddle.batch(reader, batch_size=self.batch_size) - - self.inputs = [] - self.outputs = [] - - if use_decorate_paddle_reader: - if use_feed_list: - py_reader.decorate_paddle_reader(batch_reader) - else: - py_reader.decorate_sample_list_generator(batch_reader) - py_reader.start() - else: - thread = threading.Thread( - target=feed_data, args=(feed_queue, batch_reader) - ) - thread.daemon = True - thread.start() - - try: - while True: - fetches = exe.run( - train_cp, fetch_list=[in_data.name, label.name] - ) - fetches = [as_numpy(fetch) for fetch in fetches] - self.outputs.append(fetches) - except fluid.core.EOFException: - pass - - feed_queue.close() - self.validate() - if use_decorate_paddle_reader: - py_reader.exited = True - py_reader.thread.join() - else: - thread.join() - - def validate(self): - if not self.use_double_buffer: - self.assertEqual(len(self.inputs), len(self.outputs)) - else: - self.assertTrue(len(self.inputs) >= len(self.outputs)) - for idx 
in range(len(self.outputs)): - batch_in = self.inputs[idx] - batch_out = self.outputs[idx] - self.assertEqual(len(batch_in), len(batch_out)) - if self.use_parallel_executor and not self.use_double_buffer: - self.validate_unordered_batch(batch_in, batch_out) - else: - for in_data, out_data in zip(batch_in, batch_out): - self.assertEqual(in_data.shape, out_data.shape) - if not self.use_parallel_executor: - self.assertTrue((in_data == out_data).all()) - - def validate_unordered_batch(self, batch_in, batch_out): - out_index_left_set = set(range(self.batch_size * self.batch_size_times)) - mapping_num = 0 - for i in range(self.batch_size * self.batch_size_times): - for j in out_index_left_set: - flag = True - for k in range(len(batch_in)): - in_data = batch_in[k][i] - out_data = batch_out[k][j] - if (in_data != out_data).any(): - flag = False - break - - if flag: - out_index_left_set.remove(j) - mapping_num += 1 - break - - self.assertEqual(mapping_num, self.batch_size * self.batch_size_times) - - -if __name__ == '__main__': - unittest.main() From 0754e09d1a7223be50f19555d289389a993ab4e1 Mon Sep 17 00:00:00 2001 From: heyanru <81976792+heyanru01@users.noreply.github.com> Date: Fri, 2 Dec 2022 11:32:40 +0800 Subject: [PATCH 107/154] [Fluid Clean] remove paddle.fluid.layers.nn.reduce_all,reduce_any (#48269) --- .../contrib/mixed_precision/decorator.py | 2 +- python/paddle/fluid/layers/nn.py | 133 ------------------ python/paddle/fluid/layers/rnn.py | 10 +- .../transformer_dygraph_model.py | 2 +- .../unittests/ipu/test_reduce_x_op_ipu.py | 4 +- .../fluid/tests/unittests/test_reduce_op.py | 8 +- .../paddle/jit/dy2static/convert_operators.py | 6 +- 7 files changed, 15 insertions(+), 150 deletions(-) diff --git a/python/paddle/fluid/contrib/mixed_precision/decorator.py b/python/paddle/fluid/contrib/mixed_precision/decorator.py index 6b9f3f6eaabc51..8afae340da1ebd 100644 --- a/python/paddle/fluid/contrib/mixed_precision/decorator.py +++ b/python/paddle/fluid/contrib/mixed_precision/decorator.py @@ -460,7 +460,7 @@ def _check_finite_and_unscale(self, params_grads): if self._is_distributed or self._use_pure_fp16: with self._train_program._optimized_guard([]): all_infs = layers.concat(found_infs) - found_inf = layers.reduce_any(all_infs) + found_inf = paddle.any(all_infs) return found_inf diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 7ff74cd37cbfbd..57fa7f7252488b 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -71,8 +71,6 @@ 'softmax', 'pool2d', 'batch_norm', - 'reduce_all', - 'reduce_any', 'dropout', 'split', 'ctc_greedy_decoder', @@ -2504,137 +2502,6 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None): return out -def reduce_all(input, dim=None, keep_dim=False, name=None): - """ - - This OP computes the ``logical and`` of tensor elements over the given dimension, and output the result. - - Args: - input (Tensor): the input tensor, it's data type should be `bool`. - dim (list|int|optional): The dimension along which the logical and is computed. - If :attr:`None`, compute the logical and over all elements of - :attr:`input` and return a Tensor variable with a single element, - otherwise must be in the range :math:`[-rank(input), rank(input))`. - If :math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`. The default value is None. - keep_dim (bool): Whether to reserve the reduced dimension in the - output Tensor. 
The result tensor will have one fewer dimension - than the :attr:`input` unless :attr:`keep_dim` is true. The default value is False. - name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. The default value is None. - - Returns: - Tensor, the output data type is bool. : The reduced tensor variable with ``logical and`` in given dims. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - import paddle.fluid.layers as layers - import numpy as np - - # x is a bool Tensor variable with following elements: - # [[True, False] - # [True, True]] - x = fluid.layers.assign(np.array([[1, 0], [1, 1]], dtype='int32')) - x = fluid.layers.cast(x, 'bool') - - out = fluid.layers.reduce_all(x) # False - out = fluid.layers.reduce_all(x, dim=0) # [True, False] - out = fluid.layers.reduce_all(x, dim=-1) # [False, True] - # keep_dim=False, x.shape=(2,2), out.shape=(2,) - - out = fluid.layers.reduce_all(x, dim=1, keep_dim=True) # [[False], [True]] - # keep_dim=True, x.shape=(2,2), out.shape=(2,1) - - """ - if dim is not None and not isinstance(dim, list): - dim = [dim] - - if in_dygraph_mode(): - return _C_ops.all(input, dim if dim is not None else [], keep_dim) - - check_variable_and_dtype(input, 'input', ('bool'), 'reduce_all') - helper = LayerHelper('reduce_all', **locals()) - out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) - helper.append_op( - type='reduce_all', - inputs={'X': input}, - outputs={'Out': out}, - attrs={ - 'dim': dim if dim is not None and dim != [] else [0], - 'keep_dim': keep_dim, - 'reduce_all': True - if dim is None or dim == [] or len(dim) == len(input.shape) - else False, - }, - ) - return out - - -def reduce_any(input, dim=None, keep_dim=False, name=None): - """ - This OP computes the ``logical or`` of tensor elements over the given dimension, and output the result. - - Args: - input (Tensor): the input tensor, it's data type should be `bool`. - dim (list|int|optional): The dimension along which the logical and is computed. - If :attr:`None`, compute the logical and over all elements of - :attr:`input` and return a Tensor variable with a single element, - otherwise must be in the range :math:`[-rank(input), rank(input))`. - If :math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`. The default value is None. - keep_dim (bool): Whether to reserve the reduced dimension in the - output Tensor. The result tensor will have one fewer dimension - than the :attr:`input` unless :attr:`keep_dim` is true. The default value is False. - name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. - - Returns: - Tensor, the output data type is bool. : The reduced tensor variable with ``logical or`` in given dims. - - Examples: - .. 
code-block:: python - - import paddle - import paddle.fluid as fluid - import paddle.fluid.layers as layers - import numpy as np - - # x is a bool Tensor variable with following elements: - # [[True, False] - # [False, False]] - x = fluid.layers.assign(np.array([[1, 0], [0, 0]], dtype='int32')) - x = fluid.layers.cast(x, 'bool') - - out = fluid.layers.reduce_any(x) # True - out = fluid.layers.reduce_any(x, dim=0) # [True, False] - out = fluid.layers.reduce_any(x, dim=-1) # [True, False] - # keep_dim=False, x.shape=(2,2), out.shape=(2,) - - out = fluid.layers.reduce_any(x, dim=1, - keep_dim=True) # [[True], [False]] - # keep_dim=True, x.shape=(2,2), out.shape=(2,1) - - """ - check_variable_and_dtype(input, 'input', ('bool'), 'reduce_any') - helper = LayerHelper('reduce_any', **locals()) - out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) - if dim is not None and not isinstance(dim, list): - dim = [dim] - helper.append_op( - type='reduce_any', - inputs={'X': input}, - outputs={'Out': out}, - attrs={ - 'dim': dim if dim is not None and dim != [] else [0], - 'keep_dim': keep_dim, - 'reduce_all': True - if dim is None or dim == [] or len(dim) == len(input.shape) - else False, - }, - ) - return out - - def split(input, num_or_sections, dim=-1, name=None): """ Split the input tensor into multiple sub-Tensors. diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index c56b99465f194d..c08cd6208d4d13 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -1481,7 +1481,7 @@ def _maybe_copy(state, new_state, step_mask): initial_states, initial_finished, ) - cond = paddle.logical_not((nn.reduce_all(initial_finished))) + cond = paddle.logical_not((paddle.all(initial_finished))) sequence_lengths = tensor.cast(paddle.zeros_like(initial_finished), "int64") outputs = None @@ -1539,7 +1539,7 @@ def _maybe_copy(state, new_state, step_mask): control_flow.increment(x=step_idx_tensor, value=1.0, in_place=True) step_idx += 1 - cond = paddle.logical_not(nn.reduce_all(finished)) + cond = paddle.logical_not(paddle.all(finished)) if max_step_num is not None and step_idx > max_step_num: break @@ -1589,7 +1589,7 @@ def _dynamic_decode_declarative( global_finished.stop_gradient = True step_idx = tensor.fill_constant(shape=[1], dtype="int64", value=0) - cond = paddle.logical_not((nn.reduce_all(initial_finished))) + cond = paddle.logical_not((paddle.all(initial_finished))) if max_step_num is not None: max_step_num = tensor.fill_constant( shape=[1], dtype="int64", value=max_step_num @@ -1720,12 +1720,12 @@ def _create_array_out_of_while(dtype): ) if max_step_num is not None: paddle.logical_and( - paddle.logical_not(nn.reduce_all(global_finished)), + paddle.logical_not(paddle.all(global_finished)), paddle.less_equal(step_idx, max_step_num), cond, ) else: - paddle.logical_not(nn.reduce_all(global_finished), cond) + paddle.logical_not(paddle.all(global_finished), cond) final_outputs = map_structure( lambda array: tensor.tensor_array_to_tensor( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index accf36ff179197..79ec89c008261e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -873,7 +873,7 @@ def gather(input, indices, batch_pos): predict_ids.append(token_indices) 
parent_ids.append(beam_indices) - if layers.reduce_all(finished).numpy(): + if paddle.all(finished).numpy(): break predict_ids = paddle.stack(predict_ids, axis=0) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_reduce_x_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_reduce_x_op_ipu.py index fe373e91038dea..5aa8da4adab8ea 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_reduce_x_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_reduce_x_op_ipu.py @@ -180,12 +180,12 @@ def build_model(self): self.fetch_list = [out.name] def set_test_op(self): - self.op = paddle.fluid.layers.reduce_all + self.op = paddle.all class TestAny(TestAll): def set_test_op(self): - self.op = paddle.fluid.layers.reduce_any + self.op = paddle.any if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_reduce_op.py b/python/paddle/fluid/tests/unittests/test_reduce_op.py index f6434e662942cb..53a1af18ea2991 100644 --- a/python/paddle/fluid/tests/unittests/test_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_reduce_op.py @@ -505,12 +505,12 @@ def test_errors(self): with program_guard(Program(), Program()): # The input type of reduce_all_op must be Variable. input1 = 12 - self.assertRaises(TypeError, fluid.layers.reduce_all, input1) + self.assertRaises(TypeError, paddle.all, input1) # The input dtype of reduce_all_op must be bool. input2 = fluid.layers.data( name='input2', shape=[12, 10], dtype="int32" ) - self.assertRaises(TypeError, fluid.layers.reduce_all, input2) + self.assertRaises(TypeError, paddle.all, input2) class TestAnyOp(OpTest): @@ -622,12 +622,12 @@ def test_errors(self): with program_guard(Program(), Program()): # The input type of reduce_any_op must be Variable. input1 = 12 - self.assertRaises(TypeError, fluid.layers.reduce_any, input1) + self.assertRaises(TypeError, paddle.any, input1) # The input dtype of reduce_any_op must be bool. 
input2 = fluid.layers.data( name='input2', shape=[12, 10], dtype="int32" ) - self.assertRaises(TypeError, fluid.layers.reduce_any, input2) + self.assertRaises(TypeError, paddle.any, input2) class Test1DReduce(OpTest): diff --git a/python/paddle/jit/dy2static/convert_operators.py b/python/paddle/jit/dy2static/convert_operators.py index 898d1c35f37bf4..02cafb77bbd30a 100644 --- a/python/paddle/jit/dy2static/convert_operators.py +++ b/python/paddle/jit/dy2static/convert_operators.py @@ -27,8 +27,6 @@ from paddle.fluid.layers import ( assign, fill_constant, - reduce_all, - reduce_any, ) from paddle.fluid.layers import ( cast, @@ -651,7 +649,7 @@ def convert_shape_compare(left, *args): def reduce_compare(x, op_str, y): element_wise_result = eval("x " + op_str + " y") if op_str == "!=": - return reduce_any(element_wise_result) + return paddle.any(element_wise_result) elif ( op_str == "is" or op_str == "is not" @@ -660,7 +658,7 @@ def reduce_compare(x, op_str, y): ): return element_wise_result else: - return reduce_all(element_wise_result) + return paddle.all(element_wise_result) final_result = reduce_compare(left, args[0], args[1]) for i in range(1, num_cmp): From d1e93be1bae9048e3178667028b433f2a234c1ba Mon Sep 17 00:00:00 2001 From: Jiabin Yang <360788950@qq.com> Date: Fri, 2 Dec 2022 11:41:05 +0800 Subject: [PATCH 108/154] [Eager] Optimize Grad by prune useless branch (#47827) * [Eager] Fix paddle.grad interface * [Eager] Support minimum SubGraph for GeneralGrad * Add needed_nodes to prune grad graph more thoroughly * [Eager] Add grad_node_trans_mapping_ to record which grad_node has been transformed to AccumulationNode * [Eager] Fix paddle.grad interface * Polish code * remove potential_stop_node * Add endding_nodes to enhance genSugraph logic * clear endding_nodes_ * polish code * rename endding_nodes to endding_nades_ * Refactor grad interface * Add register_hook case to fix coverage-ci * Fix code format * Refactor general_grad * Add more code comments * call clear directly to release GradSlotMeta * fix a mistake * fix matmul/ multiply kernel logic and optional input in yaml, fill zeros logic and so on. 
* fix batch_norm_double_grad yaml optional config * fix tanh_triple_grad yaml and kernels * fix MultiplyTripleGradKernel optional logic * fix merge mistake * fix compile error * remove legacy attr for bn * polish code * fix some kernel * merge develop * fix error * remote log * fix kernel with full like * hide value log behind * hide value log behind * fix matmul_triple grad Co-authored-by: Weilong Wu --- .../generator/codegen_utils.py | 1 + .../generator/eager_gen.py | 2 +- paddle/fluid/eager/backward.cc | 28 +- paddle/fluid/eager/general_grad.h | 49 +- paddle/fluid/eager/grad_tensor_holder.cc | 5 + paddle/fluid/eager/utils.h | 53 +- paddle/phi/api/yaml/backward.yaml | 5 + paddle/phi/api/yaml/legacy_backward.yaml | 8 +- paddle/phi/kernels/activation_grad_kernel.h | 20 +- paddle/phi/kernels/batch_norm_grad_kernel.h | 43 +- .../phi/kernels/cpu/batch_norm_grad_kernel.cc | 47 +- paddle/phi/kernels/cpu/full_kernel.cc | 5 +- .../elementwise_multiply_grad_kernel.h | 4 +- paddle/phi/kernels/funcs/activation_functor.h | 261 ++++++-- .../phi/kernels/gpu/batch_norm_grad_kernel.cu | 76 +-- paddle/phi/kernels/gpu/full_kernel.cu | 4 +- .../phi/kernels/impl/activation_grad_impl.h | 48 +- .../impl/elementwise_grad_kernel_impl.h | 267 +++++--- .../phi/kernels/impl/logcumsumexp_grad_impl.h | 1 + .../kernels/impl/matmul_grad_kernel_impl.h | 576 +++++++++++------- paddle/phi/kernels/logcumsumexp_grad_kernel.h | 2 +- paddle/phi/kernels/matmul_grad_kernel.h | 4 +- paddle/phi/kernels/xpu/full_kernel.cc | 4 +- 23 files changed, 989 insertions(+), 524 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py index 748c9d1ad22f22..8d03670a80773b 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py +++ b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py @@ -35,6 +35,7 @@ "multiply_triple_grad", "conv2d_grad_grad", "batch_norm_double_grad", + "tanh_grad", "tanh_double_grad", "tanh_triple_grad", "sin_double_grad", diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py index e726ec8bd96706..b54f45363a00da 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py @@ -230,7 +230,7 @@ class {} : public egr::GradNodeBase {{ AFTER_LOG_PRINT_TEMPLATE = """ if(VLOG_IS_ON(4)){{ - const char* INPUT_PRINT_TEMPLATE = \"{{ Input: [%s], Output: [%s] }} \"; + const char* INPUT_PRINT_TEMPLATE = \"{{ Input: [%s], \\n Output: [%s] }} \"; {} VLOG(4) << paddle::string::Sprintf(INPUT_PRINT_TEMPLATE, input_str, output_str); }} diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc index 20709d13166a7c..15c67f451be48d 100644 --- a/paddle/fluid/eager/backward.cc +++ b/paddle/fluid/eager/backward.cc @@ -173,9 +173,10 @@ std::vector RunBackward( node_input_buffers_dict[grad_node] = std::make_unique(grad_node->InputMeta()); } - bool copy_from_grad_t = - grad_tensors.size() > 0 && grad_tensors[i].initialized(); - if (copy_from_grad_t) { + + // copy grad tensor since we should totally run grad without affect forward + // value + if (grad_tensors.size() > 0 && grad_tensors[i].initialized()) { PADDLE_ENFORCE( grad_tensors.size() == tensors.size(), paddle::platform::errors::Fatal( @@ -357,22 +358,11 @@ std::vector RunBackward( "Node's in-degree cannot be negative.", next_node->name())); - if (is_general_grad) { - if 
(node_in_degree_map[next_node] == 0 && - GeneralGrad::Instance().IsNeededNodes(next_node)) { - if (dynamic_cast(next_node)) { - queue.push_front(std::move(next_node)); - } else { - queue.push_back(std::move(next_node)); - } - } - } else { - if (node_in_degree_map[next_node] == 0) { - if (dynamic_cast(next_node)) { - queue.push_front(std::move(next_node)); - } else { - queue.push_back(std::move(next_node)); - } + if (node_in_degree_map[next_node] == 0) { + if (dynamic_cast(next_node)) { + queue.push_front(std::move(next_node)); + } else { + queue.push_back(std::move(next_node)); } } } diff --git a/paddle/fluid/eager/general_grad.h b/paddle/fluid/eager/general_grad.h index 27f6a7e609a4dd..142624a9d95642 100644 --- a/paddle/fluid/eager/general_grad.h +++ b/paddle/fluid/eager/general_grad.h @@ -51,6 +51,10 @@ class GeneralGrad { for (size_t i = 0; i < num_inputs; i++) { AutogradMeta* auto_grad_meta = EagerUtils::unsafe_autograd_meta(inputs[i]); + PADDLE_ENFORCE_NOT_NULL( + auto_grad_meta, + paddle::platform::errors::Fatal( + "We got %s:[%d] 's autograd meta is NULL.", msg, i)); auto* target_node = auto_grad_meta->GetMutableGradNode().get(); if (orig_to_copied_node_map_.count(target_node)) { @@ -82,10 +86,13 @@ class GeneralGrad { // input_target_nodes void PurifyPotentialStartUpNodes() { VLOG(6) << "Running in PurifyPotentialStartUpNodes"; - if (input_target_nodes_inputmeta_map_.empty()) return; + if (input_target_nodes_inputmeta_map_.empty()) { + VLOG(6) << "No input target nodes found, skip."; + return; + } std::unordered_set potential_startup_nodes_to_be_erased; - for (auto startup_op : potential_startup_nodes_) { - auto iter = input_target_nodes_inputmeta_map_.find(startup_op); + for (auto startup_node : potential_startup_nodes_) { + auto iter = input_target_nodes_inputmeta_map_.find(startup_node); if (iter != input_target_nodes_inputmeta_map_.end()) { potential_startup_nodes_to_be_erased.emplace(iter->first); } @@ -157,11 +164,11 @@ class GeneralGrad { potential_startup_nodes_.erase(node); } } - } + } // TODO(jiabin): May we need some check here. } // Get Graph Info Betweent input target GradNode and outputs, - // record depending_nodes_, potential_startup_nodes_ + // record depending_nodes_ void GetGraphInfoBetweenTargets(const std::deque& init_queue) { VLOG(6) << "Runing In GetGraphInfoBetweenTargets"; @@ -227,7 +234,7 @@ class GeneralGrad { std::make_shared(target_result); } } - } + } // TODO(jiabin): Some check here. } void SetResultForEnddingNodes( @@ -319,21 +326,22 @@ class GeneralGrad { void SetNodeToAccumulationNode(GradNodeBase* node) { if (dynamic_cast(node)) return; if (!(depending_nodes_)[node].empty()) { + // Find precedding_nodes of current node. 
auto precedding_nodes = (depending_nodes_)[node]; for (auto pre_nodes : precedding_nodes) { paddle::small_vector, kSlotSmallVectorSize>& pre_nodes_edges = pre_nodes->MutableOutputMeta(); for (size_t i = 0; i < pre_nodes_edges.size(); i++) { for (size_t j = 0; j < pre_nodes_edges[i].size(); j++) { - auto edge_ = pre_nodes_edges[i][j].GetEdge(); + const auto& edge_ = pre_nodes_edges[i][j].GetEdge(); if (edge_.GetGradNode() == node) { - auto autograd_meta = egr::AutogradMeta(edge_); Edge& pre_node_edge = pre_nodes_edges[i][j].GetMutableEdge(); if (copied_node_to_endding_node_map_.count(node)) { pre_node_edge.SetGradNode( copied_node_to_endding_node_map_[node]); } else { + auto autograd_meta = egr::AutogradMeta(edge_); std::shared_ptr shared_grad_node_accumulation = std::make_shared(&autograd_meta); pre_node_edge.SetGradNode(shared_grad_node_accumulation); @@ -361,7 +369,7 @@ class GeneralGrad { grad_node->SetGradientHookFuntions( node->GetGradientHookFuntions()); } - } + } // or this node has no need to change } } } @@ -381,11 +389,9 @@ class GeneralGrad { } visited.insert(node); - if (IsInputTargetNodes(node)) { - if (IsEnddingNodes(node)) { - SetNodeToAccumulationNode(node); - continue; - } + if (IsInputTargetNodes(node) && IsEnddingNodes(node)) { + SetNodeToAccumulationNode(node); + continue; } paddle::small_vector, kSlotSmallVectorSize>& @@ -411,7 +417,17 @@ class GeneralGrad { continue; } - // TODO(weilong): support prune logic deeper + if (meta.size() != 1 && IsNeededNodes(node) && + !IsNeededNodes(next_node.get()) && !IsEnddingNodes(node)) { + VLOG(3) << "Get stop edge from grad_node: " << node->name() << " : " + << node << " to:" << next_node->name() << ", " + << next_node.get() << " with output rank info: " << i + << ", " << j; + // No need to compute grad from needed Nodes to no need Nodes + meta[i][j].SetStopGradient(true); + edge.Clear(); + continue; + } // Update BFS queue queue_.push_back(next_node.get()); @@ -502,7 +518,8 @@ class GeneralGrad { // Save node and update mapping orig_to_copied_node_map_[orig_node.get()] = copied_node; copied_grad_nodes_.push_back(copied_node); - + VLOG(3) << "Copied Node: " << orig_node->name() << " ptr: " << orig_node + << " to ptr: " << copied_node; return copied_node.get(); } diff --git a/paddle/fluid/eager/grad_tensor_holder.cc b/paddle/fluid/eager/grad_tensor_holder.cc index 14a8c26f9dcb8d..56268924b50f3c 100644 --- a/paddle/fluid/eager/grad_tensor_holder.cc +++ b/paddle/fluid/eager/grad_tensor_holder.cc @@ -99,6 +99,11 @@ void GradTensorHolder::add(size_t slot_id, size_t rank, const paddle::experimental::Tensor& t, bool create_graph) { + if (!t.initialized()) { + VLOG(3) << "No need to do accumulate for uninitialized t."; + return; + } // TODO(jiabin): Remove this when we fix all kernel. 
+ PADDLE_ENFORCE(slot_id < buffer_.size(), paddle::platform::errors::Fatal( "Invalid slot_id for GradTensorHolder::add() " diff --git a/paddle/fluid/eager/utils.h b/paddle/fluid/eager/utils.h index 7146261164900c..339f7af80364b2 100644 --- a/paddle/fluid/eager/utils.h +++ b/paddle/fluid/eager/utils.h @@ -277,7 +277,58 @@ class EagerUtils { } else { tensor_info_str += "Unknown"; } - if (VLOG_IS_ON(6)) { + if (VLOG_IS_ON(11)) { + const char* TENSOR_PRINT_TEMPLATE = + "{Name: %s, Initialized: %d, Ptr: %d " + "TensorInfo: [ %s ], Value:[ %s ], ADInfo:[ %s ]}"; + auto* ad_meta = nullable_autograd_meta(t); + if (ad_meta && (ad_meta->WeakGrad().lock().get())) { + std::string ad_info_str = ""; + const char* AD_INFO_TEMPLATE = + "Grad: [ %s ], GradNode: [ %s ], StopGradient: [ %d ]"; + ad_info_str += paddle::string::Sprintf(AD_INFO_TEMPLATE, + TensorStr(ad_meta->Grad()), + GradNodeStr(t), + ad_meta->StopGradient()); + auto* data_ptr = dynamic_cast(t.impl().get()); + if (t.is_initialized() && data_ptr) { + return paddle::string::Sprintf(TENSOR_PRINT_TEMPLATE, + tensor_name_str, + t.initialized(), + t.impl(), + tensor_info_str, + *data_ptr, + ad_info_str); + } else { + return paddle::string::Sprintf(TENSOR_PRINT_TEMPLATE, + tensor_name_str, + t.initialized(), + t.impl(), + tensor_info_str, + "None", + ad_info_str); + } + } else { + auto* data_ptr = dynamic_cast(t.impl().get()); + if (t.is_initialized() && data_ptr) { + return paddle::string::Sprintf(TENSOR_PRINT_TEMPLATE, + tensor_name_str, + t.initialized(), + t.impl(), + tensor_info_str, + *data_ptr, + "None"); + } else { + return paddle::string::Sprintf(TENSOR_PRINT_TEMPLATE, + tensor_name_str, + t.initialized(), + t.impl(), + tensor_info_str, + "None", + "None"); + } + } + } else if (VLOG_IS_ON(6)) { const char* TENSOR_PRINT_TEMPLATE = "{Name: %s, Initialized: %d, Ptr: %d " "TensorInfo: [ %s ], ADInfo:[ %s ]}"; diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml index 44afc43c046d70..2d333805b5aa02 100644 --- a/paddle/phi/api/yaml/backward.yaml +++ b/paddle/phi/api/yaml/backward.yaml @@ -187,6 +187,7 @@ param : [x, x] kernel : func : cos_double_grad + optional: grad_out backward : cos_triple_grad inplace : (grad_x_grad -> grad_out_grad) @@ -211,6 +212,7 @@ param : [x, x, grad_x_grad_forward] kernel : func : cos_triple_grad + optional: grad_out_forward, grad_x_grad_forward, grad_out_grad_grad inplace : (grad_x_grad_forward -> grad_out_forward_grad) - backward_op : cosh_grad @@ -872,6 +874,7 @@ param : [x, x] kernel : func : sin_double_grad + optional: grad_out backward : sin_triple_grad inplace : (grad_x_grad -> grad_out_grad) @@ -896,6 +899,7 @@ param : [x, x, grad_x_grad_forward] kernel : func : sin_triple_grad + optional: grad_out_forward, grad_x_grad_forward, grad_out_grad_grad inplace : (grad_x_grad_forward -> grad_out_forward_grad) - backward_op : sinh_grad @@ -1054,6 +1058,7 @@ kernel : func : tanh_triple_grad inplace : (grad_x_grad_forward -> grad_out_forward_grad) + optional : grad_out_new_grad, grad_out_grad_grad - backward_op : thresholded_relu_grad forward : thresholded_relu (Tensor x, float threshold) -> Tensor(out) diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index 064c6b00a88494..b0ce57461685ef 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -124,7 +124,7 @@ kernel : func : batch_norm_grad_grad data_type : x - optional : out_mean, out_variance + optional : out_mean, out_variance, grad_x_grad, 
grad_scale_grad, grad_bias_grad inplace : (grad_out -> grad_out_grad) - backward_op : batch_norm_grad @@ -856,7 +856,7 @@ param : [x, y, fwd_grad_out, fwd_grad_grad_x, fwd_grad_grad_y] kernel : func : matmul_triple_grad - optional : grad_x_grad, grad_y_grad, grad_grad_out_grad + optional : fwd_grad_grad_x, fwd_grad_grad_y, grad_x_grad, grad_y_grad, grad_grad_out_grad - backward_op : max_grad forward: max (Tensor x, IntArray axis={}, bool keepdim=false) -> Tensor(out) @@ -1024,10 +1024,10 @@ output : Tensor(x_grad), Tensor(y_grad), Tensor(fwd_grad_out_grad), Tensor(fwd_grad_grad_x_grad), Tensor(fwd_grad_grad_y_grad) infer_meta : func : GeneralQuinaryGradInferMeta - param : [x, y, fwd_grad_out, x, y] + param : [x, y, fwd_grad_out, fwd_grad_grad_x, fwd_grad_grad_y] kernel : func : multiply_triple_grad - optional : fwd_grad_grad_x, fwd_grad_grad_y, grad_grad_out_grad + optional : fwd_grad_grad_x, fwd_grad_grad_y, grad_x_grad, grad_y_grad, grad_grad_out_grad - backward_op : nearest_interp_grad forward : nearest_interp (Tensor x, Tensor out_size, Tensor[] size_tensor, Tensor scale_tensor, str data_layout, int out_d, int out_h, int out_w, float[] scale, str interp_method, bool align_corners, int align_mode) -> Tensor(output) diff --git a/paddle/phi/kernels/activation_grad_kernel.h b/paddle/phi/kernels/activation_grad_kernel.h index 847383fc38e942..56cb316640d192 100644 --- a/paddle/phi/kernels/activation_grad_kernel.h +++ b/paddle/phi/kernels/activation_grad_kernel.h @@ -83,7 +83,7 @@ void ReluDoubleGradKernel(const Context& dev_ctx, template void SinDoubleGradKernel(const Context& dev_ctx, const DenseTensor& x, - const DenseTensor& dout, + const paddle::optional& dout, const DenseTensor& ddx, DenseTensor* dx, DenseTensor* ddout); @@ -91,7 +91,7 @@ void SinDoubleGradKernel(const Context& dev_ctx, template void CosDoubleGradKernel(const Context& dev_ctx, const DenseTensor& x, - const DenseTensor& dout, + const paddle::optional& dout, const DenseTensor& ddx, DenseTensor* dx, DenseTensor* ddout); @@ -109,8 +109,8 @@ void TanhTripleGradKernel(const Context& dev_ctx, const DenseTensor& out, const DenseTensor& dout, const DenseTensor& ddx, - const DenseTensor& d_dout_new, - const DenseTensor& d_ddout, + const paddle::optional& d_dout_new, + const paddle::optional& d_ddout, DenseTensor* d_out_new, DenseTensor* d_dout, DenseTensor* d_ddx); @@ -118,10 +118,10 @@ void TanhTripleGradKernel(const Context& dev_ctx, template void SinTripleGradKernel(const Context& dev_ctx, const DenseTensor& x, - const DenseTensor& dout, - const DenseTensor& ddx, + const paddle::optional& dout, + const paddle::optional& ddx, const DenseTensor& d_dx_new, - const DenseTensor& d_ddout, + const paddle::optional& d_ddout, DenseTensor* d_x_new, DenseTensor* d_dout, DenseTensor* d_ddx); @@ -129,10 +129,10 @@ void SinTripleGradKernel(const Context& dev_ctx, template void CosTripleGradKernel(const Context& dev_ctx, const DenseTensor& x, - const DenseTensor& dout, - const DenseTensor& ddx, + const paddle::optional& dout, + const paddle::optional& ddx, const DenseTensor& d_dx_new, - const DenseTensor& d_ddout, + const paddle::optional& d_ddout, DenseTensor* d_x_new, DenseTensor* d_dout, DenseTensor* d_ddx); diff --git a/paddle/phi/kernels/batch_norm_grad_kernel.h b/paddle/phi/kernels/batch_norm_grad_kernel.h index 24e23e8d690746..2ef183559099f1 100644 --- a/paddle/phi/kernels/batch_norm_grad_kernel.h +++ b/paddle/phi/kernels/batch_norm_grad_kernel.h @@ -64,25 +64,26 @@ void BatchNormGradKernel(const Context& dev_ctx, DenseTensor* 
bias_grad); template -void BatchNormDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& scale, - const paddle::optional& mean, - const paddle::optional& variance, - const DenseTensor& saved_mean, - const DenseTensor& saved_variance, - const DenseTensor& y_grad, - const DenseTensor& x_grad_grad, - const DenseTensor& scale_grad_grad, - const DenseTensor& bias_grad_grad, - float momentum, - float epsilon, - const std::string& data_layout, - bool is_test, - bool use_global_stats, - bool trainable_statistics, - DenseTensor* x_grad, - DenseTensor* scale_grad, - DenseTensor* y_grad_grad); - +void BatchNormDoubleGradKernel( + const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& scale, + const paddle::optional& mean, + const paddle::optional& variance, + const DenseTensor& saved_mean, + const DenseTensor& saved_variance, + const DenseTensor& y_grad, + const paddle::optional& x_grad_grad, + const paddle::optional& scale_grad_grad, + const paddle::optional& bias_grad_grad, + float momentum, + float epsilon, + const std::string& data_layout, + bool is_test, + bool use_global_stats, + bool trainable_statistics, + bool fuse_with_relu, + DenseTensor* x_grad, + DenseTensor* scale_grad, + DenseTensor* y_grad_grad); } // namespace phi diff --git a/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc b/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc index 8d0ae7e08d70bd..49555410f99201 100644 --- a/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc @@ -334,26 +334,27 @@ void BatchNormGradKernel(const Context& dev_ctx, } template -void BatchNormDoubleGradKernel(const Context& ctx, - const DenseTensor& x, - const DenseTensor& scale, - const paddle::optional& mean, - const paddle::optional& variance, - const DenseTensor& saved_mean, - const DenseTensor& saved_variance, - const DenseTensor& y_grad, - const DenseTensor& x_grad_grad, - const DenseTensor& scale_grad_grad, - const DenseTensor& bias_grad_grad, - float momentum, - float epsilon, - const std::string& data_layout_str, - bool is_test, - bool use_global_stats, - bool trainable_statistics, - DenseTensor* x_grad, - DenseTensor* scale_grad, - DenseTensor* y_grad_grad) { +void BatchNormDoubleGradKernel( + const Context& ctx, + const DenseTensor& x, + const DenseTensor& scale, + const paddle::optional& mean, + const paddle::optional& variance, + const DenseTensor& saved_mean, + const DenseTensor& saved_variance, + const DenseTensor& y_grad, + const paddle::optional& x_grad_grad, + const paddle::optional& scale_grad_grad, + const paddle::optional& bias_grad_grad, + float momentum, + float epsilon, + const std::string& data_layout_str, + bool is_test, + bool use_global_stats, + bool trainable_statistics, + DenseTensor* x_grad, + DenseTensor* scale_grad, + DenseTensor* y_grad_grad) { const auto* X = &x; const auto* Scale = &scale; const auto* dY = &y_grad; @@ -369,9 +370,9 @@ void BatchNormDoubleGradKernel(const Context& ctx, const auto data_layout = phi::StringToDataLayout(data_layout_str); - const auto* ddX = &x_grad_grad; - const auto* ddScale = &scale_grad_grad; - const auto* ddBias = &bias_grad_grad; + const auto* ddX = x_grad_grad.get_ptr(); + const auto* ddScale = scale_grad_grad.get_ptr(); + const auto* ddBias = bias_grad_grad.get_ptr(); auto* dX = x_grad; auto* dScale = scale_grad; diff --git a/paddle/phi/kernels/cpu/full_kernel.cc b/paddle/phi/kernels/cpu/full_kernel.cc index 6571cb2ca8faa9..e7dd6249f3644c 100644 --- 
a/paddle/phi/kernels/cpu/full_kernel.cc +++ b/paddle/phi/kernels/cpu/full_kernel.cc @@ -108,6 +108,9 @@ PD_REGISTER_KERNEL(full_like, int, int64_t, bool, - phi::dtype::float16) { + phi::dtype::float16, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) { kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND); } diff --git a/paddle/phi/kernels/elementwise_multiply_grad_kernel.h b/paddle/phi/kernels/elementwise_multiply_grad_kernel.h index 9cbd5040666cf8..f175416054086d 100644 --- a/paddle/phi/kernels/elementwise_multiply_grad_kernel.h +++ b/paddle/phi/kernels/elementwise_multiply_grad_kernel.h @@ -47,8 +47,8 @@ void MultiplyTripleGradKernel(const Context& dev_ctx, const DenseTensor& dout, const paddle::optional& ddx, const paddle::optional& ddy, - const DenseTensor& d_dx, - const DenseTensor& d_dy, + const paddle::optional& d_dx, + const paddle::optional& d_dy, const paddle::optional& d_ddout, int axis, DenseTensor* d_x, diff --git a/paddle/phi/kernels/funcs/activation_functor.h b/paddle/phi/kernels/funcs/activation_functor.h index ccdff93d5b23c7..35970e2b7df914 100644 --- a/paddle/phi/kernels/funcs/activation_functor.h +++ b/paddle/phi/kernels/funcs/activation_functor.h @@ -125,14 +125,23 @@ struct SinDoubleGradFunctor : public BaseActivationFunctor { // calculate d2x first, so d2d1y can inplace d2d1x auto d2x = EigenVector::Flatten( GET_DATA_SAFELY(dX, "Output", "d2x", "SinDoubleGrad")); - auto d1y = EigenVector::Flatten( - GET_DATA_SAFELY(dOut, "Output", "d1y", "SinDoubleGrad")); - d2x.device(*d) = -d2d1x * x.unaryExpr(Sine()) * d1y; + + if (dX) { + if (dOut) { + auto d1y = EigenVector::Flatten( + GET_DATA_SAFELY(dOut, "Output", "d1y", "SinDoubleGrad")); + d2x.device(*d) = -d2d1x * x.unaryExpr(Sine()) * d1y; + } else { + d2x.device(*d) = -d2d1x * x.unaryExpr(Sine()) * static_cast(0); + } + } // calculate d2d1y - auto d2d1y = EigenVector::Flatten( - GET_DATA_SAFELY(ddOut, "Output", "d2d1y", "SinDoubleGrad")); - d2d1y.device(*d) = d2d1x * x.unaryExpr(Cosine()); + if (ddOut) { + auto d2d1y = EigenVector::Flatten( + GET_DATA_SAFELY(ddOut, "Output", "d2d1y", "SinDoubleGrad")); + d2d1y.device(*d) = d2d1x * x.unaryExpr(Cosine()); + } } static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } }; @@ -167,28 +176,71 @@ struct SinTripleGradFunctor : public BaseActivationFunctor { auto* d = dev.eigen_device(); auto x = EigenVector::Flatten( GET_DATA_SAFELY(X, "Input", "x", "SinTripleGrad")); - auto d2d1x = EigenVector::Flatten( - GET_DATA_SAFELY(ddX, "Input", "d2d1x", "SinTripleGrad")); - auto d1y = EigenVector::Flatten( - GET_DATA_SAFELY(dOut, "Input", "d1y", "SinTripleGrad")); - auto d3d2d1y = EigenVector::Flatten( - GET_DATA_SAFELY(d_DDOut, "Input", "d3d2d1y", "SinTripleGrad")); auto d3d2x = EigenVector::Flatten( GET_DATA_SAFELY(d_dx_New, "Input", "d3d2x", "SinTripleGrad")); + if (d_x_New) { + auto d3x = EigenVector::Flatten( + GET_DATA_SAFELY(d_x_New, "Output", "d3x", "SinTripleGrad")); + if (dOut && ddX && d_DDOut) { + auto d2d1x = EigenVector::Flatten( + GET_DATA_SAFELY(ddX, "Input", "d2d1x", "SinTripleGrad")); + auto d1y = EigenVector::Flatten( + GET_DATA_SAFELY(dOut, "Input", "d1y", "SinTripleGrad")); + auto d3d2d1y = EigenVector::Flatten( + GET_DATA_SAFELY(d_DDOut, "Input", "d3d2d1y", "SinTripleGrad")); + d3x.device(*d) = -x.unaryExpr(Cosine()) * d1y * d2d1x * d3d2x - + x.unaryExpr(Sine()) * d2d1x * d3d2d1y; + } else if (!dOut && ddX && d_DDOut) { + auto d2d1x = EigenVector::Flatten( + GET_DATA_SAFELY(ddX, "Input", "d2d1x", "SinTripleGrad")); + auto d3d2d1y = 
EigenVector::Flatten( + GET_DATA_SAFELY(d_DDOut, "Input", "d3d2d1y", "SinTripleGrad")); + d3x.device(*d) = -x.unaryExpr(Sine()) * d2d1x * d3d2d1y; + } else if (dOut && ddX && !d_DDOut) { + auto d2d1x = EigenVector::Flatten( + GET_DATA_SAFELY(ddX, "Input", "d2d1x", "SinTripleGrad")); + auto d1y = EigenVector::Flatten( + GET_DATA_SAFELY(dOut, "Input", "d1y", "SinTripleGrad")); + d3x.device(*d) = -x.unaryExpr(Cosine()) * d1y * d2d1x * d3d2x; + } else { + d3x.device(*d) = x * static_cast(0); + } + } - auto d3x = EigenVector::Flatten( - GET_DATA_SAFELY(d_x_New, "Output", "d3x", "SinTripleGrad")); - d3x.device(*d) = -x.unaryExpr(Cosine()) * d1y * d2d1x * d3d2x - - x.unaryExpr(Sine()) * d2d1x * d3d2d1y; - - auto d3d1y = EigenVector::Flatten( - GET_DATA_SAFELY(d_d_Out, "Output", "d3d1y", "SinTripleGrad")); - d3d1y.device(*d) = -x.unaryExpr(Sine()) * d2d1x * d3d2x; + if (d_d_Out) { + auto d3d1y = EigenVector::Flatten( + GET_DATA_SAFELY(d_d_Out, "Output", "d3d1y", "SinTripleGrad")); + if (ddX) { + auto d2d1x = EigenVector::Flatten( + GET_DATA_SAFELY(ddX, "Input", "d2d1x", "SinTripleGrad")); + d3d1y.device(*d) = -x.unaryExpr(Sine()) * d2d1x * d3d2x; + } else { + d3d1y.device(*d) = static_cast(0) * x; + } + } - auto d3d2d1x = EigenVector::Flatten( - GET_DATA_SAFELY(d_DDx, "Output", "d3d2d1x", "SinTripleGrad")); - d3d2d1x.device(*d) = -x.unaryExpr(Sine()) * d1y * d3d2x + - x.unaryExpr(Cosine()) * d3d2d1y; + if (d_DDx) { + auto d3d2d1x = EigenVector::Flatten( + GET_DATA_SAFELY(d_DDx, "Output", "d3d2d1x", "SinTripleGrad")); + if (dOut && d_DDOut) { + auto d1y = EigenVector::Flatten( + GET_DATA_SAFELY(dOut, "Input", "d1y", "SinTripleGrad")); + auto d3d2d1y = EigenVector::Flatten( + GET_DATA_SAFELY(d_DDOut, "Input", "d3d2d1y", "SinTripleGrad")); + d3d2d1x.device(*d) = -x.unaryExpr(Sine()) * d1y * d3d2x + + x.unaryExpr(Cosine()) * d3d2d1y; + } else if (dOut && !d_DDOut) { + auto d1y = EigenVector::Flatten( + GET_DATA_SAFELY(dOut, "Input", "d1y", "SinTripleGrad")); + d3d2d1x.device(*d) = -x.unaryExpr(Sine()) * d1y * d3d2x; + } else if (!dOut && d_DDOut) { + auto d3d2d1y = EigenVector::Flatten( + GET_DATA_SAFELY(d_DDOut, "Input", "d3d2d1y", "SinTripleGrad")); + d3d2d1x.device(*d) = x.unaryExpr(Cosine()) * d3d2d1y; + } else { + d3d2d1x.device(*d) = x * static_cast(0); + } + } } static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepOut; @@ -270,14 +322,22 @@ struct CosDoubleGradFunctor : public BaseActivationFunctor { // calculate d2x first, so d2d1y can inplace d2d1x auto d2x = EigenVector::Flatten( GET_DATA_SAFELY(dX, "Output", "d2x", "CosDoubleGrad")); - auto d1y = EigenVector::Flatten( - GET_DATA_SAFELY(dOut, "Output", "d1y", "CosDoubleGrad")); - d2x.device(*d) = -d2d1x * x.unaryExpr(Cosine()) * d1y; + if (ddOut) { + if (dOut) { + auto d1y = EigenVector::Flatten( + GET_DATA_SAFELY(dOut, "Output", "d1y", "CosDoubleGrad")); + d2x.device(*d) = -d2d1x * x.unaryExpr(Cosine()) * d1y; + } else { + d2x.device(*d) = x * static_cast(0); + } + } - // calculate d2d1y - auto d2d1y = EigenVector::Flatten( - GET_DATA_SAFELY(ddOut, "Output", "d2d1y", "CosDoubleGrad")); - d2d1y.device(*d) = -d2d1x * x.unaryExpr(Sine()); + if (dX) { + // calculate d2d1y + auto d2d1y = EigenVector::Flatten( + GET_DATA_SAFELY(ddOut, "Output", "d2d1y", "CosDoubleGrad")); + d2d1y.device(*d) = -d2d1x * x.unaryExpr(Sine()); + } } static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } }; @@ -297,28 +357,72 @@ struct CosTripleGradFunctor : public BaseActivationFunctor { auto* d = dev.eigen_device(); auto x = 
EigenVector::Flatten( GET_DATA_SAFELY(X, "Input", "x", "CosTripleGrad")); - auto d2d1x = EigenVector::Flatten( - GET_DATA_SAFELY(ddX, "Input", "d2d1x", "CosTripleGrad")); - auto d1y = EigenVector::Flatten( - GET_DATA_SAFELY(dOut, "Input", "d1y", "CosTripleGrad")); - auto d3d2d1y = EigenVector::Flatten( - GET_DATA_SAFELY(d_DDOut, "Input", "d3d2d1y", "CosTripleGrad")); auto d3d2x = EigenVector::Flatten( GET_DATA_SAFELY(d_dx_New, "Input", "d3d2x", "CosTripleGrad")); - auto d3x = EigenVector::Flatten( - GET_DATA_SAFELY(d_x_New, "Output", "d3x", "CosTripleGrad")); - d3x.device(*d) = x.unaryExpr(Sine()) * d1y * d2d1x * d3d2x - - x.unaryExpr(Cosine()) * d2d1x * d3d2d1y; + if (d_x_New) { + auto d3x = EigenVector::Flatten( + GET_DATA_SAFELY(d_x_New, "Output", "d3x", "CosTripleGrad")); + if (dOut && ddX && d_DDOut) { + auto d2d1x = EigenVector::Flatten( + GET_DATA_SAFELY(ddX, "Input", "d2d1x", "CosTripleGrad")); + auto d1y = EigenVector::Flatten( + GET_DATA_SAFELY(dOut, "Input", "d1y", "CosTripleGrad")); + auto d3d2d1y = EigenVector::Flatten( + GET_DATA_SAFELY(d_DDOut, "Input", "d3d2d1y", "CosTripleGrad")); + d3x.device(*d) = x.unaryExpr(Sine()) * d1y * d2d1x * d3d2x - + x.unaryExpr(Cosine()) * d2d1x * d3d2d1y; + } else if (dOut && ddX && !d_DDOut) { + auto d2d1x = EigenVector::Flatten( + GET_DATA_SAFELY(ddX, "Input", "d2d1x", "CosTripleGrad")); + auto d1y = EigenVector::Flatten( + GET_DATA_SAFELY(dOut, "Input", "d1y", "CosTripleGrad")); + d3x.device(*d) = x.unaryExpr(Sine()) * d1y * d2d1x * d3d2x; + } else if (!dOut && ddX && d_DDOut) { + auto d2d1x = EigenVector::Flatten( + GET_DATA_SAFELY(ddX, "Input", "d2d1x", "CosTripleGrad")); + auto d3d2d1y = EigenVector::Flatten( + GET_DATA_SAFELY(d_DDOut, "Input", "d3d2d1y", "CosTripleGrad")); + d3x.device(*d) = -x.unaryExpr(Cosine()) * d2d1x * d3d2d1y; + } else { + d3x.device(*d) = static_cast(0) * x; + } + } - auto d3d1y = EigenVector::Flatten( - GET_DATA_SAFELY(d_d_Out, "Output", "d3d1y", "CosTripleGrad")); - d3d1y.device(*d) = -x.unaryExpr(Cosine()) * d2d1x * d3d2x; + if (d_d_Out) { + auto d3d1y = EigenVector::Flatten( + GET_DATA_SAFELY(d_d_Out, "Output", "d3d1y", "CosTripleGrad")); + if (ddX) { + auto d2d1x = EigenVector::Flatten( + GET_DATA_SAFELY(ddX, "Input", "d2d1x", "CosTripleGrad")); + d3d1y.device(*d) = -x.unaryExpr(Cosine()) * d2d1x * d3d2x; + } else { + d3d1y.device(*d) = static_cast(0) * x; + } + } - auto d3d2d1x = EigenVector::Flatten( - GET_DATA_SAFELY(d_DDx, "Output", "d3d2d1x", "CosTripleGrad")); - d3d2d1x.device(*d) = -x.unaryExpr(Cosine()) * d1y * d3d2x - - x.unaryExpr(Sine()) * d3d2d1y; + if (d_DDx) { + auto d3d2d1x = EigenVector::Flatten( + GET_DATA_SAFELY(d_DDx, "Output", "d3d2d1x", "CosTripleGrad")); + if (dOut && d_DDOut) { + auto d1y = EigenVector::Flatten( + GET_DATA_SAFELY(dOut, "Input", "d1y", "CosTripleGrad")); + auto d3d2d1y = EigenVector::Flatten( + GET_DATA_SAFELY(d_DDOut, "Input", "d3d2d1y", "CosTripleGrad")); + d3d2d1x.device(*d) = -x.unaryExpr(Cosine()) * d1y * d3d2x - + x.unaryExpr(Sine()) * d3d2d1y; + } else if (!dOut && d_DDOut) { + auto d3d2d1y = EigenVector::Flatten( + GET_DATA_SAFELY(d_DDOut, "Input", "d3d2d1y", "CosTripleGrad")); + d3d2d1x.device(*d) = -x.unaryExpr(Sine()) * d3d2d1y; + } else if (dOut && !d_DDOut) { + auto d1y = EigenVector::Flatten( + GET_DATA_SAFELY(dOut, "Input", "d1y", "CosTripleGrad")); + d3d2d1x.device(*d) = -x.unaryExpr(Cosine()) * d1y * d3d2x; + } else { + d3d2d1x.device(*d) = static_cast(0) * x; + } + } } static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepOut; @@ 
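The guarded branches above mean the sin/cos double- and triple-grad functors only read the optional gradient terms that were actually supplied and write zeros for outputs whose corresponding inputs are absent. A minimal Python-level sketch of the behaviour this enables (illustrative only; assumes a Paddle build containing this patch and a dynamic-graph session):

import paddle

x = paddle.to_tensor([0.1, 0.2, 0.3], stop_gradient=False)
y = paddle.sin(x)

# First-order gradient, keeping the graph so it can be differentiated again.
(dx,) = paddle.grad(y, x, create_graph=True)

# Second-order gradient; this reaches the sin double-grad kernel, which with
# this patch treats missing optional terms as zeros instead of dereferencing
# null tensors.
(d2x,) = paddle.grad(dx, x)
print(d2x)  # expected to match -paddle.sin(x)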
-1106,27 +1210,70 @@ struct TanhTripleGradFunctor : public BaseActivationFunctor { GET_DATA_SAFELY(Out, "Input", "Out", "TanhTripleGrad")); auto dout = EigenVector::Flatten( GET_DATA_SAFELY(dOut, "Input", "DOut", "TanhTripleGrad")); - auto d_ddOut = EigenVector::Flatten( - GET_DATA_SAFELY(d_DDOut, "Input", "D_DDOut", "TanhTripleGrad")); - auto d_dOutNew = EigenVector::Flatten( - GET_DATA_SAFELY(d_dOut_New, "Input", "D_DOut_New", "TanhTripleGrad")); if (d_Out_New) { auto d_OutNew = EigenVector::Flatten( GET_DATA_SAFELY(d_Out_New, "Output", "D_OutNew", "TanhTripleGrad")); - d_OutNew.device(*d) = (static_cast(-2) * out * ddx * d_ddOut) - - (static_cast(2) * dout * ddx * d_dOutNew); + + if (d_DDOut && d_dOut_New) { + auto d_ddOut = EigenVector::Flatten( + GET_DATA_SAFELY(d_DDOut, "Input", "D_DDOut", "TanhTripleGrad")); + auto d_dOutNew = EigenVector::Flatten(GET_DATA_SAFELY( + d_dOut_New, "Input", "D_DOut_New", "TanhTripleGrad")); + + d_OutNew.device(*d) = (static_cast(-2) * out * ddx * d_ddOut) - + (static_cast(2) * dout * ddx * d_dOutNew); + + } else if (d_DDOut && !d_dOut_New) { + auto d_ddOut = EigenVector::Flatten( + GET_DATA_SAFELY(d_DDOut, "Input", "D_DDOut", "TanhTripleGrad")); + + d_OutNew.device(*d) = (static_cast(-2) * out * ddx * d_ddOut); + + } else if (!d_DDOut && d_dOut_New) { + auto d_dOutNew = EigenVector::Flatten(GET_DATA_SAFELY( + d_dOut_New, "Input", "D_DOut_New", "TanhTripleGrad")); + + d_OutNew.device(*d) = -(static_cast(2) * dout * ddx * d_dOutNew); + } else { + d_OutNew.device(*d) = static_cast(0) * out; + } } if (d_d_Out) { auto d_dOut = EigenVector::Flatten( GET_DATA_SAFELY(d_d_Out, "Output", "D_DOut", "TanhTripleGrad")); - d_dOut.device(*d) = static_cast(-2) * out * ddx * d_dOutNew; + + if (d_dOut_New) { + auto d_dOutNew = EigenVector::Flatten(GET_DATA_SAFELY( + d_dOut_New, "Input", "D_DOut_New", "TanhTripleGrad")); + d_dOut.device(*d) = static_cast(-2) * out * ddx * d_dOutNew; + } else { + d_dOut.device(*d) = static_cast(0) * out; + } } if (d_DDx) { auto d_ddx = EigenVector::Flatten( GET_DATA_SAFELY(d_DDx, "Output", "D_DDx", "TanhTripleGrad")); - d_ddx.device(*d) = (static_cast(1) - (out * out)) * d_ddOut - - static_cast(2) * out * dout * d_dOutNew; + + if (d_DDOut && d_dOut_New) { + auto d_ddOut = EigenVector::Flatten( + GET_DATA_SAFELY(d_DDOut, "Input", "D_DDOut", "TanhTripleGrad")); + auto d_dOutNew = EigenVector::Flatten(GET_DATA_SAFELY( + d_dOut_New, "Input", "D_DOut_New", "TanhTripleGrad")); + d_ddx.device(*d) = (static_cast(1) - (out * out)) * d_ddOut - + static_cast(2) * out * dout * d_dOutNew; + + } else if (d_DDOut && !d_dOut_New) { + auto d_ddOut = EigenVector::Flatten( + GET_DATA_SAFELY(d_DDOut, "Input", "D_DDOut", "TanhTripleGrad")); + d_ddx.device(*d) = (static_cast(1) - (out * out)) * d_ddOut; + } else if (!d_DDOut && d_dOut_New) { + auto d_dOutNew = EigenVector::Flatten(GET_DATA_SAFELY( + d_dOut_New, "Input", "D_DOut_New", "TanhTripleGrad")); + d_ddx.device(*d) = -static_cast(2) * out * dout * d_dOutNew; + } else { + d_ddx.device(*d) = static_cast(0) * ddx; + } } } static constexpr ActBwdOpFwdDeps FwdDeps() { diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu index 0f4f39629e8793..fd6e92b2ffe06d 100644 --- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu @@ -1295,26 +1295,27 @@ void BatchNormGradKernel(const Context &dev_ctx, } template -void BatchNormDoubleGradKernel(const Context &ctx, - const DenseTensor &x, - const DenseTensor 
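The TanhTripleGrad functor above follows the same pattern: each of d_Out_New, d_d_Out and d_DDx is computed only from the optional inputs that are present, falling back to a zero fill otherwise. A rough, hedged sketch of exercising that path from Python (assumes the build supports third-order autodiff for tanh):

import paddle

x = paddle.to_tensor([0.5], stop_gradient=False)
y = paddle.tanh(x)

(g1,) = paddle.grad(y, x, create_graph=True)   # 1 - tanh(x)^2
(g2,) = paddle.grad(g1, x, create_graph=True)  # second derivative
# Third-order gradient; with this patch the triple-grad kernel tolerates
# absent optional terms (d_dout_new / d_ddout) and zero-fills instead of
# requiring every upstream term to be provided.
(g3,) = paddle.grad(g2, x)
print(g3)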
&scale, - const paddle::optional &mean, - const paddle::optional &variance, - const DenseTensor &saved_mean, - const DenseTensor &saved_variance, - const DenseTensor &y_grad, - const DenseTensor &x_grad_grad, - const DenseTensor &scale_grad_grad, - const DenseTensor &bias_grad_grad, - float momentum, - float epsilon, - const std::string &data_layout_str, - bool is_test, - bool use_global_stats, - bool trainable_statistics, - DenseTensor *x_grad, - DenseTensor *scale_grad, - DenseTensor *y_grad_grad) { +void BatchNormDoubleGradKernel( + const Context &ctx, + const DenseTensor &x, + const DenseTensor &scale, + const paddle::optional &mean, + const paddle::optional &variance, + const DenseTensor &saved_mean, + const DenseTensor &saved_variance, + const DenseTensor &y_grad, + const paddle::optional &x_grad_grad, + const paddle::optional &scale_grad_grad, + const paddle::optional &bias_grad_grad, + float momentum, + float epsilon, + const std::string &data_layout_str, + bool is_test, + bool use_global_stats, + bool trainable_statistics, + DenseTensor *x_grad, + DenseTensor *scale_grad, + DenseTensor *y_grad_grad) { PADDLE_ENFORCE_EQ(is_test, false, phi::errors::InvalidArgument( @@ -1330,23 +1331,24 @@ void BatchNormDoubleGradKernel(const Context &ctx, running_mean = mean.get_ptr(); running_variance = variance.get_ptr(); } - paddle::operators::NormDoubleGradFunctor(ctx, - data_layout, - &x, - &scale, - &y_grad, - &saved_mean, - &saved_variance, - running_mean, - running_variance, - epsilon, - use_global_stats, - &x_grad_grad, - &scale_grad_grad, - &bias_grad_grad, - x_grad, - scale_grad, - y_grad_grad); + paddle::operators::NormDoubleGradFunctor( + ctx, + data_layout, + &x, + &scale, + &y_grad, + &saved_mean, + &saved_variance, + running_mean, + running_variance, + epsilon, + use_global_stats, + x_grad_grad.get_ptr(), + scale_grad_grad.get_ptr(), + bias_grad_grad.get_ptr(), + x_grad, + scale_grad, + y_grad_grad); } } // namespace phi diff --git a/paddle/phi/kernels/gpu/full_kernel.cu b/paddle/phi/kernels/gpu/full_kernel.cu index 120e908ae8cf7d..4f030bc775b894 100644 --- a/paddle/phi/kernels/gpu/full_kernel.cu +++ b/paddle/phi/kernels/gpu/full_kernel.cu @@ -138,6 +138,8 @@ PD_REGISTER_KERNEL(full_like, int64_t, bool, phi::dtype::bfloat16, - phi::dtype::float16) { + phi::dtype::float16, + phi::dtype::complex, + phi::dtype::complex) { kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND); } diff --git a/paddle/phi/kernels/impl/activation_grad_impl.h b/paddle/phi/kernels/impl/activation_grad_impl.h index dd7dadc1e1cf9e..c8f0cddbf75fa0 100644 --- a/paddle/phi/kernels/impl/activation_grad_impl.h +++ b/paddle/phi/kernels/impl/activation_grad_impl.h @@ -177,8 +177,8 @@ void TanhTripleGradKernel(const Context& dev_ctx, const DenseTensor& out, const DenseTensor& dout, const DenseTensor& ddx, - const DenseTensor& d_dout_new, - const DenseTensor& d_ddout, + const paddle::optional& d_dout_new, + const paddle::optional& d_ddout, DenseTensor* d_out_new, DenseTensor* d_dout, DenseTensor* d_ddx) { @@ -199,8 +199,8 @@ void TanhTripleGradKernel(const Context& dev_ctx, &out, &ddx, &dout, - &d_ddout, - &d_dout_new, // input + d_ddout.get_ptr(), + d_dout_new.get_ptr(), // input d_dout, d_out_new, d_ddx); // output @@ -597,49 +597,45 @@ void SquareDoubleGradKernel(const Context& dev_ctx, template void SinDoubleGradKernel(const Context& dev_ctx, const DenseTensor& x, - const DenseTensor& dout, + const paddle::optional& dout, const DenseTensor& ddx, DenseTensor* dx, DenseTensor* ddout) { if (dx) { - 
dx->Resize(x.dims()); dev_ctx.template Alloc(dx); } if (ddout) { dev_ctx.template Alloc(ddout); } phi::funcs::SinDoubleGradFunctor functor; - functor(dev_ctx, &x, &dout, &ddx, dx, ddout); + functor(dev_ctx, &x, dout.get_ptr(), &ddx, dx, ddout); } template void SinTripleGradKernel(const Context& dev_ctx, const DenseTensor& x, - const DenseTensor& dout, - const DenseTensor& ddx, + const paddle::optional& dout, + const paddle::optional& ddx, const DenseTensor& d_dx_new, - const DenseTensor& d_ddout, + const paddle::optional& d_ddout, DenseTensor* d_x_new, DenseTensor* d_dout, DenseTensor* d_ddx) { if (d_dout) { - d_dout->Resize(x.dims()); dev_ctx.template Alloc(d_dout); } if (d_x_new) { - d_dout->Resize(x.dims()); dev_ctx.template Alloc(d_x_new); } if (d_ddx) { - d_dout->Resize(ddx.dims()); dev_ctx.template Alloc(d_ddx); } funcs::SinTripleGradFunctor functor; functor(dev_ctx, &x, - &ddx, - &dout, - &d_ddout, + ddx.get_ptr(), + dout.get_ptr(), + d_ddout.get_ptr(), &d_dx_new, // input d_dout, d_x_new, @@ -649,49 +645,45 @@ void SinTripleGradKernel(const Context& dev_ctx, template void CosDoubleGradKernel(const Context& dev_ctx, const DenseTensor& x, - const DenseTensor& dout, + const paddle::optional& dout, const DenseTensor& ddx, DenseTensor* dx, DenseTensor* ddout) { if (dx) { - dx->Resize(x.dims()); dev_ctx.template Alloc(dx); } if (ddout) { dev_ctx.template Alloc(ddout); } phi::funcs::CosDoubleGradFunctor functor; - functor(dev_ctx, &x, &dout, &ddx, dx, ddout); + functor(dev_ctx, &x, dout.get_ptr(), &ddx, dx, ddout); } template void CosTripleGradKernel(const Context& dev_ctx, const DenseTensor& x, - const DenseTensor& dout, - const DenseTensor& ddx, + const paddle::optional& dout, + const paddle::optional& ddx, const DenseTensor& d_dx_new, - const DenseTensor& d_ddout, + const paddle::optional& d_ddout, DenseTensor* d_x_new, DenseTensor* d_dout, DenseTensor* d_ddx) { if (d_dout) { - d_dout->Resize(x.dims()); dev_ctx.template Alloc(d_dout); } if (d_x_new) { - d_dout->Resize(x.dims()); dev_ctx.template Alloc(d_x_new); } if (d_ddx) { - d_dout->Resize(ddx.dims()); dev_ctx.template Alloc(d_ddx); } funcs::CosTripleGradFunctor functor; functor(dev_ctx, &x, - &ddx, - &dout, - &d_ddout, + ddx.get_ptr(), + dout.get_ptr(), + d_ddout.get_ptr(), &d_dx_new, // input d_dout, d_x_new, diff --git a/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h b/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h index 7759de509af56b..28387975e6e998 100644 --- a/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include "paddle/phi/common/float16.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/tensor_utils.h" +#include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" @@ -472,6 +473,7 @@ void MultiplyDoubleGradKernel(const Context& dev_ctx, funcs::MultiplyFunctor, funcs::InverseMultiplyFunctor>( dev_ctx, y, ddx_safe, ddout, axis); + funcs::DefaultElementwiseOperator, @@ -483,42 +485,70 @@ void MultiplyDoubleGradKernel(const Context& dev_ctx, ddout_t.device(place) = ddout_t + ddout_tmp_t; } else { // use dx to save memory, other than alloc tmp tensor - DenseTensor* ddout_tmp = dx; - funcs::DefaultElementwiseOperator, - funcs::InverseMultiplyFunctor>( - dev_ctx, x, ddy_safe, ddout_tmp, axis); - // NOTE: in the following ElemwiseGradCompute, for the - // first output tensor is nullptr, the branch to calculate first - // output tensor will not be activated, DivGradDx function will not - // be called and can be ignored, the first branch has little effect - // on running speed. - phi::funcs::ElemwiseGradCompute, MulGradDY>( - dev_ctx, - ddx_safe, - ddy_safe, - dout, - dout, - axis, - nullptr, - dy, - MulGradDX(), - MulGradDY()); - funcs::DefaultElementwiseOperator, - funcs::InverseMultiplyFunctor>( - dev_ctx, ddx_safe, y, ddout, axis); + if (dx) { + DenseTensor* ddout_tmp = dx; + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, x, ddy_safe, ddout_tmp, axis); + + // NOTE: in the following ElemwiseGradCompute, for the + // first output tensor is nullptr, the branch to calculate first + // output tensor will not be activated, DivGradDx function will not + // be called and can be ignored, the first branch has little effect + // on running speed. 
+ phi::funcs::ElemwiseGradCompute, MulGradDY>( + dev_ctx, + ddx_safe, + ddy_safe, + dout, + dout, + axis, + nullptr, + dy, + MulGradDX(), + MulGradDY()); + + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, ddx_safe, y, ddout, axis); - auto ddout_t = phi::EigenVector::Flatten(*ddout); - auto ddout_tmp_t = phi::EigenVector::Flatten(*ddout_tmp); - ddout_t.device(place) = ddout_t + ddout_tmp_t; - funcs::DefaultElementwiseOperator, - funcs::InverseMultiplyFunctor>( - dev_ctx, dout, ddy_safe, dx, axis); + auto ddout_t = phi::EigenVector::Flatten(*ddout); + auto ddout_tmp_t = phi::EigenVector::Flatten(*ddout_tmp); + ddout_t.device(place) = ddout_t + ddout_tmp_t; + + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, dout, ddy_safe, dx, axis); + + } else { + DenseTensor tmp_a(ddout->dtype()); + tmp_a.Resize(ddout->dims()); + + dev_ctx.template Alloc(&tmp_a); + + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, x, ddy_safe, &tmp_a, axis); + + auto ddout_t1 = phi::EigenVector::Flatten(tmp_a); + + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, ddx_safe, y, ddout, axis); + + auto ddout_t2 = phi::EigenVector::Flatten(*ddout); + ddout_t2.device(place) = ddout_t2 + ddout_t1; + } } } else { if (dx && dy) { @@ -544,8 +574,8 @@ void MultiplyTripleGradKernel(const Context& dev_ctx, const DenseTensor& dout, const paddle::optional& ddx, const paddle::optional& ddy, - const DenseTensor& d_dx, - const DenseTensor& d_dy, + const paddle::optional& d_dx, + const paddle::optional& d_dy, const paddle::optional& d_ddout, int axis, DenseTensor* d_x, @@ -599,6 +629,13 @@ void MultiplyTripleGradKernel(const Context& dev_ctx, funcs::InverseMultiplyFunctor>( dev_ctx, ddx_safe, *(d_ddout.get_ptr()), d_y, axis); } + } else { + if (d_x) { + FullLikeKernel(dev_ctx, x, Scalar(0.0), x.dtype(), d_x); + } + if (d_y) { + FullLikeKernel(dev_ctx, y, Scalar(0.0), y.dtype(), d_y); + } } if (d_dout) { @@ -607,61 +644,135 @@ void MultiplyTripleGradKernel(const Context& dev_ctx, DenseTensor d_dout_tmp; d_dout_tmp.Resize(dout.dims()); dev_ctx.template Alloc(&d_dout_tmp); - funcs::DefaultElementwiseOperator, - funcs::InverseMultiplyFunctor>( - dev_ctx, d_dy, ddx_safe, d_dout, axis); - funcs::DefaultElementwiseOperator, - funcs::InverseMultiplyFunctor>( - dev_ctx, ddy_safe, d_dx, &d_dout_tmp, axis); - auto d_dout_t = phi::EigenVector::Flatten(*d_dout); - auto d_dout_tmp_t = phi::EigenVector::Flatten(d_dout_tmp); - d_dout_t.device(place) = d_dout_t + d_dout_tmp_t; + + if (d_dy && d_dx) { + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, d_dy.get(), ddx_safe, d_dout, axis); + + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, ddy_safe, d_dx.get(), &d_dout_tmp, axis); + + auto d_dout_t = phi::EigenVector::Flatten(*d_dout); + auto d_dout_tmp_t = phi::EigenVector::Flatten(d_dout_tmp); + d_dout_t.device(place) = d_dout_t + d_dout_tmp_t; + } else if (d_dy && !d_dx) { + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, d_dy.get(), ddx_safe, d_dout, axis); + auto d_dout_t = phi::EigenVector::Flatten(*d_dout); + d_dout_t.device(place) = d_dout_t; + } else if (!d_dy && d_dx) { + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, ddy_safe, d_dx.get(), d_dout, axis); + + auto d_dout_t = phi::EigenVector::Flatten(*d_dout); + d_dout_t.device(place) = d_dout_t; + } else { + FullLikeKernel( + dev_ctx, dout, 
Scalar(0.0), dout.dtype(), d_dout); + } } - if (d_ddx) { + if (d_ddx && ddx) { // get d_ddx // d_ddx = dout * d_dy + y * d_ddout DenseTensor d_ddx_tmp; d_ddx_tmp.Resize(ddx->dims()); dev_ctx.template Alloc(&d_ddx_tmp); - funcs::DefaultElementwiseOperator, - funcs::InverseMultiplyFunctor>( - dev_ctx, dout, d_dy, d_ddx, axis); - funcs::DefaultElementwiseOperator, - funcs::InverseMultiplyFunctor>( - dev_ctx, y, *(d_ddout.get_ptr()), &d_ddx_tmp, axis); - auto d_ddx_t = phi::EigenVector::Flatten(*d_ddx); - auto d_ddx_tmp_t = phi::EigenVector::Flatten(d_ddx_tmp); - d_ddx_t.device(place) = d_ddx_t + d_ddx_tmp_t; + if (d_dy && d_ddout) { + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, dout, d_dy.get(), d_ddx, axis); + + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, y, *(d_ddout.get_ptr()), &d_ddx_tmp, axis); + + auto d_ddx_t = phi::EigenVector::Flatten(*d_ddx); + auto d_ddx_tmp_t = phi::EigenVector::Flatten(d_ddx_tmp); + d_ddx_t.device(place) = d_ddx_t + d_ddx_tmp_t; + } else if (d_dy && !d_ddout) { + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, dout, d_dy.get(), d_ddx, axis); + + auto d_ddx_t = phi::EigenVector::Flatten(*d_ddx); + d_ddx_t.device(place) = d_ddx_t; + } else if (!d_dy && d_ddout) { + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, y, *(d_ddout.get_ptr()), d_ddx, axis); + + auto d_ddx_t = phi::EigenVector::Flatten(*d_ddx); + d_ddx_t.device(place) = d_ddx_t; + } else { + FullLikeKernel(dev_ctx, x, Scalar(0.0), x.dtype(), d_ddx); + } } - if (d_ddy) { + if (d_ddy && ddy) { // get d_ddy // d_ddy = dout * d_dx + x * d_ddout DenseTensor d_ddy_tmp; d_ddy_tmp.Resize(ddy->dims()); dev_ctx.template Alloc(&d_ddy_tmp); - funcs::DefaultElementwiseOperator, - funcs::InverseMultiplyFunctor>( - dev_ctx, dout, d_dx, d_ddy, axis); - funcs::DefaultElementwiseOperator, - funcs::InverseMultiplyFunctor>( - dev_ctx, x, *(d_ddout.get_ptr()), &d_ddy_tmp, axis); - auto d_ddy_t = phi::EigenVector::Flatten(*d_ddy); - auto d_ddy_tmp_t = phi::EigenVector::Flatten(d_ddy_tmp); - d_ddy_t.device(place) = d_ddy_t + d_ddy_tmp_t; + + if (d_dx && d_ddout) { + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, dout, d_dx.get(), d_ddy, axis); + + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, x, *(d_ddout.get_ptr()), &d_ddy_tmp, axis); + + auto d_ddy_t = phi::EigenVector::Flatten(*d_ddy); + auto d_ddy_tmp_t = phi::EigenVector::Flatten(d_ddy_tmp); + d_ddy_t.device(place) = d_ddy_t + d_ddy_tmp_t; + } else if (d_dx && !d_ddout) { + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, dout, d_dx.get(), d_ddy, axis); + + auto d_ddy_t = phi::EigenVector::Flatten(*d_ddy); + d_ddy_t.device(place) = d_ddy_t; + } else if (!d_dx && d_ddout) { + funcs::DefaultElementwiseOperator, + funcs::InverseMultiplyFunctor>( + dev_ctx, x, *(d_ddout.get_ptr()), d_ddy, axis); + + auto d_ddy_t = phi::EigenVector::Flatten(*d_ddy); + d_ddy_t.device(place) = d_ddy_t; + } else { + FullLikeKernel(dev_ctx, y, Scalar(0.0), y.dtype(), d_ddy); + } } } diff --git a/paddle/phi/kernels/impl/logcumsumexp_grad_impl.h b/paddle/phi/kernels/impl/logcumsumexp_grad_impl.h index 85a530b1b75597..0eeae849bcfedf 100644 --- a/paddle/phi/kernels/impl/logcumsumexp_grad_impl.h +++ b/paddle/phi/kernels/impl/logcumsumexp_grad_impl.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/phi/backends/cpu/cpu_context.h" #include 
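For elementwise multiply, the double/triple-grad path above now takes d_dx and d_dy as optionals and, when a term needed for a requested output is missing, fills that output with zeros via FullLikeKernel rather than reading a null tensor. An illustrative Python-level usage that relies on only part of the second-order terms being present (a sketch, assuming this patch is applied):

import paddle

x = paddle.to_tensor([1.0, 2.0], stop_gradient=False)
y = paddle.to_tensor([3.0, 4.0], stop_gradient=False)
z = x * y

(dx,) = paddle.grad(z, x, create_graph=True)  # dz/dx = y
# Differentiate dx with respect to y only; the multiply grad kernels may then
# receive some optional gradient inputs as "not provided" and handle them as
# zeros instead of failing.
(dxy,) = paddle.grad(dx, y)
print(dxy)  # d^2 z / (dy dx) = ones_like(y)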
"paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h b/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h index f499e59c307291..1bc29a34d46e1b 100644 --- a/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h @@ -18,6 +18,7 @@ limitations under the License. */ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/kernels/complex_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" +#include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/reduce_function.h" #include "paddle/phi/kernels/funcs/reduce_functor.h" #include "paddle/phi/kernels/impl/dot_grad_kernel_impl.h" @@ -262,6 +263,7 @@ void MatmulGradKernel(const Context& dev_ctx, DenseTensor x_help = x; DenseTensor y_help = y; DenseTensor out_grad_help = out_grad; + ReshapeXYOutIntoMatrixSequence( &x_help, &y_help, &out_grad_help, transpose_x, transpose_y); @@ -471,13 +473,27 @@ void MatmulDoubleGradKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y, const DenseTensor& dout, - const paddle::optional& ddx, - const paddle::optional& ddy, + const paddle::optional& ddx_opt, + const paddle::optional& ddy_opt, bool transpose_x, bool transpose_y, DenseTensor* dx, DenseTensor* dy, DenseTensor* ddout) { + paddle::optional ddx; + paddle::optional ddy; + if (!ddx_opt && (dy || ddout)) { + DenseTensor ddx_tmp = phi::FullLike(dev_ctx, x, Scalar(0.0)); + ddx = paddle::make_optional(ddx_tmp); + } else { + ddx = ddx_opt; + } + if (!ddy_opt && (dx || ddout)) { + DenseTensor ddy_tmp = phi::FullLike(dev_ctx, y, Scalar(0.0)); + ddy = paddle::make_optional(ddy_tmp); + } else { + ddy = ddy_opt; + } // Get dims from the input x, y, output_grad std::vector x_dims = vectorize(x.dims()); std::vector y_dims = vectorize(y.dims()); @@ -688,7 +704,7 @@ void MatmulDoubleGradKernel(const Context& dev_ctx, if (transpose_x) { if (transpose_y) { - if (dx) { + if (dx && ddy) { MatMulFunction(dev_ctx, ddy.get(), dout_conj, @@ -698,7 +714,7 @@ void MatmulDoubleGradKernel(const Context& dev_ctx, true, true); } - if (dy) { + if (dy && ddx) { MatMulFunction(dev_ctx, dout_conj, ddx.get(), @@ -709,7 +725,7 @@ void MatmulDoubleGradKernel(const Context& dev_ctx, true); } } else { - if (dx) + if (dx && ddy) { MatMulFunction(dev_ctx, ddy.get(), dout_conj, @@ -718,7 +734,8 @@ void MatmulDoubleGradKernel(const Context& dev_ctx, &dx_help, false, true); - if (dy) + } + if (dy && ddx) { MatMulFunction(dev_ctx, ddx.get(), dout_conj, @@ -727,10 +744,11 @@ void MatmulDoubleGradKernel(const Context& dev_ctx, &dy_help, false, false); + } } } else { if (transpose_y) { - if (dx) { + if (dx && ddy) { MatMulFunction(dev_ctx, dout_conj, ddy.get(), @@ -740,7 +758,7 @@ void MatmulDoubleGradKernel(const Context& dev_ctx, false, false); } - if (dy) { + if (dy && ddx) { MatMulFunction(dev_ctx, dout_conj, ddx.get(), @@ -751,7 +769,7 @@ void MatmulDoubleGradKernel(const Context& dev_ctx, false); } } else { - if (dx) { + if (dx && ddy) { MatMulFunction(dev_ctx, dout_conj, ddy.get(), @@ -761,7 +779,7 @@ void MatmulDoubleGradKernel(const Context& dev_ctx, false, true); } - if (dy) { + if (dy && ddx) { MatMulFunction(dev_ctx, ddx.get(), dout_conj, @@ -824,23 +842,28 @@ void MatmulDoubleGradKernel(const Context& dev_ctx, if (ddout) { // Calculate the gradient of OutputGrad(Out) - MatMulFunction(dev_ctx, - ddx.get(), - y_conj, - x_dims, - y_dims, - ddout, - transpose_x, - transpose_y); - 
MatMulFunction(dev_ctx, - x_conj, - ddy.get(), - x_dims, - y_dims, - ddout, - transpose_x, - transpose_y, - true); + if (ddx) { + MatMulFunction(dev_ctx, + ddx.get(), + y_conj, + x_dims, + y_dims, + ddout, + transpose_x, + transpose_y); + } + + if (ddy) { + MatMulFunction(dev_ctx, + x_conj, + ddy.get(), + x_dims, + y_dims, + ddout, + transpose_x, + transpose_y, + true); + } } } } @@ -850,11 +873,11 @@ void MatmulTripleGradKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y, const DenseTensor& dout, - const DenseTensor& ddx, - const DenseTensor& ddy, - const paddle::optional& d_dx, - const paddle::optional& d_dy, - const paddle::optional& d_ddout, + const paddle::optional& ddx_opt, + const paddle::optional& ddy_opt, + const paddle::optional& d_dx_opt, + const paddle::optional& d_dy_opt, + const paddle::optional& d_ddout_opt, bool transpose_x, bool transpose_y, DenseTensor* out_d_x, @@ -862,6 +885,50 @@ void MatmulTripleGradKernel(const Context& dev_ctx, DenseTensor* out_d_dout, DenseTensor* out_d_ddx, DenseTensor* out_d_ddy) { + paddle::optional ddx; + paddle::optional ddy; + paddle::optional d_dx; + paddle::optional d_dy; + paddle::optional d_ddout; + + if (!ddx_opt && (out_d_y || out_d_dout)) { + DenseTensor ddx_tmp = + phi::FullLike(dev_ctx, x, static_cast(0.0)); + ddx = paddle::make_optional(ddx_tmp); + } else { + ddx = ddx_opt; + } + if (!ddy_opt && (out_d_x || out_d_dout)) { + DenseTensor ddy_tmp = + phi::FullLike(dev_ctx, y, static_cast(0.0)); + ddy = paddle::make_optional(ddy_tmp); + } else { + ddy = ddy_opt; + } + + if (!d_ddout_opt && (out_d_y || out_d_x || out_d_ddy || out_d_ddx)) { + DenseTensor d_ddout_tmp = + phi::FullLike(dev_ctx, dout, static_cast(0.0)); + d_ddout = paddle::make_optional(d_ddout_tmp); + } else { + d_ddout = d_ddout_opt; + } + + if (!d_dx_opt && (out_d_ddy || out_d_dout)) { + DenseTensor d_dx_tmp = + phi::FullLike(dev_ctx, x, static_cast(0.0)); + d_dx = paddle::make_optional(d_dx_tmp); + } else { + d_dx = d_dx_opt; + } + + if (!d_dy_opt && (out_d_ddx || out_d_dout)) { + DenseTensor d_dy_tmp = + phi::FullLike(dev_ctx, y, static_cast(0.0)); + d_dy = paddle::make_optional(d_dy_tmp); + } else { + d_dy = d_dy_opt; + } // Get dims from the input x, y, output_grad std::vector x_dims = vectorize(x.dims()); std::vector y_dims = vectorize(y.dims()); @@ -877,8 +944,8 @@ void MatmulTripleGradKernel(const Context& dev_ctx, DotTripleGradFunction()(dev_ctx, &x, &y, - &ddx, - &ddy, + ddx.get_ptr(), + ddy.get_ptr(), d_dx.get_ptr(), d_dy.get_ptr(), &dout, @@ -913,17 +980,23 @@ void MatmulTripleGradKernel(const Context& dev_ctx, DenseTensor x_help = x; DenseTensor y_help = y; DenseTensor dout_help = dout; - DenseTensor ddx_help = ddx; - DenseTensor ddy_help = ddy; + + DenseTensor ddx_help; + DenseTensor ddy_help; ReshapeXYOutIntoMatrixSequence( &x_help, &y_help, &dout_help, transpose_x, transpose_y); - - if (ddx_help.dims() != x_help.dims()) { - ddx_help.Resize(x_help.dims()); + if (ddx) { + ddx_help = ddx.get(); + if (ddx_help.dims() != x_help.dims()) { + ddx_help.Resize(x_help.dims()); + } } - if (ddy_help.dims() != y_help.dims()) { - ddy_help.Resize(y_help.dims()); + if (ddy) { + ddy_help = ddy.get(); + if (ddy_help.dims() != y_help.dims()) { + ddy_help.Resize(y_help.dims()); + } } DDim out_dx_dims; @@ -932,60 +1005,64 @@ void MatmulTripleGradKernel(const Context& dev_ctx, if (out_dx_dims != x_help.dims()) { out_d_x->Resize(x_help.dims()); } + if (ddy) { + ddy_conj = Conj(dev_ctx, ddy_help); + } } - DDim out_dy_dims; if (out_d_y) { out_dy_dims = 
out_d_y->dims(); if (out_dy_dims != y_help.dims()) { out_d_y->Resize(y_help.dims()); } + if (ddx) { + ddx_conj = Conj(dev_ctx, ddx_help); + } } - DDim out_d_dout_dims; if (out_d_dout) { out_d_dout_dims = out_d_dout->dims(); if (out_d_dout_dims != dout_help.dims()) { out_d_dout->Resize(dout_help.dims()); } - - ddx_conj = Conj(dev_ctx, ddx_help); - ddy_conj = Conj(dev_ctx, ddy_help); + if (ddx && !ddx_conj.IsInitialized()) { + ddx_conj = Conj(dev_ctx, ddx_help); + } + if (ddy && !ddy_conj.IsInitialized()) { + ddy_conj = Conj(dev_ctx, ddy_help); + } } - DDim out_d_ddx_dims; if (out_d_ddx) { out_d_ddx_dims = out_d_ddx->dims(); if (out_d_ddx_dims != x_help.dims()) { out_d_ddx->Resize(x_help.dims()); } + dout_conj = Conj(dev_ctx, dout_help); + y_conj = Conj(dev_ctx, y_help); } - DDim out_d_ddy_dims; if (out_d_ddy) { out_d_ddy_dims = out_d_ddy->dims(); if (out_d_ddy_dims != y_help.dims()) { out_d_ddy->Resize(y_help.dims()); } - } - - if (out_d_ddx || out_d_ddy) { + if (dout_conj.IsInitialized()) { + dout_conj = Conj(dev_ctx, dout_help); + } x_conj = Conj(dev_ctx, x_help); - y_conj = Conj(dev_ctx, y_help); - dout_conj = Conj(dev_ctx, dout_help); } bool d_dout_flag = false; bool d_ddx_flag = false; bool d_ddy_flag = false; - if (d_ddout) { auto d_ddout_mat = d_ddout.get(); if (d_ddout_mat.dims() != dout_help.dims()) { d_ddout_mat.Resize(dout_help.dims()); } - if (out_d_y) { + if (out_d_y && ddx) { if (transpose_x && transpose_y) { // out_d_y = d_ddout' * ddx' CalcInputGrad(dev_ctx, @@ -1032,7 +1109,7 @@ void MatmulTripleGradKernel(const Context& dev_ctx, false); } } - if (out_d_x) { + if (out_d_x && ddy) { if (transpose_x && transpose_y) { // out_d_x = ddy' * d_ddout' CalcInputGrad(dev_ctx, @@ -1201,7 +1278,7 @@ void MatmulTripleGradKernel(const Context& dev_ctx, } // compute d_dout1 - if (out_d_dout) { + if (out_d_dout && ddx) { CalcInputGrad(dev_ctx, ddx_conj, transpose_x, @@ -1271,7 +1348,7 @@ void MatmulTripleGradKernel(const Context& dev_ctx, } // compute d_dout2 - if (out_d_dout) { + if (out_d_dout && ddy) { CalcInputGrad(dev_ctx, d_dx_mat, transpose_x, @@ -1376,8 +1453,12 @@ void MatmulTripleGradKernel(const Context& dev_ctx, DenseTensor out_d_ddy_help; if (out_d_dout) { - ddx_conj = Conj(dev_ctx, ddx); - ddy_conj = Conj(dev_ctx, ddy); + if (ddx) { + ddx_conj = Conj(dev_ctx, ddx.get()); + } + if (ddy) { + ddy_conj = Conj(dev_ctx, ddy.get()); + } } if (out_d_ddx || out_d_ddy) { x_conj = Conj(dev_ctx, x); @@ -1388,7 +1469,7 @@ void MatmulTripleGradKernel(const Context& dev_ctx, if (transpose_x) { if (transpose_y) { // dX = ddY' d_ddout’, dY = d_ddout’ ddX' - if (out_d_x) + if (out_d_x && ddy && d_ddout) MatMulFunction(dev_ctx, ddy_conj, d_ddout.get(), @@ -1397,7 +1478,7 @@ void MatmulTripleGradKernel(const Context& dev_ctx, &out_dx_help, true, true); - if (out_d_y) + if (out_d_y && ddx && d_ddout) MatMulFunction(dev_ctx, d_ddout.get(), ddx_conj, @@ -1408,7 +1489,7 @@ void MatmulTripleGradKernel(const Context& dev_ctx, true); } else { // dX = ddY d_ddout', dY = ddX d_ddout - if (out_d_x) + if (out_d_x && ddy && d_ddout) MatMulFunction(dev_ctx, ddy_conj, d_ddout.get(), @@ -1417,7 +1498,7 @@ void MatmulTripleGradKernel(const Context& dev_ctx, &out_dx_help, false, true); - if (out_d_y) + if (out_d_y && ddx && d_ddout) MatMulFunction(dev_ctx, ddx_conj, d_ddout.get(), @@ -1427,10 +1508,11 @@ void MatmulTripleGradKernel(const Context& dev_ctx, false, false); } + } else { if (transpose_y) { // dX = d_ddout ddY, dY = d_ddout’ ddX - if (out_d_x) + if (out_d_x && ddy && d_ddout) MatMulFunction(dev_ctx, 
d_ddout.get(), ddy_conj, @@ -1439,7 +1521,7 @@ void MatmulTripleGradKernel(const Context& dev_ctx, &out_dx_help, false, false); - if (out_d_y) + if (out_d_y && ddx && d_ddout) MatMulFunction(dev_ctx, d_ddout.get(), ddx_conj, @@ -1450,7 +1532,7 @@ void MatmulTripleGradKernel(const Context& dev_ctx, false); } else { // dX = d_ddout ddY', dY = ddX' d_ddout - if (out_d_x) + if (out_d_x && ddy && d_ddout) MatMulFunction(dev_ctx, d_ddout.get(), ddy_conj, @@ -1459,7 +1541,7 @@ void MatmulTripleGradKernel(const Context& dev_ctx, &out_dx_help, false, true); - if (out_d_y) + if (out_d_y && ddx && d_ddout) MatMulFunction(dev_ctx, ddx_conj, d_ddout.get(), @@ -1501,6 +1583,7 @@ void MatmulTripleGradKernel(const Context& dev_ctx, dy_reduce_dims.push_back(idx); } } + // Reduce sum to get grad by ReduceSum if (out_d_x) { if (dx_reduce_dims.empty()) { @@ -1524,107 +1607,135 @@ void MatmulTripleGradKernel(const Context& dev_ctx, // compute d_dout if (out_d_dout) { - MatMulFunction(dev_ctx, - d_dx.get(), - ddy_conj, - x_dims, - y_dims, - out_d_dout, - transpose_x, - transpose_y); - MatMulFunction(dev_ctx, - ddx_conj, - d_dy.get(), - x_dims, - y_dims, - out_d_dout, - transpose_x, - transpose_y, - true); - } - // compute d_ddx - if (out_d_ddx) { - if (transpose_x && transpose_y) { - // out_d_ddx1 = y' * d_ddout' + if (d_dx && ddy) { MatMulFunction(dev_ctx, - y_conj, - d_ddout.get(), + d_dx.get(), + ddy_conj, + x_dims, y_dims, - dout_dims, - &out_d_ddx_help, - true, - true); - // out_d_ddx2 = D_DY' * DOut' + out_d_dout, + transpose_x, + transpose_y); + } + if (d_dy && ddx) { MatMulFunction(dev_ctx, + ddx_conj, d_dy.get(), - dout_conj, + x_dims, y_dims, - dout_dims, - &out_d_ddx_help, - true, - true, + out_d_dout, + transpose_x, + transpose_y, true); + } + } + + // compute d_ddx + if (out_d_ddx) { + if (transpose_x && transpose_y) { + // out_d_ddx1 = y' * d_ddout' + if (d_ddout) { + MatMulFunction(dev_ctx, + y_conj, + d_ddout.get(), + y_dims, + dout_dims, + &out_d_ddx_help, + true, + true); + } + + // out_d_ddx2 = D_DY' * DOut' + if (d_dy) { + MatMulFunction(dev_ctx, + d_dy.get(), + dout_conj, + y_dims, + dout_dims, + &out_d_ddx_help, + true, + true, + true); + } + } else if (transpose_x) { // out_d_ddx1 = y * d_ddout' - MatMulFunction(dev_ctx, - y_conj, - d_ddout.get(), - y_dims, - dout_dims, - &out_d_ddx_help, - false, - true); + if (d_ddout) { + MatMulFunction(dev_ctx, + y_conj, + d_ddout.get(), + y_dims, + dout_dims, + &out_d_ddx_help, + false, + true); + } + // out_d_ddx2 = D_DY * Dout' - MatMulFunction(dev_ctx, - d_dy.get(), - dout_conj, - y_dims, - dout_dims, - &out_d_ddx_help, - false, - true, - true); + if (d_dy) { + MatMulFunction(dev_ctx, + d_dy.get(), + dout_conj, + y_dims, + dout_dims, + &out_d_ddx_help, + false, + true, + true); + } + } else if (transpose_y) { // out_d_ddx1 = d_ddout * y - MatMulFunction(dev_ctx, - d_ddout.get(), - y_conj, - dout_dims, - y_dims, - &out_d_ddx_help, - false, - false); + if (d_ddout) { + MatMulFunction(dev_ctx, + d_ddout.get(), + y_conj, + dout_dims, + y_dims, + &out_d_ddx_help, + false, + false); + } + // out_d_ddx2 = Dout * D_DY - MatMulFunction(dev_ctx, - dout_conj, - d_dy.get(), - dout_dims, - y_dims, - &out_d_ddx_help, - false, - false, - true); + if (d_dy) { + MatMulFunction(dev_ctx, + dout_conj, + d_dy.get(), + dout_dims, + y_dims, + &out_d_ddx_help, + false, + false, + true); + } } else { // out_d_ddx1 = d_ddout * y' - MatMulFunction(dev_ctx, - d_ddout.get(), - y_conj, - dout_dims, - y_dims, - &out_d_ddx_help, - false, - true); + if (d_ddout) { + 
MatMulFunction(dev_ctx, + d_ddout.get(), + y_conj, + dout_dims, + y_dims, + &out_d_ddx_help, + false, + true); + } + // out_d_ddx2 = Dout * D_DY' - MatMulFunction(dev_ctx, - dout_conj, - d_dy.get(), - dout_dims, - y_dims, - &out_d_ddx_help, - false, - true, - true); + if (d_dy) { + MatMulFunction(dev_ctx, + dout_conj, + d_dy.get(), + dout_dims, + y_dims, + &out_d_ddx_help, + false, + true, + true); + } } + if (dx_reduce_dims.empty()) { *out_d_ddx = std::move(out_d_ddx_help); } else { @@ -1638,84 +1749,107 @@ void MatmulTripleGradKernel(const Context& dev_ctx, if (out_d_ddy) { if (transpose_x && transpose_y) { // out_d_ddy1 = d_ddout' * x' - MatMulFunction(dev_ctx, - d_ddout.get(), - x_conj, - dout_dims, - x_dims, - &out_d_ddy_help, - true, - true); + if (d_ddout) { + MatMulFunction(dev_ctx, + d_ddout.get(), + x_conj, + dout_dims, + x_dims, + &out_d_ddy_help, + true, + true); + } + // out_d_ddy2 = dout' * d_dx' - MatMulFunction(dev_ctx, - dout_conj, - d_dx.get(), - dout_dims, - x_dims, - &out_d_ddy_help, - true, - true, - true); + if (d_dx) { + MatMulFunction(dev_ctx, + dout_conj, + d_dx.get(), + dout_dims, + x_dims, + &out_d_ddy_help, + true, + true, + true); + } + } else if (transpose_x) { // out_d_ddy1 = x * d_ddout - MatMulFunction(dev_ctx, - x_conj, - d_ddout.get(), - x_dims, - dout_dims, - &out_d_ddy_help, - false, - false); + if (d_ddout) { + MatMulFunction(dev_ctx, + x_conj, + d_ddout.get(), + x_dims, + dout_dims, + &out_d_ddy_help, + false, + false); + } + // out_d_ddy2 = d_dx * dout - MatMulFunction(dev_ctx, - d_dx.get(), - dout_conj, - x_dims, - dout_dims, - &out_d_ddy_help, - false, - false, - true); + if (d_dx) { + MatMulFunction(dev_ctx, + d_dx.get(), + dout_conj, + x_dims, + dout_dims, + &out_d_ddy_help, + false, + false, + true); + } + } else if (transpose_y) { // out_d_ddy1 = d_ddout' * x - MatMulFunction(dev_ctx, - d_ddout.get(), - x_conj, - dout_dims, - x_dims, - &out_d_ddy_help, - true, - false); + if (d_ddout) { + MatMulFunction(dev_ctx, + d_ddout.get(), + x_conj, + dout_dims, + x_dims, + &out_d_ddy_help, + true, + false); + } + // out_d_ddy2 = dout' * d_dx - MatMulFunction(dev_ctx, - dout_conj, - d_dx.get(), - dout_dims, - x_dims, - &out_d_ddy_help, - true, - false, - true); + if (d_dx) { + MatMulFunction(dev_ctx, + dout_conj, + d_dx.get(), + dout_dims, + x_dims, + &out_d_ddy_help, + true, + false, + true); + } + } else { // out_d_ddy1 = x' * d_ddout - MatMulFunction(dev_ctx, - x_conj, - d_ddout.get(), - x_dims, - dout_dims, - &out_d_ddy_help, - true, - false); + if (d_ddout) { + MatMulFunction(dev_ctx, + x_conj, + d_ddout.get(), + x_dims, + dout_dims, + &out_d_ddy_help, + true, + false); + } + // out_d_ddy2 = d_dx' * dout - MatMulFunction(dev_ctx, - d_dx.get(), - dout_conj, - x_dims, - dout_dims, - &out_d_ddy_help, - true, - false, - true); + if (d_dx) { + MatMulFunction(dev_ctx, + d_dx.get(), + dout_conj, + x_dims, + dout_dims, + &out_d_ddy_help, + true, + false, + true); + } } if (dy_reduce_dims.empty()) { diff --git a/paddle/phi/kernels/logcumsumexp_grad_kernel.h b/paddle/phi/kernels/logcumsumexp_grad_kernel.h index e78a79550657eb..a16dc5318cb1ff 100644 --- a/paddle/phi/kernels/logcumsumexp_grad_kernel.h +++ b/paddle/phi/kernels/logcumsumexp_grad_kernel.h @@ -28,4 +28,4 @@ void LogcumsumexpGradKernel(const Context& dev_ctx, bool exclusive, bool reverse, DenseTensor* d_x); -} +} // namespace phi diff --git a/paddle/phi/kernels/matmul_grad_kernel.h b/paddle/phi/kernels/matmul_grad_kernel.h index 47c6acdcb39230..572b58eb0ddc64 100644 --- 
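In MatmulDoubleGradKernel and MatmulTripleGradKernel above, any of ddx, ddy, d_dx, d_dy and d_ddout may now be absent; when a missing term is still needed for a requested output it is materialized as a zero tensor with phi::FullLike, and each MatMulFunction call is additionally guarded on the operands it consumes. A hedged Python sketch of a second-order matmul gradient that exercises this (tensor names are illustrative):

import paddle

x = paddle.randn([2, 3])
y = paddle.randn([3, 4])
x.stop_gradient = False
y.stop_gradient = False

out = paddle.matmul(x, y)
dx, dy = paddle.grad(out, [x, y], create_graph=True)

# Differentiate dx with respect to y only; the matmul double/triple grad
# kernels are then invoked with some optional gradient terms missing, which
# this patch now permits.
(d2,) = paddle.grad(dx, y)
print(d2.shape)  # same shape as y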
a/paddle/phi/kernels/matmul_grad_kernel.h +++ b/paddle/phi/kernels/matmul_grad_kernel.h @@ -47,8 +47,8 @@ void MatmulTripleGradKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y, const DenseTensor& dout, - const DenseTensor& ddx, - const DenseTensor& ddy, + const paddle::optional& ddx, + const paddle::optional& ddy, const paddle::optional& d_dx, const paddle::optional& d_dy, const paddle::optional& d_ddout, diff --git a/paddle/phi/kernels/xpu/full_kernel.cc b/paddle/phi/kernels/xpu/full_kernel.cc index c5fca8881e221e..ab3e0344478a46 100644 --- a/paddle/phi/kernels/xpu/full_kernel.cc +++ b/paddle/phi/kernels/xpu/full_kernel.cc @@ -140,6 +140,8 @@ PD_REGISTER_KERNEL(full_like, float, int, int64_t, - phi::dtype::float16) { + phi::dtype::float16, + phi::dtype::complex, + phi::dtype::complex) { kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND); } From 708c4f889568071069bdeaf7ac179387d899e8fa Mon Sep 17 00:00:00 2001 From: Weilong Wu Date: Fri, 2 Dec 2022 12:50:07 +0800 Subject: [PATCH 109/154] [Eager, Performance Optimization] modify AllocateFrom to reduce deconstruction of shared_ptr (#48548) --- paddle/phi/core/dense_tensor.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/paddle/phi/core/dense_tensor.cc b/paddle/phi/core/dense_tensor.cc index 3c6f306e8c8f0d..3d717969afaf7e 100644 --- a/paddle/phi/core/dense_tensor.cc +++ b/paddle/phi/core/dense_tensor.cc @@ -128,7 +128,16 @@ void* DenseTensor::AllocateFrom(Allocator* allocator, if (!holder_ || holder_->size() < bytes + meta_.offset) { meta_.offset = 0; VLOG(10) << "Allocate data with bytes: " << bytes; - ResetHolder(allocator->Allocate(bytes)); + auto holder = allocator->Allocate(bytes); + if (holder_) { + PADDLE_ENFORCE_LE( + numel() * static_cast(SizeOf(dtype)) + + static_cast(meta_.offset), + static_cast(holder->size()), + phi::errors::InvalidArgument( + "The size of Holder is not enough to store the Tensor.")); + } + holder_ = std::move(holder); } return reinterpret_cast(reinterpret_cast(holder_->ptr()) + From 33d90ae8eb344bed6734a9ee088616a515f260bc Mon Sep 17 00:00:00 2001 From: cyber-pioneer <116002591+cyber-pioneer@users.noreply.github.com> Date: Fri, 2 Dec 2022 12:59:26 +0800 Subject: [PATCH 110/154] move paddle.fluid.layers.tensor.create_parameter to paddle.tensor.creation.create_parameter (#48579) --- python/paddle/__init__.py | 2 +- .../contrib/slim/quantization/adaround.py | 3 +- .../slim/tests/test_quantization_pass.py | 2 +- python/paddle/fluid/contrib/sparsity/asp.py | 2 +- python/paddle/fluid/io.py | 20 ++--- python/paddle/fluid/layers/tensor.py | 83 ------------------ .../auto_parallel/test_dist_op_cost.py | 18 ++-- .../unittests/ipu/test_weight_decay_ipu.py | 2 +- ...n_reshape_transpose_matmul_v2_fuse_pass.py | 2 +- .../test_auto_parallel_reshard_dpmppp.py | 4 +- .../test_auto_parallel_reshard_mppp.py | 4 +- .../tests/unittests/test_calc_gradient.py | 8 +- .../fluid/tests/unittests/test_cholesky_op.py | 2 +- .../tests/unittests/test_create_parameter.py | 8 +- .../test_eager_deletion_padding_rnn.py | 12 +-- ...test_imperative_container_parameterlist.py | 6 +- .../test_imperative_load_static_param.py | 4 +- .../fluid/tests/unittests/test_mul_nn_grad.py | 8 +- .../fluid/tests/unittests/test_nn_grad.py | 4 +- .../tests/unittests/test_norm_nn_grad.py | 13 ++- .../tests/unittests/test_optimizer_grad.py | 6 +- python/paddle/framework/__init__.py | 1 - python/paddle/static/__init__.py | 4 +- python/paddle/static/nn/__init__.py | 2 +- python/paddle/tensor/creation.py | 85 
++++++++++++++++++- 25 files changed, 150 insertions(+), 155 deletions(-) diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 95e179bb97dc89..8e03c3b9074c83 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -93,6 +93,7 @@ from .tensor.attribute import real # noqa: F401 from .tensor.attribute import imag # noqa: F401 from .tensor.attribute import is_floating_point # noqa: F401 +from .tensor.creation import create_parameter # noqa: F401 from .tensor.creation import to_tensor # noqa: F401 from .tensor.creation import diag # noqa: F401 from .tensor.creation import diagflat # noqa: F401 @@ -326,7 +327,6 @@ from .framework.random import get_cuda_rng_state # noqa: F401 from .framework.random import set_cuda_rng_state # noqa: F401 from .framework import ParamAttr # noqa: F401 -from .framework import create_parameter # noqa: F401 from .framework import CPUPlace # noqa: F401 from .framework import IPUPlace # noqa: F401 from .framework import CUDAPlace # noqa: F401 diff --git a/python/paddle/fluid/contrib/slim/quantization/adaround.py b/python/paddle/fluid/contrib/slim/quantization/adaround.py index d6aff8d41c69b1..b024c0d7739967 100644 --- a/python/paddle/fluid/contrib/slim/quantization/adaround.py +++ b/python/paddle/fluid/contrib/slim/quantization/adaround.py @@ -20,7 +20,6 @@ import paddle import paddle.fluid as fluid -import paddle from ....log_helper import get_logger from .utils import ( @@ -148,7 +147,7 @@ def initialize_alpha(self, tensor, scale, var_name): tensor_floor = np.floor(tensor_scale) tensor = tensor_scale - tensor_floor alpha = -np.log((ZETA - GAMMA) / (tensor - GAMMA) - 1) - self.alpha_v = fluid.layers.create_parameter( + self.alpha_v = paddle.create_parameter( shape=alpha.shape, dtype="float32", name=var_name + ".alpha", diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py index be42ab5cf2e20a..f64a047ea49264 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py @@ -73,7 +73,7 @@ def conv_bn_layer( conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True) short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None) hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu') - matmul_weight = fluid.layers.create_parameter( + matmul_weight = paddle.create_parameter( shape=[1, 16, 32, 32], dtype='float32' ) hidden = fluid.layers.matmul(hidden, matmul_weight, True, True) diff --git a/python/paddle/fluid/contrib/sparsity/asp.py b/python/paddle/fluid/contrib/sparsity/asp.py index d2165def6f0cbf..fda41705373c80 100644 --- a/python/paddle/fluid/contrib/sparsity/asp.py +++ b/python/paddle/fluid/contrib/sparsity/asp.py @@ -881,7 +881,7 @@ def _create_mask_variables(cls, main_program, startup_program, params): for param in params: if ASPHelper._is_supported_layer(main_program, param.name): if param.name not in asp_info.mask_vars: - mask_param = layers.create_parameter( + mask_param = paddle.create_parameter( name=ASPHelper._get_mask_name(param.name), shape=param.shape, dtype=param.dtype, diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index a308d5e2613934..10a91a15f841f1 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -214,8 +214,8 @@ def get_program_parameter(program): paddle.enable_static() data = fluid.data(name="img", shape=[64, 784]) - w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', 
name='fc_w') - b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b') + w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') + b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b') list_para = fluid.io.get_program_parameter( fluid.default_main_program() ) """ return list(filter(is_parameter, program.list_vars())) @@ -240,8 +240,8 @@ def get_program_persistable_vars(program): paddle.enable_static() data = fluid.data(name="img", shape=[64, 784]) - w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') - b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b') + w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') + b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b') list_para = fluid.io.get_program_persistable_vars( fluid.default_main_program() ) """ return list(filter(is_persistable, program.list_vars())) @@ -356,8 +356,8 @@ def save_vars( startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False) - w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') - b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b') + w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') + b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b') hidden_w = fluid.layers.matmul(x=data, y=w) hidden_b = fluid.layers.elementwise_add(hidden_w, b) place = fluid.CPUPlace() @@ -825,8 +825,8 @@ def load_vars( startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False) - w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') - b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b') + w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') + b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b') hidden_w = fluid.layers.matmul(x=data, y=w) hidden_b = fluid.layers.elementwise_add(hidden_w, b) place = fluid.CPUPlace() @@ -1590,8 +1590,8 @@ def load_inference_model( startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False) - w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32') - b = fluid.layers.create_parameter(shape=[200], dtype='float32') + w = paddle.create_parameter(shape=[784, 200], dtype='float32') + b = paddle.create_parameter(shape=[200], dtype='float32') hidden_w = fluid.layers.matmul(x=data, y=w) hidden_b = fluid.layers.elementwise_add(hidden_w, b) place = fluid.CPUPlace() diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 1066efabf12efc..4c00061ae7608b 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -48,7 +48,6 @@ __all__ = [ 'create_tensor', - 'create_parameter', 'create_global_var', 'cast', 'tensor_array_to_tensor', @@ -108,88 +107,6 @@ def create_tensor(dtype, name=None, persistable=False): ) -def create_parameter( - shape, dtype, name=None, attr=None, is_bias=False, default_initializer=None -): - """ - :api_attr: Static Graph - - This function creates a parameter. The parameter is a learnable variable, which can have - gradient, and can be optimized. - - NOTE: this is a very low-level API. 
This API is useful when you create - operator by your self. instead of using layers. - - Parameters: - shape (list of int): Shape of the parameter - dtype (str): Data type of the parameter - name (str, optional): For detailed information, please refer to - :ref:`api_guide_Name` . Usually name is no need to set and None by default. - attr (ParamAttr, optional): Attributes of the parameter - is_bias (bool, optional): This can affect which default initializer is chosen - when default_initializer is None. If is_bias, - initializer.Constant(0.0) will be used. Otherwise, - Xavier() will be used. - default_initializer (Initializer, optional): Initializer for the parameter - - Returns: - The created parameter. - - Examples: - .. code-block:: python - - import paddle - paddle.enable_static() - W = paddle.static.create_parameter(shape=[784, 200], dtype='float32') - """ - check_type(shape, 'shape', (list, tuple, numpy.ndarray), 'create_parameter') - for item in shape: - check_type( - item, - 'item of shape', - ( - int, - numpy.uint8, - numpy.int8, - numpy.int16, - numpy.int32, - numpy.int64, - ), - 'create_parameter', - ) - - check_dtype( - dtype, - 'dtype', - [ - 'bool', - 'float16', - 'float32', - 'float64', - 'int8', - 'int16', - 'int32', - 'int64', - 'uint8', - ], - 'create_parameter', - ) - check_type(attr, 'attr', (type(None), ParamAttr), 'create_parameter') - check_type( - default_initializer, - 'default_initializer', - (type(None), Initializer), - 'create_parameter', - ) - - helper = LayerHelper("create_parameter", **locals()) - if attr is None: - attr = ParamAttr(name=name) - return helper.create_parameter( - attr, shape, convert_dtype(dtype), is_bias, default_initializer - ) - - def create_global_var( shape, value, dtype, persistable=False, force_cpu=False, name=None ): diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py index f3956cda20801d..163309f3a37e30 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py @@ -152,7 +152,7 @@ def make_program(): out = paddle.transpose(out, [1, 0]) # [8, 2] [-1, 0] # matmul - param1 = paddle.fluid.layers.create_parameter( + param1 = paddle.create_parameter( [4, 8], paddle.float32 ) # [2, 8] [0, -1] auto.shard_tensor( @@ -160,7 +160,7 @@ def make_program(): auto.ProcessMesh([0, 1], dim_names=["x"]), ["x", None], ) - param2 = paddle.fluid.layers.create_parameter( + param2 = paddle.create_parameter( [8, 8], paddle.float32 ) # [8, 4] [-1, 0] auto.shard_tensor( @@ -171,7 +171,7 @@ def make_program(): out1 = paddle.fluid.layers.matmul( out, param1 ) # [8, 8] [-1, -1] - tmp_param = paddle.fluid.layers.create_parameter( + tmp_param = paddle.create_parameter( [8, 8], paddle.float32 ) # [8, 8] [-1, -1] auto.shard_tensor( @@ -263,7 +263,7 @@ def make_program(): out = paddle.transpose(out, [1, 0]) # [8, 2] [-1, 0] # matmul_v2 - param1 = paddle.fluid.layers.create_parameter( + param1 = paddle.create_parameter( [4, 8], paddle.float32 ) # [2, 8] [0, -1] auto.shard_tensor( @@ -271,7 +271,7 @@ def make_program(): auto.ProcessMesh([0, 1], dim_names=["x"]), ["x", None], ) - param2 = paddle.fluid.layers.create_parameter( + param2 = paddle.create_parameter( [8, 8], paddle.float32 ) # [8, 4] [-1, 0] auto.shard_tensor( @@ -280,7 +280,7 @@ def make_program(): [None, "x"], ) out1 = paddle.matmul(out, param1) # [8, 8] [-1, -1] - tmp_param = 
paddle.fluid.layers.create_parameter( + tmp_param = paddle.create_parameter( [8, 8], paddle.float32 ) # [8, 8] [-1, -1] auto.shard_tensor( @@ -370,7 +370,7 @@ def make_program(): out = paddle.transpose(out, [1, 0]) # [8, 2] [-1, 0] # mul - param1 = paddle.fluid.layers.create_parameter( + param1 = paddle.create_parameter( [4, 8], paddle.float32 ) # [2, 8] [0, -1] auto.shard_tensor( @@ -378,7 +378,7 @@ def make_program(): auto.ProcessMesh([0, 1], dim_names=["x"]), ["x", None], ) - param2 = paddle.fluid.layers.create_parameter( + param2 = paddle.create_parameter( [8, 8], paddle.float32 ) # [8, 4] [-1, 0] auto.shard_tensor( @@ -388,7 +388,7 @@ def make_program(): ) out1 = paddle.fluid.layers.mul(out, param1) # [8, 8] [-1, -1] - tmp_param = paddle.fluid.layers.create_parameter( + tmp_param = paddle.create_parameter( [8, 8], paddle.float32 ) # [8, 8] [-1, -1] auto.shard_tensor( diff --git a/python/paddle/fluid/tests/unittests/ipu/test_weight_decay_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_weight_decay_ipu.py index 7d63c06d470afd..08bee7c90d91c9 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_weight_decay_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_weight_decay_ipu.py @@ -72,7 +72,7 @@ def exclude_fn(param): image = paddle.static.data( name='image', shape=[1, 3, 10, 10], dtype='float32' ) - bias = paddle.fluid.layers.create_parameter( + bias = paddle.create_parameter( shape=[1, 3, 10, 10], is_bias=True, dtype='float32' ) add1 = image + bias diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py index f6f8bcc17c55c8..ad2f5777f203b1 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py @@ -32,7 +32,7 @@ def setUp(self): data = fluid.data( name="data", shape=self.data_shape, dtype="float32" ) - weight = fluid.layers.create_parameter( + weight = paddle.create_parameter( shape=self.weight_shape, dtype="float32" ) diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py index 8fc9003379211b..b9320eee8264b5 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py @@ -69,9 +69,7 @@ def forward(self, input): out = self.linear0(out) out = F.gelu(out, approximate=True) out = self.linear1(out) - param = paddle.fluid.layers.create_parameter( - [1024, 4096], paddle.float32 - ) + param = paddle.create_parameter([1024, 4096], paddle.float32) auto.shard_tensor(param, PP_MESH_1, [None, "y"]) out = paddle.fluid.layers.mul(out, param) diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py index e3c284bd56933a..84309aeb8a3d7f 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py @@ -79,9 +79,7 @@ def forward(self, input): auto.shard_tensor(self.linear2.weight, PP_MESH_1, ["x", None]) w_out = self.word_embeddings(input) out = self.linear0(w_out) - param = paddle.fluid.layers.create_parameter( - [4096, 4096], paddle.float32 - ) + param = 
paddle.create_parameter([4096, 4096], paddle.float32) auto.shard_tensor(param, PP_MESH_0, ["x", None]) out = paddle.fluid.layers.mul(out, param) gelu_out = F.gelu(out, approximate=True) diff --git a/python/paddle/fluid/tests/unittests/test_calc_gradient.py b/python/paddle/fluid/tests/unittests/test_calc_gradient.py index 58c4ee6083f7ff..c6dcbc0cb7bca3 100644 --- a/python/paddle/fluid/tests/unittests/test_calc_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_calc_gradient.py @@ -27,8 +27,8 @@ def test_calc_gradient(self): main = fluid.Program() startup = fluid.Program() with fluid.program_guard(main, startup): - x = layers.create_parameter(dtype="float32", shape=[5, 10]) - y = layers.create_parameter(dtype="float32", shape=[10, 8]) + x = paddle.create_parameter(dtype="float32", shape=[5, 10]) + y = paddle.create_parameter(dtype="float32", shape=[10, 8]) mul_out = layers.mul(x=x, y=y) mean_out = paddle.mean(mul_out) a = calc_gradient(mean_out, mul_out) @@ -45,7 +45,7 @@ def test1(self): startup = fluid.Program() with fluid.program_guard(main, startup): net = lambda x: x * x - x = fluid.layers.create_parameter( + x = paddle.create_parameter( name='x', shape=[1], dtype='float32', @@ -66,7 +66,7 @@ def test2(self): main = fluid.Program() startup = fluid.Program() with fluid.program_guard(main, startup): - x = fluid.layers.create_parameter( + x = paddle.create_parameter( name='x', shape=[1], dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/test_cholesky_op.py b/python/paddle/fluid/tests/unittests/test_cholesky_op.py index ffeb18b0ff90b5..20ab3e73ab3aa5 100644 --- a/python/paddle/fluid/tests/unittests/test_cholesky_op.py +++ b/python/paddle/fluid/tests/unittests/test_cholesky_op.py @@ -73,7 +73,7 @@ def func(self, place): root_data = self.root_data[..., :3, :3] prog = fluid.Program() with fluid.program_guard(prog): - root = layers.create_parameter( + root = paddle.create_parameter( dtype=root_data.dtype, shape=root_data.shape ) root_t = paddle.transpose(root, self.trans_dims) diff --git a/python/paddle/fluid/tests/unittests/test_create_parameter.py b/python/paddle/fluid/tests/unittests/test_create_parameter.py index 75231c40474b95..e5bad117730a21 100644 --- a/python/paddle/fluid/tests/unittests/test_create_parameter.py +++ b/python/paddle/fluid/tests/unittests/test_create_parameter.py @@ -27,24 +27,24 @@ def func_errors(self): with program_guard(Program(), Program()): def test_shape(): - fluid.layers.create_parameter(1, np.float32) + paddle.create_parameter(1, np.float32) self.assertRaises(TypeError, test_shape) def test_shape_item(): - fluid.layers.create_parameter([1.0, 2.0, 3.0], "float32") + paddle.create_parameter([1.0, 2.0, 3.0], "float32") self.assertRaises(TypeError, test_shape_item) def test_attr(): - fluid.layers.create_parameter( + paddle.create_parameter( [1, 2, 3], np.float32, attr=np.array([i for i in range(6)]) ) self.assertRaises(TypeError, test_attr) def test_default_initializer(): - fluid.layers.create_parameter( + paddle.create_parameter( [1, 2, 3], np.float32, default_initializer=np.array([i for i in range(6)]), diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py index 8ba799e84bc4c1..921e4a4e43d2e6 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -127,7 +127,7 @@ def padding_rnn(input_embedding, len=3, init_hidden=None, 
init_cell=None): cell_array = [] mask_array = [] for i in range(num_layers): - weight_1 = layers.create_parameter( + weight_1 = paddle.create_parameter( [hidden_size * 2, hidden_size * 4], dtype="float32", name="fc_weight1_" + str(i), @@ -136,7 +136,7 @@ def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None): ), ) weight_1_arr.append(weight_1) - bias_1 = layers.create_parameter( + bias_1 = paddle.create_parameter( [hidden_size * 4], dtype="float32", name="fc_bias1_" + str(i), @@ -248,7 +248,7 @@ def encoder_static( cell_array = [] mask_array = [] for i in range(num_layers): - weight_1 = layers.create_parameter( + weight_1 = paddle.create_parameter( [hidden_size * 2, hidden_size * 4], dtype="float32", name="fc_weight1_" + str(i), @@ -257,7 +257,7 @@ def encoder_static( ), ) weight_1_arr.append(weight_1) - bias_1 = layers.create_parameter( + bias_1 = paddle.create_parameter( [hidden_size * 4], dtype="float32", name="fc_bias1_" + str(i), @@ -442,7 +442,7 @@ def encoder_static( rnn_out = paddle.reshape(rnn_out, shape=[-1, num_steps, hidden_size]) - softmax_weight = layers.create_parameter( + softmax_weight = paddle.create_parameter( [hidden_size, vocab_size], dtype="float32", name="softmax_weight", @@ -450,7 +450,7 @@ def encoder_static( low=-init_scale, high=init_scale ), ) - softmax_bias = layers.create_parameter( + softmax_bias = paddle.create_parameter( [vocab_size], dtype="float32", name='softmax_bias', diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py b/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py index 92957890e3dbda..763210361fe2f8 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_parameterlist.py @@ -30,7 +30,7 @@ def __init__(self, num_stacked_param, use_fluid_api): def paddle_imperative_ParameterList(self, num_stacked_param): return paddle.nn.ParameterList( - [fluid.layers.create_parameter(shape=[2, 2], dtype='float32')] + [paddle.create_parameter(shape=[2, 2], dtype='float32')] * num_stacked_param ) @@ -53,13 +53,13 @@ def paramter_list(self, use_fluid_api): loss = paddle.mean(res) loss.backward() - model.params[num_stacked_param - 1] = fluid.layers.create_parameter( + model.params[num_stacked_param - 1] = paddle.create_parameter( shape=[2, 3], dtype='float32' ) res = model(x) self.assertListEqual(res.shape, [5, 3]) model.params.append( - fluid.layers.create_parameter(shape=[3, 4], dtype='float32') + paddle.create_parameter(shape=[3, 4], dtype='float32') ) self.assertEqual(len(model.params), num_stacked_param + 1) res = model(x) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py index 528ddac3ff4e57..2d80a3a1ee6fab 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -149,10 +149,10 @@ def testLoadStaticModel(self): nodes_vector, edge_set, 6, 1, 2 ) - para1 = fluid.layers.create_parameter( + para1 = paddle.create_parameter( [100, 100], 'float32', name="weight_test_1" ) - para2 = fluid.layers.create_parameter( + para2 = paddle.create_parameter( [20, 200], 'float32', name="weight_test_2" ) diff --git a/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py b/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py index 099280161a97b3..b6c3f03f979a1f 100644 --- 
a/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_mul_nn_grad.py @@ -31,8 +31,8 @@ class TestMulGradCheck(unittest.TestCase): def func(self, place): prog = fluid.Program() with fluid.program_guard(prog): - x = layers.create_parameter(dtype="float64", shape=[2, 8], name='x') - y = layers.create_parameter(dtype="float64", shape=[8, 4], name='y') + x = paddle.create_parameter(dtype="float64", shape=[2, 8], name='x') + y = paddle.create_parameter(dtype="float64", shape=[8, 4], name='y') z = layers.mul(x=x, y=y) gradient_checker.grad_check([x, y], z, place=place) @@ -88,10 +88,10 @@ def func(self, place): eps = 0.005 dtype = np.float64 typename = "float64" - x = layers.create_parameter( + x = paddle.create_parameter( dtype=typename, shape=self.x_shape, name='x' ) - y = layers.create_parameter( + y = paddle.create_parameter( dtype=typename, shape=self.y_shape, name='y' ) out = layers.matmul( diff --git a/python/paddle/fluid/tests/unittests/test_nn_grad.py b/python/paddle/fluid/tests/unittests/test_nn_grad.py index a4030d8adbda8c..23de988e1554cc 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_nn_grad.py @@ -43,7 +43,7 @@ def config(self): self.ends = [3, 3, 6] self.axes = [0, 1, 2] self.x_arr = np.random.random([3, 4, 5, 2]).astype("float64") - self.inputs = layers.create_parameter( + self.inputs = paddle.create_parameter( dtype="float64", shape=[3, 4, 5, 2], name='x' ) @@ -61,7 +61,7 @@ def config(self): self.ends = [3, 3, 3] self.axes = [0, 1, 2] self.x_arr = np.random.random([3, 3, 3]).astype("float64") - self.inputs = layers.create_parameter( + self.inputs = paddle.create_parameter( dtype="float64", shape=[3, 3, 3], name='x3' ) diff --git a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py index 3142d63afcb1f3..bbcb5ef7b9b856 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py @@ -21,7 +21,6 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers class TestInstanceNormDoubleGradCheck(unittest.TestCase): @@ -34,7 +33,7 @@ def func(self, place): dtype = "float32" eps = 0.005 atol = 1e-4 - x = layers.create_parameter(dtype=dtype, shape=shape, name='x') + x = paddle.create_parameter(dtype=dtype, shape=shape, name='x') z = paddle.static.nn.instance_norm(input=x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) gradient_checker.double_grad_check( @@ -62,7 +61,7 @@ def func(self, place): dtype = "float32" eps = 0.005 atol = 1e-4 - x = layers.create_parameter(dtype=dtype, shape=shape, name='x') + x = paddle.create_parameter(dtype=dtype, shape=shape, name='x') z = paddle.static.nn.instance_norm( input=x, param_attr=False, bias_attr=False ) @@ -85,7 +84,7 @@ def func(self, place): dtype = "float32" eps = 0.005 atol = 1e-4 - x = layers.create_parameter(dtype=dtype, shape=shape, name='x') + x = paddle.create_parameter(dtype=dtype, shape=shape, name='x') z = paddle.nn.functional.instance_norm(x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) # check for static mode @@ -127,7 +126,7 @@ def func(self, place): dtype = "float32" eps = 0.005 atol = 1e-4 - x = layers.create_parameter(dtype=dtype, shape=shape, name='x') + x = paddle.create_parameter(dtype=dtype, shape=shape, name='x') z = paddle.nn.InstanceNorm2D(3)(x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) # check 
for static mode @@ -171,7 +170,7 @@ def func(self, place): dtype = "float32" eps = 0.005 atol = 1e-4 - x = layers.create_parameter(dtype=dtype, shape=self.shape, name='x') + x = paddle.create_parameter(dtype=dtype, shape=self.shape, name='x') z = fluid.layers.batch_norm( input=x, data_layout=self.data_layout, @@ -251,7 +250,7 @@ def func(self, place): chn = ( self.shape[1] if self.data_layout == 'NCHW' else self.shape[-1] ) - x = layers.create_parameter(dtype=dtype, shape=self.shape, name='x') + x = paddle.create_parameter(dtype=dtype, shape=self.shape, name='x') z = fluid.layers.batch_norm( input=x, data_layout=self.data_layout, diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py index a9f7c5de8d9b58..e20d563ebdc03a 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py @@ -77,20 +77,20 @@ def build_net(self, cond_i, use_bf16=False): mean_out = mean(sum_all) optimizer.minimize(mean_out) """ - param_x = fluid.layers.create_parameter( + param_x = paddle.create_parameter( dtype="float32", shape=self.shape, attr=fluid.ParamAttr(learning_rate=self.param_lr, name="param_x"), default_initializer=fluid.initializer.NumpyArrayInitializer(self.x), ) - param_y = fluid.layers.create_parameter( + param_y = paddle.create_parameter( dtype="float32", shape=self.shape, attr=fluid.ParamAttr(learning_rate=self.param_lr, name="param_y"), default_initializer=fluid.initializer.NumpyArrayInitializer(self.y), ) - param_z = fluid.layers.create_parameter( + param_z = paddle.create_parameter( dtype="float32", shape=self.shape, attr=fluid.ParamAttr(learning_rate=self.param_lr, name="param_z"), diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index 11250e32d35c49..30d637936804ed 100755 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -22,7 +22,6 @@ from .framework import is_grad_enabled # noqa: F401 from ..fluid.param_attr import ParamAttr # noqa: F401 -from ..fluid.layers.tensor import create_parameter # noqa: F401 from ..fluid.core import CPUPlace # noqa: F401 from ..fluid.core import IPUPlace # noqa: F401 from ..fluid.core import CUDAPlace # noqa: F401 diff --git a/python/paddle/static/__init__.py b/python/paddle/static/__init__.py index 118fe0b58bfdae..983138ce976c27 100644 --- a/python/paddle/static/__init__.py +++ b/python/paddle/static/__init__.py @@ -31,6 +31,9 @@ from ..fluid import Scope # noqa: F401 from .input import data # noqa: F401 from .input import InputSpec # noqa: F401 + +from ..tensor.creation import create_parameter # noqa: F401 + from ..fluid.executor import Executor # noqa: F401 from ..fluid.executor import global_scope # noqa: F401 from ..fluid.executor import scope_guard # noqa: F401 @@ -67,7 +70,6 @@ from ..fluid.io import save_vars # noqa: F401 from ..fluid.io import batch # noqa: F401 -from ..fluid.layers import create_parameter # noqa: F401 from ..fluid.layers import create_global_var # noqa: F401 from ..fluid.contrib.layers import ctr_metric_bundle # noqa: F401 from ..fluid.layers import exponential_decay # noqa: F401 diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py index 8e3048b21c5cb4..3d3cc5f8a2bb8e 100755 --- a/python/paddle/static/nn/__init__.py +++ b/python/paddle/static/nn/__init__.py @@ -22,12 +22,12 @@ from .common import conv3d_transpose # noqa: F401 from .common import py_func # noqa: F401 +from 
...tensor.creation import create_parameter # noqa: F401 from ...fluid.layers import batch_norm # noqa: F401 from ...fluid.layers import bilinear_tensor_product # noqa: F401 from ...fluid.layers import case # noqa: F401 from ...fluid.layers import cond # noqa: F401 from ...fluid.layers import conv2d # noqa: F401 -from ...fluid.layers import create_parameter # noqa: F401 from ...fluid.layers import crf_decoding # noqa: F401 from ...fluid.layers import layer_norm # noqa: F401 from ...fluid.layers import multi_box_head # noqa: F401 diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index e5005dbe16f206..c969ee3639bf93 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -31,10 +31,14 @@ convert_dtype, ) from ..fluid.framework import ( + Variable, _in_eager_without_dygraph_check, _in_legacy_dygraph, + device_guard, ) +from ..fluid.initializer import Initializer from ..fluid.layers import utils +from ..fluid.param_attr import ParamAttr from ..framework import ( LayerHelper, _current_expected_place, @@ -44,7 +48,6 @@ core, in_dygraph_mode, ) -from ..static import Variable, device_guard __all__ = [] @@ -67,6 +70,86 @@ def _real_to_complex_dtype(dtype): return dtype +def create_parameter( + shape, dtype, name=None, attr=None, is_bias=False, default_initializer=None +): + """ + This function creates a parameter. The parameter is a learnable variable, which can have + gradient, and can be optimized. + + Note: + This is a very low-level API. This API is useful when you create operator by your self, instead of using layers. + + Args: + shape (list of int): Shape of the parameter + dtype (str): Data type of the parameter + name (str, optional): For detailed information, please refer to + :ref:`api_guide_Name` . Usually name is no need to set and None by default. + attr (ParamAttr, optional): Attributes of the parameter + is_bias (bool, optional): This can affect which default initializer is chosen + when default_initializer is None. If is_bias, + initializer.Constant(0.0) will be used. Otherwise, + Xavier() will be used. + default_initializer (Initializer, optional): Initializer for the parameter + + Returns: + The created parameter. + + Examples: + .. code-block:: python + + import paddle + paddle.enable_static() + W = paddle.static.create_parameter(shape=[784, 200], dtype='float32') + """ + check_type(shape, 'shape', (list, tuple, np.ndarray), 'create_parameter') + for item in shape: + check_type( + item, + 'item of shape', + ( + int, + np.uint8, + np.int8, + np.int16, + np.int32, + np.int64, + ), + 'create_parameter', + ) + + check_dtype( + dtype, + 'dtype', + [ + 'bool', + 'float16', + 'float32', + 'float64', + 'int8', + 'int16', + 'int32', + 'int64', + 'uint8', + ], + 'create_parameter', + ) + check_type(attr, 'attr', (type(None), ParamAttr), 'create_parameter') + check_type( + default_initializer, + 'default_initializer', + (type(None), Initializer), + 'create_parameter', + ) + + helper = LayerHelper("create_parameter", **locals()) + if attr is None: + attr = ParamAttr(name=name) + return helper.create_parameter( + attr, shape, convert_dtype(dtype), is_bias, default_initializer + ) + + def linspace(start, stop, num, dtype=None, name=None): r""" Return fixed number of evenly spaced values within a given interval. 
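The patch above relocates create_parameter into paddle.tensor.creation and re-exports it as paddle.create_parameter and paddle.static.create_parameter, with the touched tests migrating their call sites accordingly. A minimal sketch of the migrated usage, assuming static-graph mode and illustrative shapes:

    import paddle

    paddle.enable_static()
    # formerly paddle.fluid.layers.create_parameter(...)
    W = paddle.static.create_parameter(shape=[784, 200], dtype='float32')
    # is_bias=True selects the Constant(0.0) default initializer instead of Xavier()
    b = paddle.create_parameter(shape=[200], dtype='float32', is_bias=True)
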
From a7c43ffa6d4a90024c7f26ddafc740d487ed6cee Mon Sep 17 00:00:00 2001 From: Vvsmile <17864154871@163.com> Date: Fri, 2 Dec 2022 13:59:02 +0800 Subject: [PATCH 111/154] [Clean Fluid API]Remove API: pad (#48100) * replace pad with paddle.nn.functional.pad and fix call arguments * solve the ImportError in PR-CI-Mac-Python3, which is called "cannot import name 'assign' from 'paddle.fluid.layers'" * fix the CI error of remove_pad by remove import paddle.nn.functional as F * fix the ImportError of remove_pad * fix functional call of pad --- python/paddle/fluid/layers/nn.py | 87 ------------------- python/paddle/fluid/layers/rnn.py | 4 +- .../unittests/ir/inference/test_trt_pad_op.py | 5 +- .../tests/unittests/npu/test_pad_op_npu.py | 4 +- .../fluid/tests/unittests/test_pad_op.py | 10 +-- 5 files changed, 11 insertions(+), 99 deletions(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 57fa7f7252488b..98d63c9fd0bdf7 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -87,7 +87,6 @@ 'autoincreased_step_counter', 'unsqueeze', 'lod_reset', - 'pad', 'image_resize', 'resize_bilinear', 'resize_trilinear', @@ -3878,92 +3877,6 @@ def lod_reset(x, y=None, target_lod=None): return out -def pad(x, paddings, pad_value=0.0, name=None): - r""" - :alias_main: paddle.nn.functional.pad - :alias: paddle.nn.functional.pad,paddle.nn.functional.common.pad - :old_api: paddle.fluid.layers.pad - - This op will pad a tensor with a constant value given by :attr:`pad_value`, and the - padded shape is specified by :attr:`paddings`. - - Specifically, the number of values padded before the elements of :attr:`x` - in dimension :attr:`i` is indicated by :attr:`paddings[2*i]`, and the number - of values padded after the elements of :attr:`x` in dimension :attr:`i` is - indicated by :attr:`paddings[2*i+1]`. - - See below for an example. - - .. code-block:: text - - Given: - x = [[1, 2], [3, 4]] - - paddings = [0, 1, 1, 2] - - pad_value = 0 - - Return: - out = [[0, 1, 2, 0, 0] - [0, 3, 4, 0, 0] - [0, 0, 0, 0, 0]] - - Args: - x (Variable): Tensor, data type is float32. - paddings (list): A list of integers. Its elements specify the padded - width before and after each dimension in turn. - The length of :attr:`paddings` must be equal to - :math:`rank(x) \\times 2`. - pad_value (float): The constant value used to pad. - name(str, optional): The default value is None. - Normally there is no need for user to set this property. - For more information, please refer to :ref:`api_guide_Name` - - Returns: - The padded tensor, with the same data type and rank as :attr:`x` - - Return Type: - Variable - - Examples: - .. code-block:: python - - # x is a rank 2 tensor variable - import paddle.fluid as fluid - x = fluid.data(name='data', shape=[300, 300], dtype='float32') - out = fluid.layers.pad(x=x, paddings=[0, 1, 1, 2], pad_value=0.) 
- """ - check_variable_and_dtype( - x, - 'x', - [ - 'float16', - 'float32', - 'float64', - 'int32', - 'int64', - 'complex64', - 'complex128', - ], - "pad", - ) - - check_type(pad_value, 'pad_value', (float, int, Variable), 'pad') - if isinstance(pad_value, int): - pad_value = float(pad_value) - - helper = LayerHelper('pad', **locals()) - dtype = helper.input_dtype(input_param_name='x') - out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='pad', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'paddings': paddings, 'pad_value': pad_value}, - ) - return out - - def image_resize( input, out_shape=None, diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index c08cd6208d4d13..83c4d6c2cbcb1c 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -1990,9 +1990,9 @@ def __init__(self, inputs, sequence_length, time_major=False): # extend inputs to avoid to slice out of range in `next_inputs` # may be easier and have better performance than condition_op self.inputs_ = map_structure( - lambda x: nn.pad( + lambda x: paddle.nn.functional.pad( x, - paddings=([0, 1] + [0, 0] * (len(x.shape) - 1)) + pad=([0, 1] + [0, 0] * (len(x.shape) - 1)) if time_major else ([0, 0, 0, 1] + [0, 0] * (len(x.shape) - 2)), ), diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pad_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pad_op.py index 51dc9525989a95..f0cf6ead9d3809 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pad_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pad_op.py @@ -17,6 +17,7 @@ import numpy as np from inference_pass_test import InferencePassTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import AnalysisConfig @@ -28,8 +29,8 @@ def setUp(self): data = fluid.data( name="data", shape=[1, 3, 128, 128], dtype="float32" ) - pad_out = fluid.layers.pad( - x=data, paddings=[0, 0, 0, 0, 0, 1, 1, 2], pad_value=0.0 + pad_out = paddle.nn.functional.pad( + x=data, pad=[0, 0, 0, 0, 0, 1, 1, 2], value=0.0 ) out = fluid.layers.batch_norm(pad_out, is_test=True) diff --git a/python/paddle/fluid/tests/unittests/npu/test_pad_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_pad_op_npu.py index 97d2a83260146b..c02d6012e413f1 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_pad_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_pad_op_npu.py @@ -118,12 +118,12 @@ def test_errors(self): input_data = np.random.random((2, 2)).astype("float32") def test_Variable(): - fluid.layers.pad(x=input_data, paddings=[1, 1, 1, 1]) + paddle.nn.functional.pad(x=input_data, pad=[1, 1, 1, 1]) self.assertRaises(TypeError, test_Variable) data = fluid.data(name='data', shape=[4], dtype='float16') - fluid.layers.pad(x=data, paddings=[0, 1]) + paddle.nn.functional.pad(x=data, pad=[0, 1]) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_pad_op.py b/python/paddle/fluid/tests/unittests/test_pad_op.py index c78692597c1350..04617274356702 100644 --- a/python/paddle/fluid/tests/unittests/test_pad_op.py +++ b/python/paddle/fluid/tests/unittests/test_pad_op.py @@ -112,12 +112,12 @@ def test_errors(self): input_data = np.random.random((2, 2)).astype("float32") def test_Variable(): - fluid.layers.pad(x=input_data, paddings=[1, 1, 1, 1]) + paddle.nn.functional.pad(x=input_data, pad=[1, 1, 1, 1]) self.assertRaises(TypeError, test_Variable) data = 
fluid.data(name='data', shape=[4], dtype='float16') - fluid.layers.pad(x=data, paddings=[0, 1]) + paddle.nn.functional.pad(x=data, pad=[0, 1]) class TestPaddingValueTensor(UnittestBase): @@ -173,10 +173,8 @@ class TestPaddingValueTensor2(TestPaddingValueTensor): def call_func(self, x): padding_value = paddle.assign([1.0]) # test for int value - tmp = paddle.fluid.layers.pad(x, paddings=[1, 1, 1, 1], pad_value=1) - out = paddle.fluid.layers.pad( - x, paddings=[1, 1, 1, 1], pad_value=padding_value - ) + tmp = paddle.nn.functional.pad(x, pad=[1, 1, 1, 1], value=1) + out = paddle.nn.functional.pad(x, pad=[1, 1, 1, 1], value=padding_value) return out From 0f3b1ad6391c9d1c7e3ef563b6b78b9cf26eae93 Mon Sep 17 00:00:00 2001 From: ronnywang Date: Fri, 2 Dec 2022 14:11:00 +0800 Subject: [PATCH 112/154] fix phi capi kernel registration macro error (#48616) * fix capi kernel registration macro error * update --- paddle/phi/capi/include/kernel_registry.h | 4 +++- paddle/phi/capi/include/kernel_utils.h | 16 +++++++++++----- paddle/phi/capi/lib/c_kernel_context.cc | 12 ++++++++++-- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/paddle/phi/capi/include/kernel_registry.h b/paddle/phi/capi/include/kernel_registry.h index 47ddc0bf5be7ec..73318561dd9942 100644 --- a/paddle/phi/capi/include/kernel_registry.h +++ b/paddle/phi/capi/include/kernel_registry.h @@ -167,6 +167,7 @@ inline std::vector PD_MultiInputAt( for (size_t i = 0; i < list.size; ++i) { ret.emplace_back(data[i]); } + PD_DeletePointerList(list); return ret; } @@ -182,13 +183,14 @@ inline std::vector PD_MultiOutputAt( for (size_t i = 0; i < list.size; ++i) { ret.emplace_back(data[i]); } + PD_DeletePointerList(list); return ret; } template inline std::vector PD_GetPointerVector(std::vector *vec) { std::vector ret; - for (auto &item : vec) { + for (auto &item : *vec) { ret.push_back(&item); } return ret; diff --git a/paddle/phi/capi/include/kernel_utils.h b/paddle/phi/capi/include/kernel_utils.h index 6c1d3f3c0ee758..d92a9e22052187 100644 --- a/paddle/phi/capi/include/kernel_utils.h +++ b/paddle/phi/capi/include/kernel_utils.h @@ -564,18 +564,24 @@ namespace capi { static_assert(out_idx == 0, \ "Kernel's Input should appear before Outputs."); \ auto arg = PD_MultiInputAt(ctx, in_idx); \ - auto arg_wrapper = PD_GetPointerVector(&arg); \ + std::vector tensor_ptr_vec; \ + for (auto &tensor : arg) { \ + tensor_ptr_vec.push_back(tensor.raw_data() ? &tensor : nullptr); \ + } \ CustomKernelCallHelper:: \ template Compute( \ - ctx, pargs..., arg_wrapper); \ + ctx, pargs..., tensor_ptr_vec); \ } \ template \ static void VariadicCompute(const std::tuple &ctx, \ PreviousArgs &...pargs) { \ auto &arg = std::get(ctx); \ - auto tensor = PD_TensorVector(reinterpret_cast( \ + auto tensor_vec = PD_TensorVector(reinterpret_cast( \ const_cast *>(&arg))); \ - auto tensor_ptr_vec = PD_GetPointerVector(&arg); \ + std::vector tensor_ptr_vec; \ + for (auto &tensor : tensor_vec) { \ + tensor_ptr_vec.push_back(tensor.raw_data() ? &tensor : nullptr); \ + } \ return CustomKernelCallHelper::template VariadicCompute( \ ctx, pargs..., tensor_ptr_vec); \ @@ -681,7 +687,7 @@ namespace capi { tensor_ptr_vec.push_back(tensor.raw_data() ? 
&tensor : nullptr); \ } \ CustomKernelCallHelper:: \ - template Compute( \ + template Compute( \ ctx, pargs..., tensor_ptr_vec); \ } \ template \ diff --git a/paddle/phi/capi/lib/c_kernel_context.cc b/paddle/phi/capi/lib/c_kernel_context.cc index d38a19038e3144..e9fe2aada1f35f 100644 --- a/paddle/phi/capi/lib/c_kernel_context.cc +++ b/paddle/phi/capi/lib/c_kernel_context.cc @@ -60,7 +60,11 @@ PD_List PD_KernelContextMultiInputAt(PD_KernelContext* ctx, size_t index) { range.first, range.second); PD_List list; list.size = tensor_vec.size(); - list.data = tensor_vec.data(); + list.data = new void*[list.size]; + for (size_t i = 0; i < list.size; ++i) { + (reinterpret_cast(list.data))[i] = + reinterpret_cast(const_cast(tensor_vec[i])); + } return list; } @@ -78,7 +82,11 @@ PD_List PD_KernelContextMultiOutputAt(PD_KernelContext* ctx, size_t index) { range.first, range.second); PD_List list; list.size = tensor_vec.size(); - list.data = tensor_vec.data(); + list.data = new void*[list.size]; + for (size_t i = 0; i < list.size; ++i) { + (reinterpret_cast(list.data))[i] = + reinterpret_cast(tensor_vec[i]); + } return list; } From fcf262790c6c6a27714fe767c850948d52e52188 Mon Sep 17 00:00:00 2001 From: Charles-hit <56987902+Charles-hit@users.noreply.github.com> Date: Fri, 2 Dec 2022 14:45:19 +0800 Subject: [PATCH 113/154] fix unit test (#48624) fix test_primapi --- .../fluid/tests/unittests/autograd/test_primapi.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/autograd/test_primapi.py b/python/paddle/fluid/tests/unittests/autograd/test_primapi.py index beb971a4c1cfc6..3d1a1563860833 100644 --- a/python/paddle/fluid/tests/unittests/autograd/test_primapi.py +++ b/python/paddle/fluid/tests/unittests/autograd/test_primapi.py @@ -677,16 +677,15 @@ def test_illegal_param(self): None, 'float32', ), - ('var', paddle.var, (np.random.rand(200, 324),), None, 'float32'), ( - 'var_with_axis', - lambda x: paddle.var(x, axis=1), - (np.random.rand(10, 20, 30),), + 'var', + lambda x: paddle.var(x, unbiased=False), + (np.random.rand(200, 324),), None, 'float32', ), ( - 'var_without_unbiased', + 'var_with_axis', lambda x: paddle.var(x, axis=1, unbiased=False), (np.random.rand(10, 20, 30),), None, @@ -694,7 +693,7 @@ def test_illegal_param(self): ), ( 'var_with_keepdim', - lambda x: paddle.var(x, axis=1, keepdim=True), + lambda x: paddle.var(x, axis=1, keepdim=True, unbiased=False), (np.random.rand(10, 20, 30),), None, 'float32', From 4c38b87ec87c35d49258be70a025baf35fc84823 Mon Sep 17 00:00:00 2001 From: gem5 <117625383+linsheng011@users.noreply.github.com> Date: Fri, 2 Dec 2022 15:07:22 +0800 Subject: [PATCH 114/154] add some compare and logical trt converter (#48592) --- .../fluid/inference/api/analysis_predictor.cc | 6 + .../tensorrt/convert/elementwise_op.cc | 95 +++- paddle/fluid/inference/tensorrt/op_teller.cc | 38 ++ .../test_trt_convert_compare_and_logical.py | 483 ++++++++++++++++++ 4 files changed, 608 insertions(+), 14 deletions(-) mode change 100644 => 100755 paddle/fluid/inference/tensorrt/convert/elementwise_op.cc create mode 100755 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 1c27c008d8ca7b..293de6bcd31a62 100755 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2238,6 +2238,12 @@ 
USE_TRT_CONVERTER(elementwise_max_tensor); USE_TRT_CONVERTER(elementwise_min_tensor); USE_TRT_CONVERTER(elementwise_pow_tensor); USE_TRT_CONVERTER(elementwise_floordiv_tensor); +USE_TRT_CONVERTER(less_than); +USE_TRT_CONVERTER(greater_than); +USE_TRT_CONVERTER(logical_or); +USE_TRT_CONVERTER(logical_xor); +USE_TRT_CONVERTER(logical_and); +USE_TRT_CONVERTER(less_equal); USE_TRT_CONVERTER(transpose); USE_TRT_CONVERTER(transpose2); USE_TRT_CONVERTER(flatten); diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc old mode 100644 new mode 100755 index 53cb2da285afae..0280e418e804b9 --- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc @@ -74,8 +74,12 @@ class ElementwiseTensorOpConverter : public OpConverter { nvinfer1::Dims dims_y = Y->getDimensions(); auto output_name = op_desc.Output("Out")[0]; + int axis = -1; // axis here is relative to explicit batch - int axis = PADDLE_GET_CONST(int, op_desc.GetAttr("axis")); + if (op_type_ != "logical_or" && op_type_ != "logical_xor" && + op_type_ != "logical_and") { + axis = PADDLE_GET_CONST(int, op_desc.GetAttr("axis")); + } int real_x_rank = dims_x.nbDims; int real_y_rank = dims_y.nbDims; if (!engine_->with_dynamic_shape()) { @@ -139,17 +143,40 @@ class ElementwiseTensorOpConverter : public OpConverter { X = tmp; } - auto op_pair = ops.find(op_type_); - PADDLE_ENFORCE_NE(op_pair, - ops.end(), - platform::errors::InvalidArgument( - "Elementwise op's type(%s) is not supported. Please " - "check if the op_type is correct.", - op_type_)); - - auto* layer = TRT_ENGINE_ADD_LAYER( - engine_, ElementWise, *X, *reshape_y_tensor, op_pair->second); - RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode); + if (op_type_ == "less_equal") { + auto* less_layer = + TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *X, + *reshape_y_tensor, + nvinfer1::ElementWiseOperation::kLESS); + auto* equal_layer = + TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *X, + *reshape_y_tensor, + nvinfer1::ElementWiseOperation::kEQUAL); + auto* layer = TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *(less_layer->getOutput(0)), + *(equal_layer->getOutput(0)), + nvinfer1::ElementWiseOperation::kOR); + + RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode); + } else { + auto op_pair = ops.find(op_type_); + PADDLE_ENFORCE_NE( + op_pair, + ops.end(), + platform::errors::InvalidArgument( + "Elementwise op's type(%s) is not supported. 
Please " + "check if the op_type is correct.", + op_type_)); + + auto* layer = TRT_ENGINE_ADD_LAYER( + engine_, ElementWise, *X, *reshape_y_tensor, op_pair->second); + RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode); + } } protected: @@ -168,6 +195,11 @@ const std::unordered_map {"pow", nvinfer1::ElementWiseOperation::kPOW}, {"max", nvinfer1::ElementWiseOperation::kMAX}, {"floordiv", nvinfer1::ElementWiseOperation::kFLOOR_DIV}, + {"less_than", nvinfer1::ElementWiseOperation::kLESS}, + {"greater_than", nvinfer1::ElementWiseOperation::kGREATER}, + {"logical_or", nvinfer1::ElementWiseOperation::kOR}, + {"logical_xor", nvinfer1::ElementWiseOperation::kXOR}, + {"logical_and", nvinfer1::ElementWiseOperation::kAND}, }; class ElementwiseTensorAddOpConverter : public ElementwiseTensorOpConverter { @@ -204,13 +236,41 @@ class ElementwiseTensorPowOpConverter : public ElementwiseTensorOpConverter { public: ElementwiseTensorPowOpConverter() { op_type_ = "pow"; } }; - class ElementwiseTensorFloorDivOpConverter : public ElementwiseTensorOpConverter { public: ElementwiseTensorFloorDivOpConverter() { op_type_ = "floordiv"; } }; - +class ElementwiseTensorLessThanOpConverter + : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorLessThanOpConverter() { op_type_ = "less_than"; } +}; +class ElementwiseTensorGreaterThanOpConverter + : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorGreaterThanOpConverter() { op_type_ = "greater_than"; } +}; +class ElementwiseTensorLogicalOrOpConverter + : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorLogicalOrOpConverter() { op_type_ = "logical_or"; } +}; +class ElementwiseTensorLogicalXorOpConverter + : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorLogicalXorOpConverter() { op_type_ = "logical_xor"; } +}; +class ElementwiseTensorLogicalAndOpConverter + : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorLogicalAndOpConverter() { op_type_ = "logical_and"; } +}; +class ElementwiseTensorLessEqualOpConverter + : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorLessEqualOpConverter() { op_type_ = "less_equal"; } +}; } // namespace tensorrt } // namespace inference } // namespace paddle @@ -248,3 +308,10 @@ REGISTER_TRT_OP_CONVERTER(elementwise_pow_tensor, ElementwiseTensorPowOpConverter); REGISTER_TRT_OP_CONVERTER(elementwise_floordiv_tensor, ElementwiseTensorFloorDivOpConverter); +REGISTER_TRT_OP_CONVERTER(less_than, ElementwiseTensorLessThanOpConverter); +REGISTER_TRT_OP_CONVERTER(greater_than, + ElementwiseTensorGreaterThanOpConverter); +REGISTER_TRT_OP_CONVERTER(logical_or, ElementwiseTensorLogicalOrOpConverter); +REGISTER_TRT_OP_CONVERTER(logical_xor, ElementwiseTensorLogicalXorOpConverter); +REGISTER_TRT_OP_CONVERTER(logical_and, ElementwiseTensorLogicalAndOpConverter); +REGISTER_TRT_OP_CONVERTER(less_equal, ElementwiseTensorLessEqualOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 17fb2f0aa6d095..d88de415e82cdd 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -1322,6 +1322,32 @@ struct SimpleOpTypeSetTeller : public Teller { } } + if (op_type == "less_than" || op_type == "greater_than" || + op_type == "logical_or" || op_type == "logical_xor" || + op_type == "logical_and" || op_type == "less_equal") { +#if IS_TRT_VERSION_GE(8400) + if (!with_dynamic_shape) { + VLOG(3) << "these ops do not support static 
shape yet"; + return false; + } + if (op_type == "logical_or" || op_type == "logical_xor" || + op_type == "logical_and") { + auto* block = desc.Block(); + auto* x_var_desc = block->FindVar(desc.Input("X")[0]); + auto* y_var_desc = block->FindVar(desc.Input("Y")[0]); + auto x_dtype = x_var_desc->GetDataType(); + auto y_dtype = y_var_desc->GetDataType(); + if (x_dtype != framework::proto::VarType::BOOL || + y_dtype != framework::proto::VarType::BOOL) { + VLOG(3) << "the op only support input of BOOL."; + return false; + } + } +#else + VLOG(3) << "these are not supported when TensorRT < 8.4"; + return false; +#endif + } if (op_type == "elementwise_add" || op_type == "elementwise_mul" || op_type == "elementwise_sub" || op_type == "elementwise_div" || op_type == "elementwise_pow" || op_type == "elementwise_min" || @@ -2382,6 +2408,12 @@ struct SimpleOpTypeSetTeller : public Teller { "elementwise_max", "elementwise_floordiv", "equal", + "less_than", + "greater_than", + "logical_or", + "logical_xor", + "logical_and", + "less_equal", "dropout", "fill_any_like", "prelu", @@ -2514,6 +2546,12 @@ struct SimpleOpTypeSetTeller : public Teller { "elementwise_max", "elementwise_floordiv", "equal", + "less_than", + "greater_than", + "logical_or", + "logical_xor", + "logical_and", + "less_equal", "dropout", "fill_any_like", "prelu", diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py new file mode 100755 index 00000000000000..85abf692ae56a6 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py @@ -0,0 +1,483 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +from functools import partial +from typing import List + +import numpy as np +from program_config import ProgramConfig, TensorConfig +from trt_layer_auto_scan_test import TrtLayerAutoScanTest + +import paddle.inference as paddle_infer + + +class TrtConvertLogicalTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def sample_program_configs(self): + def generate_input(shape): + return np.random.random(shape).astype(np.float32) + + for shape in [[2, 16], [2, 16, 32], [1, 32, 16, 32]]: + for op_type in ["logical_and", "logical_or", "logical_xor"]: + for axis in [-1]: + self.dims = len(shape) + dics = [ + {"axis": axis}, + {"in_dtype": 5, "out_dtype": 0}, + {"in_dtype": 0, "out_dtype": 5}, + ] + ops_config = [ + { + "op_type": "cast", + "op_inputs": {"X": ["input_data1"]}, + "op_outputs": {"Out": ["cast_output_data1"]}, + "op_attrs": dics[1], + "outputs_dtype": {"cast_output_data1": np.bool}, + }, + { + "op_type": "cast", + "op_inputs": {"X": ["input_data2"]}, + "op_outputs": {"Out": ["cast_output_data3"]}, + "op_attrs": dics[1], + "outputs_dtype": {"cast_output_data1": np.bool}, + }, + { + "op_type": op_type, + "op_inputs": { + "X": ["cast_output_data1"], + "Y": ["cast_output_data3"], + }, + "op_outputs": {"Out": ["cast_output_data0"]}, + "op_attrs": dics[0], + }, + { + "op_type": "cast", + "op_inputs": {"X": ["cast_output_data0"]}, + "op_outputs": {"Out": ["output_data"]}, + "op_attrs": dics[2], + }, + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data1": TensorConfig( + data_gen=partial(generate_input, shape) + ), + "input_data2": TensorConfig( + data_gen=partial(generate_input, shape) + ), + }, + outputs=["output_data"], + ) + + yield program_config + + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + if self.dims == 2: + self.dynamic_shape.min_input_shape = { + "input_data1": [2, 16], + "input_data2": [2, 16], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [2, 16], + "input_data2": [2, 16], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [2, 16], + "input_data2": [2, 16], + } + if self.dims == 3: + self.dynamic_shape.min_input_shape = { + "input_data1": [2, 16, 32], + "input_data2": [2, 16, 32], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [2, 16, 32], + "input_data2": [2, 16, 32], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [2, 16, 32], + "input_data2": [2, 16, 32], + } + if self.dims == 4: + self.dynamic_shape.min_input_shape = { + "input_data1": [1, 32, 16, 32], + "input_data2": [1, 32, 16, 32], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [1, 32, 16, 32], + "input_data2": [1, 32, 16, 32], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [1, 32, 16, 32], + "input_data2": [1, 32, 16, 32], + } + + def clear_dynamic_shape(): + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + if dynamic_shape: + ver = paddle_infer.get_trt_compile_version() + if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400: + return 0, 7 + return 1, 3 + return 0, 7 + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + # for static_shape + clear_dynamic_shape() + self.trt_param.precision = 
paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), (1e-3, 1e-3) + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), (1e-3, 1e-3) + + def add_skip_trt_case(self): + pass + + def test(self): + self.add_skip_trt_case() + self.run_test() + + +class TrtConvertCompareTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def sample_program_configs(self): + def generate_input(shape): + return np.random.random(shape).astype(np.float32) + + for shape in [[2, 16], [2, 16, 32], [1, 32, 16, 32]]: + for op_type in ["less_than", "greater_than"]: + for axis in [-1]: + self.dims = len(shape) + dics = [ + {"axis": axis}, + {"in_dtype": 0, "out_dtype": 5}, + ] + ops_config = [ + { + "op_type": op_type, + "op_inputs": { + "X": ["input_data1"], + "Y": ["input_data2"], + }, + "op_outputs": {"Out": ["cast_output_data0"]}, + "op_attrs": dics[0], + }, + { + "op_type": "cast", + "op_inputs": {"X": ["cast_output_data0"]}, + "op_outputs": {"Out": ["output_data"]}, + "op_attrs": dics[1], + }, + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data1": TensorConfig( + data_gen=partial(generate_input, shape) + ), + "input_data2": TensorConfig( + data_gen=partial(generate_input, shape) + ), + }, + outputs=["output_data"], + ) + + yield program_config + + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + if self.dims == 2: + self.dynamic_shape.min_input_shape = { + "input_data1": [2, 16], + "input_data2": [2, 16], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [2, 16], + "input_data2": [2, 16], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [2, 16], + "input_data2": [2, 16], + } + if self.dims == 3: + self.dynamic_shape.min_input_shape = { + "input_data1": [2, 16, 32], + "input_data2": [2, 16, 32], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [2, 16, 32], + "input_data2": [2, 16, 32], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [2, 16, 32], + "input_data2": [2, 16, 32], + } + if self.dims == 4: + self.dynamic_shape.min_input_shape = { + "input_data1": [1, 32, 16, 32], + "input_data2": [1, 32, 16, 32], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [1, 32, 16, 32], + "input_data2": [1, 32, 16, 32], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [1, 32, 16, 32], + "input_data2": [1, 32, 16, 32], + } + + def clear_dynamic_shape(): + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + ver = paddle_infer.get_trt_compile_version() + if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400: + return 0, 5 + if not dynamic_shape: + return 0, 5 + return 1, 3 + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + # for static_shape + 
clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), (1e-3, 1e-3) + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), (1e-3, 1e-3) + + def add_skip_trt_case(self): + pass + + def test(self): + self.add_skip_trt_case() + self.run_test() + + +class TrtConvertLessEqualTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def sample_program_configs(self): + def generate_input(shape): + return np.random.random(shape).astype(np.float32) + + for shape in [[2, 16], [2, 16, 32], [1, 32, 16, 32]]: + for op_type in ["less_equal"]: + for axis in [-1]: + self.dims = len(shape) + dics = [ + {"axis": axis}, + {"in_dtype": 5, "out_dtype": 2}, + {"in_dtype": 0, "out_dtype": 5}, + ] + ops_config = [ + { + "op_type": "cast", + "op_inputs": {"X": ["input_data1"]}, + "op_outputs": {"Out": ["cast_output_data1"]}, + "op_attrs": dics[1], + }, + { + "op_type": "cast", + "op_inputs": {"X": ["input_data2"]}, + "op_outputs": {"Out": ["cast_output_data2"]}, + "op_attrs": dics[1], + }, + { + "op_type": op_type, + "op_inputs": { + "X": ["cast_output_data1"], + "Y": ["cast_output_data2"], + }, + "op_outputs": {"Out": ["cast_output_data0"]}, + "op_attrs": dics[0], + }, + { + "op_type": "cast", + "op_inputs": {"X": ["cast_output_data0"]}, + "op_outputs": {"Out": ["output_data"]}, + "op_attrs": dics[2], + }, + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data1": TensorConfig( + data_gen=partial(generate_input, shape) + ), + "input_data2": TensorConfig( + data_gen=partial(generate_input, shape) + ), + }, + outputs=["output_data"], + ) + + yield program_config + + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + if self.dims == 2: + self.dynamic_shape.min_input_shape = { + "input_data1": [2, 16], + "input_data2": [2, 16], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [2, 16], + "input_data2": [2, 16], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [2, 16], + "input_data2": [2, 16], + } + if self.dims == 3: + self.dynamic_shape.min_input_shape = { + "input_data1": [2, 16, 32], + "input_data2": [2, 16, 32], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [2, 16, 32], + "input_data2": [2, 16, 32], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [2, 16, 32], + "input_data2": [2, 16, 32], + } + if self.dims == 4: + self.dynamic_shape.min_input_shape = { + "input_data1": [1, 32, 16, 32], + "input_data2": [1, 32, 16, 32], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [1, 32, 16, 32], + "input_data2": [1, 32, 16, 32], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [1, 32, 16, 32], + "input_data2": [1, 32, 16, 32], + } + + def clear_dynamic_shape(): + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.min_input_shape = {} + 
self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + ver = paddle_infer.get_trt_compile_version() + if ( + ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400 + or not dynamic_shape + ): + return 2, 5 + else: + return 1, 3 + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + # for static_shape + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), (1e-3, 1e-3) + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), (1e-3, 1e-3) + + def add_skip_trt_case(self): + pass + + def test(self): + self.add_skip_trt_case() + self.run_test() + + +if __name__ == "__main__": + unittest.main() From 6efc2888b5c0f84c9a817c970f74ce1d69b9a056 Mon Sep 17 00:00:00 2001 From: Shijie <505749828@qq.com> Date: Fri, 2 Dec 2022 15:46:41 +0800 Subject: [PATCH 115/154] Fix fuse_gemm_epilogue (#47805) * Fix fuse_gemm_epilogue * update tests * Update CMakeLists.txt * Update CMakeLists.txt * Update CMakeLists.txt * fix random seed * use assert_allclose * Update test_dist_fuse_gemm_epilogue_pass.py * Update cpp_pass.py * Update test_dist_fuse_gemm_epilogue_pass.py * fix codestyle * update seed and atol --- .../framework/ir/fuse_gemm_epilogue_pass.cc | 58 +++-- python/paddle/distributed/passes/cpp_pass.py | 13 ++ .../distributed_passes/CMakeLists.txt | 4 + .../distributed_passes/dist_pass_test_base.py | 16 +- .../test_dist_fuse_gemm_epilogue_pass.py | 210 ++++++++++++++++++ 5 files changed, 279 insertions(+), 22 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_gemm_epilogue_pass.py diff --git a/paddle/fluid/framework/ir/fuse_gemm_epilogue_pass.cc b/paddle/fluid/framework/ir/fuse_gemm_epilogue_pass.cc index e98f2bb144edde..e708a3bbdf80f3 100644 --- a/paddle/fluid/framework/ir/fuse_gemm_epilogue_pass.cc +++ b/paddle/fluid/framework/ir/fuse_gemm_epilogue_pass.cc @@ -16,7 +16,7 @@ #include "paddle/fluid/framework/ir/fuse_gemm_epilogue_pass.h" #include - +#include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/enforce.h" @@ -106,13 +106,13 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearFwd(ir::Graph *graph, IR_NODE_LINK_TO(ele_bias, gemm_epilogue_node); IR_NODE_LINK_TO(gemm_epilogue_node, ele_out); - GraphSafeRemoveNodes(g, {matmul_op, matmul_out, ele_add_op}); - VLOG(4) << "\n\t " << subgraph.at(x)->Name() << " and " << matmul_w->Name() << " -> " << matmul_op->Name() << " -> " << matmul_out->Name() << "\n\t " << matmul_out->Name() << " and " << ele_bias->Name() << " -> " << ele_add_op->Name() << " -> " << ele_out->Name() << "\n\t " << ele_out->Name(); + + GraphSafeRemoveNodes(g, {matmul_op, matmul_out, ele_add_op}); found_linear_count++; }; @@ -218,15 +218,15 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearActFwd( IR_NODE_LINK_TO(gemm_epilogue_node, reserve_space_node); } - GraphSafeRemoveNodes(g, - {matmul_op, matmul_out, ele_add_op, ele_out, act_op}); - 
VLOG(4) << "\n\t " << subgraph.at(x)->Name() << " and " << matmul_w->Name() << " -> " << matmul_op->Name() << " -> " << matmul_out->Name() << "\n\t " << matmul_out->Name() << " and " << ele_bias->Name() << " -> " << ele_add_op->Name() << " -> " << ele_out->Name() << "\n\t " << ele_out->Name() << " -> " << act_op->Name() << " -> " << act_out->Name(); + + GraphSafeRemoveNodes(g, + {matmul_op, matmul_out, ele_add_op, ele_out, act_op}); found_linear_act_count++; }; @@ -318,6 +318,19 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearBwd(ir::Graph *graph, "op_role", matmul_grad_op_desc->GetAttr("op_role")); fused_gemm_epilogue_grad_op_desc.SetAttr("trans_x", trans_x); fused_gemm_epilogue_grad_op_desc.SetAttr("trans_y", trans_y); + auto matmul_grad_op_role_val = + details::GetOpRoleVarsOrEmpty(*(matmul_grad_op->Op())); + auto ele_add_grad_op_role_val = + details::GetOpRoleVarsOrEmpty(*(ele_add_grad_op->Op())); + std::vector fused_gemm_epilogue_grad_op_role_var; + for (auto i : matmul_grad_op_role_val) { + fused_gemm_epilogue_grad_op_role_var.push_back(i); + } + for (auto i : ele_add_grad_op_role_val) { + fused_gemm_epilogue_grad_op_role_var.push_back(i); + } + fused_gemm_epilogue_grad_op_desc.SetAttr( + "op_role_var", fused_gemm_epilogue_grad_op_role_var); auto gemm_epilogue_grad_node = g->CreateOpNode(&fused_gemm_epilogue_grad_op_desc); @@ -325,14 +338,13 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearBwd(ir::Graph *graph, IR_NODE_LINK_TO(subgraph.at(dout), gemm_epilogue_grad_node); IR_NODE_LINK_TO(matmul_grad_x, gemm_epilogue_grad_node); IR_NODE_LINK_TO(matmul_grad_w, gemm_epilogue_grad_node); + IR_NODE_LINK_TO(ele_grad_bias, gemm_epilogue_grad_node); IR_NODE_LINK_TO(gemm_epilogue_grad_node, matmul_grad_dw); IR_NODE_LINK_TO(gemm_epilogue_grad_node, ele_grad_dbias); if (matmul_grad_dx) { IR_NODE_LINK_TO(gemm_epilogue_grad_node, matmul_grad_dx); } - GraphSafeRemoveNodes(g, {ele_add_grad_op, ele_grad_dx, matmul_grad_op}); - std::string matmul_grad_dx_name = matmul_grad_dx != nullptr ? 
matmul_grad_dx->Name() : " "; VLOG(4) << "\n\t " << subgraph.at(dout)->Name() << " and " @@ -342,6 +354,8 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearBwd(ir::Graph *graph, << matmul_grad_x->Name() << " and " << matmul_grad_w->Name() << " -> " << matmul_grad_op->Name() << " -> " << matmul_grad_w->Name() << " and " << matmul_grad_dx_name; + + GraphSafeRemoveNodes(g, {ele_add_grad_op, ele_grad_dx, matmul_grad_op}); found_ele_add_matmul_act_count++; }; @@ -442,6 +456,19 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearActBwd( "op_role", matmul_grad_op_desc->GetAttr("op_role")); fused_gemm_epilogue_grad_op_desc.SetAttr("trans_x", trans_x); fused_gemm_epilogue_grad_op_desc.SetAttr("trans_y", trans_y); + auto matmul_grad_op_role_val = + details::GetOpRoleVarsOrEmpty(*(matmul_grad_op->Op())); + auto ele_add_grad_op_role_val = + details::GetOpRoleVarsOrEmpty(*(ele_add_grad_op->Op())); + std::vector fused_gemm_epilogue_grad_op_role_var; + for (auto i : matmul_grad_op_role_val) { + fused_gemm_epilogue_grad_op_role_var.push_back(i); + } + for (auto i : ele_add_grad_op_role_val) { + fused_gemm_epilogue_grad_op_role_var.push_back(i); + } + fused_gemm_epilogue_grad_op_desc.SetAttr( + "op_role_var", fused_gemm_epilogue_grad_op_role_var); auto gemm_epilogue_grad_node = g->CreateOpNode(&fused_gemm_epilogue_grad_op_desc); @@ -449,18 +476,12 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearActBwd( IR_NODE_LINK_TO(subgraph.at(dout), gemm_epilogue_grad_node); IR_NODE_LINK_TO(matmul_grad_x, gemm_epilogue_grad_node); IR_NODE_LINK_TO(matmul_grad_w, gemm_epilogue_grad_node); + IR_NODE_LINK_TO(ele_grad_bias, gemm_epilogue_grad_node); IR_NODE_LINK_TO(gemm_epilogue_grad_node, act_grad_dx); IR_NODE_LINK_TO(gemm_epilogue_grad_node, matmul_grad_dw); IR_NODE_LINK_TO(gemm_epilogue_grad_node, ele_grad_dbias); IR_NODE_LINK_TO(reserve_space_node, gemm_epilogue_grad_node); - GraphSafeRemoveNodes(g, - {ele_add_grad_op, - ele_grad_dx, - matmul_grad_op, - matmul_grad_dx, - act_grad_op}); - VLOG(4) << "\n\t " << subgraph.at(dout)->Name() << " and " << ele_grad_bias->Name() << " -> " << ele_add_grad_op->Name() << " -> " << ele_grad_dx->Name() << " and " @@ -470,6 +491,13 @@ ir::Graph *FuseGemmEpiloguePass::FuseLinearActBwd( << matmul_grad_dx->Name() << " and " << matmul_grad_w->Name() << "\n\t " << matmul_grad_dx->Name() << " -> " << act_grad_op->Name() << " -> " << act_grad_dx->Name(); + + GraphSafeRemoveNodes(g, + {ele_add_grad_op, + ele_grad_dx, + matmul_grad_op, + matmul_grad_dx, + act_grad_op}); found_ele_add_matmul_act_count++; }; diff --git a/python/paddle/distributed/passes/cpp_pass.py b/python/paddle/distributed/passes/cpp_pass.py index 07b5661950ab39..9201682d89f72d 100755 --- a/python/paddle/distributed/passes/cpp_pass.py +++ b/python/paddle/distributed/passes/cpp_pass.py @@ -71,6 +71,19 @@ def _type(self): return PassType.FUSION_OPT +@register_pass("fuse_gemm_epilogue") +class FuseGemmEpiloguePass(CPPPassWrapper): + def __init__(self): + super().__init__() + + @property + def cpp_name(self): + return "fuse_gemm_epilogue_pass" + + def _type(self): + return PassType.FUSION_OPT + + @register_pass("fuse_optimizer") class FuseOptimizerPass(CPPPassWrapper): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/CMakeLists.txt b/python/paddle/fluid/tests/unittests/distributed_passes/CMakeLists.txt index b9f4d818282962..02a65d02320b52 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/distributed_passes/CMakeLists.txt @@ -24,6 
+24,10 @@ if((NOT WITH_GPU) "test_auto_parallel_data_parallel_optimization_pass") endif() +if(NOT ((WITH_GPU) AND (CUDA_VERSION GREATER_EQUAL 11.6))) + list(REMOVE_ITEM TEST_OPS test_dist_fuse_gemm_epilogue_pass) +endif() + foreach(TEST_OP ${TEST_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP}) list(APPEND DIST_TEST_OPS ${TEST_OP}) diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py b/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py index 2755b23a8298e0..46e8870821dd2e 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py @@ -105,13 +105,15 @@ def check_results(self, no_pass_rets, pass_rets): if out_var_no_pass is None: self.assertIsNone(out_var_pass) else: - np.testing.assert_allclose( - out_var_no_pass, - out_var_pass, - rtol=self.rtol, - atol=self.atol, - equal_nan=self.equal_nan, - ) + self.assertEqual(len(out_var_pass), len(out_var_no_pass)) + for i in range(0, len(out_var_pass)): + np.testing.assert_allclose( + out_var_no_pass[i], + out_var_pass[i], + rtol=self.rtol, + atol=self.atol, + equal_nan=self.equal_nan, + ) @classmethod def _to_var_names(cls, names_or_vars): diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_gemm_epilogue_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_gemm_epilogue_pass.py new file mode 100644 index 00000000000000..897a40abf425fe --- /dev/null +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_dist_fuse_gemm_epilogue_pass.py @@ -0,0 +1,210 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +from dist_pass_test_base import DistPassTestBase + +import paddle +import paddle.distributed.fleet as fleet +import paddle.nn as nn +from paddle.distributed.passes import PassManager, new_pass + +paddle.enable_static() +np.random.seed(12345) +paddle.seed(12345) + + +def verify_op_count(op_types, op_name, target_count): + count = 0 + for op_type in op_types: + if op_type == op_name: + count += 1 + return count == target_count + + +class MultiFCLayer(nn.Layer): + def __init__(self, hidden, Activation): + super(MultiFCLayer, self).__init__() + self.linear1 = paddle.nn.Linear(hidden, 4 * hidden) + self.linear2 = paddle.nn.Linear(4 * hidden, hidden) + self.linear3 = paddle.nn.Linear(hidden, hidden) + + self.relu1 = Activation() + self.relu2 = Activation() + self.relu3 = Activation() + + def forward(self, x, matmul_y, ele_y): + output = self.linear1(x) + output = self.relu1(output) + output = self.linear2(output) + + output1 = paddle.matmul(output, matmul_y) + output = self.linear3(output) + output = self.relu2(output) + + output = paddle.matmul(output, matmul_y) + output = paddle.add(output, ele_y) + output = self.relu3(output) + output = paddle.add(output, output1) + return output + + +class TestFuseGemmEpiloguePassReluFP32(DistPassTestBase): + def init(self): + self.atol = 1e-3 + self.rtol = 1e-3 + self.activation = nn.ReLU + self.act_fwd_name = 'relu' + self.act_bwd_name = 'relu_grad' + self.batch = 64 + self.seqlen = 128 + self.hidden = 768 + self.precision = 'FP32' # FP32 or AMP + + def get_model(self, place): + data = paddle.static.data( + name="_data", shape=[-1, self.seqlen, self.hidden], dtype='float32' + ) + matmul_y = paddle.static.data( + name="_matmul_y", + shape=[1, self.hidden, self.hidden], + dtype='float32', + ) + ele_y = paddle.static.data( + name="_ele_y", + shape=[ + self.hidden, + ], + dtype='float32', + ) + + model = MultiFCLayer(self.hidden, self.activation) + out = model(data, matmul_y, ele_y) + loss = paddle.mean(out) + optimizer = paddle.optimizer.Adam(learning_rate=1e-3) + + dist_strategy = fleet.DistributedStrategy() + dist_strategy.fuse_all_reduce_ops = False + dist_strategy.without_graph_optimization = True + if self.precision == 'AMP': + dist_strategy.amp = True + dist_strategy.amp_configs = { + "init_loss_scaling": 32768, + "use_dynamic_loss_scaling": True, + "custom_white_list": ['gelu'], + } + fleet.init(is_collective=True, strategy=dist_strategy) + optimizer = fleet.distributed_optimizer(optimizer) + optimizer.minimize(loss) + + rank = paddle.distributed.get_rank() + + def reader(): + for _ in range(10): + data_arr = ( + np.random.random( + (self.batch, self.seqlen, self.hidden) + ).astype("float32") + - 0.5 + ) + matmul_y_arr = ( + np.random.random((1, self.hidden, self.hidden)).astype( + "float32" + ) + - 0.5 + ) + ele_y_arr = ( + np.random.random((self.hidden,)).astype("float32") - 0.5 + ) + yield [data_arr, matmul_y_arr, ele_y_arr] + + main_program = paddle.static.default_main_program() + startup_program = paddle.static.default_startup_program() + + fetch_list = [] + for p in model.parameters(): + grad_name = p.name + '@GRAD' + fetch_list.append(grad_name) + + fetch_list.append(loss.name) + + return ( + main_program, + startup_program, + [data, matmul_y, ele_y], + fetch_list, + reader, + ) + + def apply_passes(self, main_prog, startup_prog): + pass_manager = PassManager([new_pass("fuse_gemm_epilogue")]) + pass_manager.apply([main_prog], [startup_prog]) + print(pass_manager.names) + + op_type = [] + for op in 
main_prog.global_block().ops: + op_type.append(op.type) + print(op_type) + self.assertTrue(verify_op_count(op_type, "fused_gemm_epilogue", 3)) + self.assertTrue(verify_op_count(op_type, "fused_gemm_epilogue_grad", 3)) + self.assertTrue(verify_op_count(op_type, self.act_fwd_name, 1)) + self.assertTrue(verify_op_count(op_type, self.act_bwd_name, 2)) + + def test_fuse_gemm_epilogue(self): + self.check_main() + + +class TestFuseGemmEpiloguePassReluFP16(TestFuseGemmEpiloguePassReluFP32): + def init(self): + self.atol = 1e-3 + self.rtol = 1e-3 + self.activation = nn.ReLU + self.act_fwd_name = 'relu' + self.act_bwd_name = 'relu_grad' + self.batch = 64 + self.seqlen = 128 + self.hidden = 768 + self.precision = 'AMP' # FP32 or AMP + + +class TestFuseGemmEpiloguePassGeluFP32(TestFuseGemmEpiloguePassReluFP32): + def init(self): + self.atol = 1e-3 + self.rtol = 1e-3 + self.activation = nn.GELU + self.act_fwd_name = 'gelu' + self.act_bwd_name = 'gelu_grad' + self.batch = 64 + self.seqlen = 128 + self.hidden = 768 + self.precision = 'FP32' # FP32 or AMP + + +class TestFuseGemmEpiloguePassGeluFP16(TestFuseGemmEpiloguePassReluFP32): + def init(self): + self.atol = 5e-3 + self.rtol = 1e-3 + self.activation = nn.GELU + self.act_fwd_name = 'gelu' + self.act_bwd_name = 'gelu_grad' + self.batch = 64 + self.seqlen = 128 + self.hidden = 768 + self.precision = 'AMP' # FP32 or AMP + + +if __name__ == "__main__": + unittest.main() From e48767fe83a208f584dff417e718d7ae54b3bd67 Mon Sep 17 00:00:00 2001 From: LiYuRio <63526175+LiYuRio@users.noreply.github.com> Date: Fri, 2 Dec 2022 16:05:50 +0800 Subject: [PATCH 116/154] remove less_than (#48584) --- .../fleet/utils/hybrid_parallel_inference.py | 4 +- python/paddle/fluid/layers/control_flow.py | 71 ++----------------- python/paddle/fluid/tests/test_if_else_op.py | 8 +-- .../auto_parallel/test_while_op_partition.py | 4 +- .../fleet/hybrid_parallel_inference_helper.py | 4 +- .../tests/unittests/dist_fleet_simnet_bow.py | 2 +- .../fluid/tests/unittests/dist_transformer.py | 4 +- .../ir/test_ir_subgraph_python_interface.py | 2 +- .../tests/unittests/npu/test_while_op_npu.py | 4 +- .../test_standalone_controlflow.py | 2 +- .../fluid/tests/unittests/test_assert_op.py | 2 +- .../paddle/fluid/tests/unittests/test_case.py | 10 +-- .../fluid/tests/unittests/test_compare_op.py | 7 +- .../paddle/fluid/tests/unittests/test_cond.py | 2 +- .../tests/unittests/test_device_guard.py | 2 +- .../tests/unittests/test_dist_fleet_ps.py | 2 +- .../tests/unittests/test_dist_fleet_ps11.py | 2 +- .../tests/unittests/test_dist_fleet_ps12.py | 2 +- .../tests/unittests/test_dist_fleet_ps13.py | 2 +- .../tests/unittests/test_dist_fleet_ps2.py | 2 +- .../tests/unittests/test_dist_fleet_ps3.py | 2 +- .../tests/unittests/test_dist_fleet_ps4.py | 2 +- .../tests/unittests/test_dist_fleet_ps5.py | 2 +- .../tests/unittests/test_dist_fleet_ps6.py | 2 +- .../test_dynamic_rnn_stop_gradient.py | 4 +- .../unittests/test_eager_deletion_while_op.py | 8 +-- .../tests/unittests/test_imperative_basic.py | 2 +- .../test_ir_memory_optimize_ifelse_op.py | 2 +- .../fluid/tests/unittests/test_layers.py | 22 +++--- .../fluid/tests/unittests/test_profiler.py | 4 +- .../tests/unittests/test_program_code.py | 3 +- .../fluid/tests/unittests/test_switch.py | 7 +- .../unittests/test_tensor_array_to_tensor.py | 2 +- .../tests/unittests/test_while_loop_op.py | 24 +++---- .../fluid/tests/unittests/test_while_op.py | 10 +-- .../unittests/xpu/test_device_guard_xpu.py | 2 +- .../tests/unittests/xpu/test_while_op_xpu.py | 10 +-- 
.../paddle/jit/dy2static/convert_operators.py | 3 +- 38 files changed, 94 insertions(+), 155 deletions(-) diff --git a/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py b/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py index cd1d05e913cb61..49aed0862f6974 100644 --- a/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py +++ b/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py @@ -129,7 +129,7 @@ class HybridParallelInferenceHelper: data = layers.array_write(X, step_idx) cond_int = layers.fill_constant(shape=[1], dtype="int64", value=0, force_cpu=False, name="cond_int") - cond = layers.less_than(x=step_idx, y=max_len) + cond = paddle.less_than(x=step_idx, y=max_len) while_op = layers.While(cond, is_test=True) with while_op.block(): @@ -153,7 +153,7 @@ class HybridParallelInferenceHelper: layers.array_write(hidden2, i=step_idx, array=data) # update cond and assign to cond_int, we will sync cond_int - layers.less_than(x=step_idx, y=max_len, cond=cond) + paddle.assign(paddle.less_than(x=step_idx, y=max_len), cond) layers.assign(layers.cast(cond, dtype="int32"), cond_int) with paddle.fluid.device_guard(f'{device}:all'): diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 80f0830e22e0ba..cd49f94e035b80 100755 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -56,7 +56,6 @@ 'Switch', 'increment', 'array_write', - 'less_than', 'array_read', 'cond', 'IfElse', @@ -1214,11 +1213,11 @@ class While: loop_len = fluid.layers.fill_constant(shape=[1],dtype='int64', value=10) # loop length - cond = fluid.layers.less_than(x=i, y=loop_len) + cond = paddle.less_than(x=i, y=loop_len) while_op = fluid.layers.While(cond=cond) with while_op.block(): i = fluid.layers.increment(x=i, value=1, in_place=True) - fluid.layers.less_than(x=i, y=loop_len, cond=cond) + paddle.assign(paddle.less_than(x=i, y=loop_len), cond) exe = fluid.Executor(fluid.CPUPlace()) exe.run(fluid.default_startup_program()) @@ -1230,6 +1229,7 @@ class While: Examples 2: .. code-block:: python + import paddle import paddle.fluid as fluid import numpy as np @@ -1239,14 +1239,14 @@ class While: data = fluid.data(name='data', shape=[1], dtype='float32') sums = fluid.layers.fill_constant(shape=[1], dtype='float32', value=0) # Define the variable to be obtained ouside of While, which name should be different from the variable inside the While to be obtained - cond = fluid.layers.less_than(x=i, y=loop_len) + cond = paddle.less_than(x=i, y=loop_len) while_op = fluid.layers.While(cond=cond) with while_op.block(): sums_tensor = fluid.layers.elementwise_add(x=data, y=data) fluid.layers.assign(sums_tensor, sums) # Update the value of sums_tensor defined in While to the sums which defined outside of While through layers.assign i = fluid.layers.increment(x=i, value=1, in_place=True) data = fluid.layers.elementwise_add(x=data, y=one) - fluid.layers.less_than(x=i, y=loop_len, cond=cond) + paddle.assign(paddle.less_than(x=i, y=loop_len), cond) feed_data = np.ones(1).astype('float32') exe = fluid.Executor(fluid.CPUPlace()) @@ -1748,64 +1748,6 @@ def array_write(x, i, array=None): return array -@templatedoc() -def less_than(x, y, force_cpu=None, cond=None, name=None): - """ - - ${comment} - - Args: - x(Tensor): ${x_comment}. - y(Tensor): ${y_comment}. - force_cpu(${force_cpu_type}): ${force_cpu_comment}. 
- cond(Tensor, optional): Optional output which can be any created Tensor - that meets the requirements to store the result of *less_than*. - if cond is None, a new Tensor will be created to store the result. - name(str, optional): The default value is None. Normally there is no need for - user to set this property. For more information, please refer to :ref:`api_guide_Name`. - Returns: - ${out_comment}. - - Examples: - .. code-block:: python - - import paddle - - x = paddle.to_tensor([1, 2, 3, 4], dtype='float32') - y = paddle.to_tensor([2, 2, 1, 3], dtype='float32') - result = paddle.less_than(x, y) - print(result) # [True, False, False, False] - - """ - check_variable_and_dtype( - x, "x", ["float32", "float64", "int32", "int64"], "less_than" - ) - check_variable_and_dtype( - y, "y", ["float32", "float64", "int32", "int64"], "less_than" - ) - if cond is not None: - check_type(cond, "cond", Variable, "less_than") - if force_cpu is not None: - check_type(force_cpu, "force_cpu", bool, "less_than") - - helper = LayerHelper("less_than", **locals()) - if cond is None: - cond = helper.create_variable_for_type_inference(dtype='bool') - cond.stop_gradient = True - - attrs = dict() - if force_cpu is not None: - attrs['force_cpu'] = force_cpu - - helper.append_op( - type='less_than', - inputs={'X': [x], 'Y': [y]}, - outputs={'Out': [cond]}, - attrs=attrs, - ) - return cond - - def array_read(array, i): """ This OP is used to read data at the specified position from the input array @@ -1932,8 +1874,9 @@ class ConditionalBlock: Examples: .. code-block:: python + import paddle import paddle.fluid as fluid - cond = layers.less_than(x=label, y=limit) + cond = paddle.less_than(x=label, y=limit) true_image, false_image = layers.split_lod_tensor( input=image, mask=cond) true_cond = layers.ConditionalBlock([true_image]) diff --git a/python/paddle/fluid/tests/test_if_else_op.py b/python/paddle/fluid/tests/test_if_else_op.py index 4a7f213465768d..1eba6cbb60ee19 100644 --- a/python/paddle/fluid/tests/test_if_else_op.py +++ b/python/paddle/fluid/tests/test_if_else_op.py @@ -43,7 +43,7 @@ def not_test_raw_api(self): label = layers.data(name='y', shape=[1], dtype='int64') limit = layers.fill_constant(shape=[1], dtype='int64', value=5) - cond = layers.less_than(x=label, y=limit) + cond = paddle.less_than(x=label, y=limit) true_image, false_image = split_lod_tensor(input=image, mask=cond) true_out = layers.create_tensor(dtype='float32') @@ -105,7 +105,7 @@ def not_test_ifelse(self): label = layers.data(name='y', shape=[1], dtype='int64') limit = layers.fill_constant(shape=[1], dtype='int64', value=5) - cond = layers.less_than(x=label, y=limit) + cond = paddle.less_than(x=label, y=limit) ie = layers.IfElse(cond) with ie.true_block(): @@ -174,7 +174,7 @@ def compare_ifelse_op_and_numpy(self, place): cond = layers.fill_constant( [1], dtype='float32', value=self.cond_value ) - ifcond = layers.less_than(x=src, y=cond) + ifcond = paddle.less_than(x=src, y=cond) ie = layers.IfElse(ifcond) with ie.true_block(): true_target = ie.input(src) @@ -237,7 +237,7 @@ def test_input_type_error(self): const_value = layers.fill_constant( [1], dtype='float32', value=123.0 ) - ifcond = layers.less_than(x=src, y=const_value) + ifcond = paddle.less_than(x=src, y=const_value) with self.assertRaises(TypeError): ie = layers.IfElse(set()) with self.assertRaises(TypeError): diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py 
index 8825abe3324756..fcfd783f71f6df 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py @@ -171,7 +171,7 @@ def get_program(): # "dims_mapping": [-1, -1, -1] # }) - cond = fluid.layers.less_than(x=i, y=loop_len) + cond = paddle.less_than(x=i, y=loop_len) auto.shard_tensor(cond, _g_process_mesh, [None]) while_op = fluid.layers.While(cond=cond) @@ -191,7 +191,7 @@ def get_program(): # 更新循环条件 i = fluid.layers.increment(x=i, value=1, in_place=True) fluid.layers.array_write(cur_pred, array=input_array, i=i) - fluid.layers.less_than(x=i, y=loop_len, cond=cond) + paddle.assign(paddle.less_than(x=i, y=loop_len), cond) end_pred = fluid.layers.array_read(array=input_array, i=i) auto.shard_tensor(end_pred, _g_process_mesh, [None, None, None]) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_inference_helper.py b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_inference_helper.py index 2cb6f3326e91e6..542b1ba6379364 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_inference_helper.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_inference_helper.py @@ -83,7 +83,7 @@ def test_hybrid_parallel_inference_helper_mp1pp2(self): name="cond_int", ) print(cond_int.shape) - cond = layers.less_than(x=step_idx, y=max_len) + cond = paddle.less_than(x=step_idx, y=max_len) while_op = layers.While(cond, is_test=True) with while_op.block(): @@ -119,7 +119,7 @@ def test_hybrid_parallel_inference_helper_mp1pp2(self): layers.array_write(hidden2, i=step_idx, array=data) # update cond and assign to cond_int, we will sync cond_int - layers.less_than(x=step_idx, y=max_len, cond=cond) + paddle.assign(paddle.less_than(x=step_idx, y=max_len), cond) layers.assign(layers.cast(cond, dtype="int32"), cond_int) with paddle.fluid.device_guard(f'{device}:all'): diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py index 2c16fff90a59f9..be2ea401ea93f0 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py @@ -54,7 +54,7 @@ def reader(): def get_acc(cos_q_nt, cos_q_pt, batch_size): - cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) + cond = paddle.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) acc = paddle.divide( diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index 52b6f674e5c19e..7106c426bcfc8f 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -1762,7 +1762,7 @@ def beam_search(): step_idx = layers.fill_constant( shape=[1], dtype=start_tokens.dtype, value=0 ) - cond = layers.less_than(x=step_idx, y=max_len) + cond = paddle.less_than(x=step_idx, y=max_len) while_op = layers.While(cond) # array states will be stored for each step. 
ids = layers.array_write( @@ -1861,7 +1861,7 @@ def beam_search(): for i in range(n_layer): layers.assign(pre_caches[i]["k"], caches[i]["k"]) layers.assign(pre_caches[i]["v"], caches[i]["v"]) - length_cond = layers.less_than(x=step_idx, y=max_len) + length_cond = paddle.less_than(x=step_idx, y=max_len) finish_cond = paddle.logical_not(layers.is_empty(x=selected_ids)) paddle.logical_and(x=length_cond, y=finish_cond, out=cond) diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py b/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py index 2b4577a6189d88..2a7ebae0710737 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py @@ -51,7 +51,7 @@ def false_func(): with program_guard(main_program, startup_program): x = layers.fill_constant(shape=[1], dtype='float32', value=0.1) y = layers.fill_constant(shape=[1], dtype='float32', value=0.23) - pred = layers.less_than(y, x) + pred = paddle.less_than(y, x) out = layers.cond(pred, true_func, false_func) core_graph = core.Graph(main_program.desc) diff --git a/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py index 1ff374fa9f9a7e..c63f11b85910cd 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py @@ -73,7 +73,7 @@ def simple_net(self): i = layers.increment(x=i, in_place=True) layers.array_write(result, i=i, array=mem_array) - layers.less_than(x=i, y=array_len, cond=cond) + paddle.assign(paddle.less_than(x=i, y=array_len), cond) with while_op2.block(): d2 = layers.array_read(array=data_array, i=j) @@ -82,7 +82,7 @@ def simple_net(self): j = layers.increment(x=j, in_place=True) layers.array_write(result2, i=j, array=mem_array) - layers.less_than(x=j, y=array_len2, cond=cond2) + paddle.assign(paddle.less_than(x=j, y=array_len2), cond2) sum_result = layers.array_read(array=mem_array, i=j) loss = paddle.mean(sum_result) return loss, sum_result diff --git a/python/paddle/fluid/tests/unittests/standalone_executor/test_standalone_controlflow.py b/python/paddle/fluid/tests/unittests/standalone_executor/test_standalone_controlflow.py index 81c1049c5bf1c1..c989ff866e8c01 100644 --- a/python/paddle/fluid/tests/unittests/standalone_executor/test_standalone_controlflow.py +++ b/python/paddle/fluid/tests/unittests/standalone_executor/test_standalone_controlflow.py @@ -56,7 +56,7 @@ def false_func(): with program_guard(main_program, startup_program): x = layers.fill_constant(shape=[1], dtype='float32', value=0.1) y = layers.fill_constant(shape=[1], dtype='float32', value=0.23) - pred = layers.less_than(x, y) + pred = paddle.less_than(x, y) out = layers.cond(pred, true_func, false_func) # out is a tuple containing 2 tensors return main_program, startup_program, out diff --git a/python/paddle/fluid/tests/unittests/test_assert_op.py b/python/paddle/fluid/tests/unittests/test_assert_op.py index a006f999287306..d59194aef56cba 100644 --- a/python/paddle/fluid/tests/unittests/test_assert_op.py +++ b/python/paddle/fluid/tests/unittests/test_assert_op.py @@ -61,7 +61,7 @@ def test_assert_print_data(self): def net_func(): zero = layers.fill_constant(shape=[1], dtype='int64', value=0) one = layers.fill_constant(shape=[1], dtype='int64', value=1) - condition = layers.less_than(one, zero) # False + condition = paddle.less_than(one, zero) # False 
layers.Assert(condition, [zero, one]) print("test_assert_print_data") diff --git a/python/paddle/fluid/tests/unittests/test_case.py b/python/paddle/fluid/tests/unittests/test_case.py index 777db3a3480003..3ab6e983d90192 100644 --- a/python/paddle/fluid/tests/unittests/test_case.py +++ b/python/paddle/fluid/tests/unittests/test_case.py @@ -42,8 +42,8 @@ def fn_3(): x = layers.fill_constant(shape=[1], dtype='float32', value=0.3) y = layers.fill_constant(shape=[1], dtype='float32', value=0.1) z = layers.fill_constant(shape=[1], dtype='float32', value=0.2) - pred_2 = layers.less_than(x, y) # false: 0.3 < 0.1 - pred_1 = layers.less_than(z, x) # true: 0.2 < 0.3 + pred_2 = paddle.less_than(x, y) # false: 0.3 < 0.1 + pred_1 = paddle.less_than(z, x) # true: 0.2 < 0.3 # call fn_1 out_0 = layers.case( @@ -200,8 +200,8 @@ def fn_3(): x = layers.fill_constant(shape=[1], dtype='float32', value=0.3) y = layers.fill_constant(shape=[1], dtype='float32', value=0.1) z = layers.fill_constant(shape=[1], dtype='float32', value=0.2) - pred_2 = layers.less_than(x, y) # false: 0.3 < 0.1 - pred_1 = layers.less_than(z, x) # true: 0.2 < 0.3 + pred_2 = paddle.less_than(x, y) # false: 0.3 < 0.1 + pred_1 = paddle.less_than(z, x) # true: 0.2 < 0.3 out_1 = layers.case( pred_fn_pairs=[(pred_1, fn_1), (pred_2, fn_2)], default=fn_3 @@ -239,7 +239,7 @@ def fn_1(): with program_guard(main_program, startup_program): x = layers.fill_constant(shape=[1], dtype='float32', value=0.23) z = layers.fill_constant(shape=[1], dtype='float32', value=0.2) - pred_1 = layers.less_than(z, x) # true + pred_1 = paddle.less_than(z, x) # true # The type of 'pred_fn_pairs' in case must be list or tuple def type_error_pred_fn_pairs(): diff --git a/python/paddle/fluid/tests/unittests/test_compare_op.py b/python/paddle/fluid/tests/unittests/test_compare_op.py index 8cce73391d692e..fa98771ce1ab5e 100755 --- a/python/paddle/fluid/tests/unittests/test_compare_op.py +++ b/python/paddle/fluid/tests/unittests/test_compare_op.py @@ -44,12 +44,7 @@ def test_errors(self): x = fluid.layers.data(name='x', shape=[2], dtype='int32') y = fluid.layers.data(name='y', shape=[2], dtype='int32') a = fluid.layers.data(name='a', shape=[2], dtype='int16') - if self.op_type == "less_than": - self.assertRaises( - TypeError, fluid.layers.less_than, x=x, y=y, force_cpu=1 - ) op = eval("paddle.%s" % self.op_type) - self.assertRaises(TypeError, op, x=x, y=y, cond=1) self.assertRaises(TypeError, op, x=x, y=a) self.assertRaises(TypeError, op, x=a, y=y) @@ -481,7 +476,7 @@ def test_place_1(self): place = paddle.CUDAPlace(0) label = fluid.layers.assign(np.array([3, 3], dtype="int32")) limit = fluid.layers.assign(np.array([3, 2], dtype="int32")) - out = fluid.layers.less_than(label, limit, force_cpu=True) + out = paddle.less_than(label, limit) exe = fluid.Executor(place) (res,) = exe.run(fetch_list=[out]) self.assertEqual((res == np.array([False, False])).all(), True) diff --git a/python/paddle/fluid/tests/unittests/test_cond.py b/python/paddle/fluid/tests/unittests/test_cond.py index d31ac885b1ebea..bc5a73d048dfad 100644 --- a/python/paddle/fluid/tests/unittests/test_cond.py +++ b/python/paddle/fluid/tests/unittests/test_cond.py @@ -53,7 +53,7 @@ def false_func(): with program_guard(main_program, startup_program): x = layers.fill_constant(shape=[1], dtype='float32', value=0.1) y = layers.fill_constant(shape=[1], dtype='float32', value=0.23) - pred = layers.less_than(y, x) + pred = paddle.less_than(y, x) out = layers.cond(pred, true_func, false_func) # out is one tensor diff --git 
a/python/paddle/fluid/tests/unittests/test_device_guard.py b/python/paddle/fluid/tests/unittests/test_device_guard.py index 34a029e7bac57b..d62893de97c7d7 100644 --- a/python/paddle/fluid/tests/unittests/test_device_guard.py +++ b/python/paddle/fluid/tests/unittests/test_device_guard.py @@ -156,7 +156,7 @@ def test_without_kernel_op(self): while_op = fluid.layers.While(cond=cond) with while_op.block(): i = paddle.increment(x=i, value=1) - fluid.layers.less_than(x=i, y=loop_len, cond=cond) + paddle.assign(paddle.less_than(x=i, y=loop_len), cond) warning = "The Op(while) is not support to set device." warning_num = get_vaild_warning_num(warning, w) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py index a1e568f6a05321..c641155d9f2085 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py @@ -36,7 +36,7 @@ class TestPSPassWithBow(unittest.TestCase): def net(self): def get_acc(cos_q_nt, cos_q_pt, batch_size): - cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) + cond = paddle.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) acc = paddle.divide( diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py index e7ce16057e7cfa..0261df66709e24 100755 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py @@ -36,7 +36,7 @@ class TestPSPassWithBow(unittest.TestCase): def net(self): def get_acc(cos_q_nt, cos_q_pt, batch_size): - cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) + cond = paddle.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) acc = paddle.divide( diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py index f97046db9477b4..330d62cfa039a9 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py @@ -39,7 +39,7 @@ class TestPSPassWithBow(unittest.TestCase): def net(self): def get_acc(cos_q_nt, cos_q_pt, batch_size): - cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) + cond = paddle.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) acc = paddle.divide( diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py index ead8e6437a080d..687c8d06ad9a83 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py @@ -40,7 +40,7 @@ class TestPSPassWithBow(unittest.TestCase): def net(self): def get_acc(cos_q_nt, cos_q_pt, batch_size): - cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) + cond = paddle.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) acc = paddle.divide( diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py index 1ea94b85bc1dc6..d75e16f7776d67 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py @@ -39,7 +39,7 @@ class TestPSPassWithBow(unittest.TestCase): def net(self): def 
get_acc(cos_q_nt, cos_q_pt, batch_size): - cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) + cond = paddle.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) acc = paddle.divide( diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py index 0f2c840019412b..3b735d193b19b5 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py @@ -36,7 +36,7 @@ class TestPSPassWithBow(unittest.TestCase): def net(self): def get_acc(cos_q_nt, cos_q_pt, batch_size): - cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) + cond = paddle.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) acc = paddle.divide( diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py index a7d4f06c029034..d1fbfb8937943c 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py @@ -36,7 +36,7 @@ class TestPSPassWithBow(unittest.TestCase): def net(self): def get_acc(cos_q_nt, cos_q_pt, batch_size): - cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) + cond = paddle.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) acc = paddle.divide( diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py index 25bb1b0e37f471..e2e81a747abfc2 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py @@ -36,7 +36,7 @@ class TestPSPassWithBow(unittest.TestCase): def net(self): def get_acc(cos_q_nt, cos_q_pt, batch_size): - cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) + cond = paddle.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) acc = paddle.divide( diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py index a8c26ed9b70ff6..8e8eacece9f2b9 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py @@ -36,7 +36,7 @@ class TestPSPassWithBow(unittest.TestCase): def net(self): def get_acc(cos_q_nt, cos_q_pt, batch_size): - cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) + cond = paddle.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') cond_3 = paddle.sum(cond) acc = paddle.divide( diff --git a/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py b/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py index a92052c05065f3..9774ea32e44f68 100644 --- a/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py @@ -36,7 +36,7 @@ def build_and_run_program(place, batch_size, beam_size, stop_gradient=False): max_len = layers.fill_constant( shape=[1], dtype="int64", value=10, force_cpu=True ) - cond = layers.less_than(x=step_idx, y=max_len) + cond = paddle.less_than(x=step_idx, y=max_len) while_op = layers.While(cond) scores = layers.array_write(x, step_idx) with while_op.block(): @@ -53,7 +53,7 @@ def build_and_run_program(place, batch_size, beam_size, stop_gradient=False): score = 
paddle.gather_nd(x, topk_coordinates) layers.increment(x=step_idx, value=1.0, in_place=True) layers.array_write(score, i=step_idx, array=scores) - length_cond = layers.less_than(x=step_idx, y=max_len) + length_cond = paddle.less_than(x=step_idx, y=max_len) layers.assign(length_cond, cond) out = layers.tensor_array_to_tensor(scores, axis=0, use_stack=True)[0] diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py index d61e6a6f078b38..943642b857cce0 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py @@ -94,14 +94,14 @@ def run_main(self, place, with_data_parallel): array_len = layers.fill_constant(shape=[1], dtype='int64', value=1) array_len.stop_gradient = True - cond = layers.less_than(x=i, y=array_len) + cond = paddle.less_than(x=i, y=array_len) j = layers.fill_constant(shape=[1], dtype='int64', value=1) j.stop_gradient = True array_len2 = layers.fill_constant(shape=[1], dtype='int64', value=3) array_len2.stop_gradient = True - cond2 = layers.less_than(x=j, y=array_len2) + cond2 = paddle.less_than(x=j, y=array_len2) while_op = layers.While(cond=cond) while_op2 = layers.While(cond=cond2) @@ -114,7 +114,7 @@ def run_main(self, place, with_data_parallel): i = layers.increment(x=i, in_place=True) layers.array_write(result, i=i, array=mem_array) - layers.less_than(x=i, y=array_len, cond=cond) + paddle.assign(paddle.less_than(x=i, y=array_len), cond) with while_op2.block(): d2 = layers.array_read(array=data_array, i=j) prev2 = layers.array_read(array=mem_array, i=j) @@ -124,7 +124,7 @@ def run_main(self, place, with_data_parallel): j = layers.increment(x=j, in_place=True) layers.array_write(result2, i=j, array=mem_array) - layers.less_than(x=j, y=array_len2, cond=cond2) + paddle.assign(paddle.less_than(x=j, y=array_len2), cond2) sum_result = layers.array_read(array=mem_array, i=j) sum_result.persistable = True diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index 4441098c941413..a8aa34eb44b08b 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -744,7 +744,7 @@ def func_dygraph_vs_static(self): paddle.reshape(paddle.sum(inp_data2), [1, 1]), [4, -1], ) - cond = fluid.layers.less_than(x=a, y=b) + cond = paddle.less_than(x=a, y=b) ie = fluid.layers.IfElse(cond) with ie.true_block(): diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py index 595dd02e432bda..852f4e550326e0 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py @@ -41,7 +41,7 @@ def check_network_convergence( label = layers.data(name='y', shape=[1], dtype='int64') limit = layers.fill_constant(shape=[1], dtype='int64', value=5) - cond = layers.less_than(x=label, y=limit) + cond = paddle.less_than(x=label, y=limit) ie = layers.IfElse(cond) with ie.true_block(): diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 1597269b29adc1..1fad29135853c7 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -2423,7 
+2423,7 @@ def func_while_loop(self): ten = layers.fill_constant(shape=[1], dtype='int64', value=10) def cond(i): - return layers.less_than(i, ten) + return paddle.less_than(i, ten) def body(i): return i + 1 @@ -2436,7 +2436,7 @@ def body(i): ten = layers.fill_constant(shape=[1], dtype='int64', value=10) def cond1(i): - return layers.less_than(i, ten) + return paddle.less_than(i, ten) def body1(i): return i + 1 @@ -2464,7 +2464,7 @@ def test_compare(self): with self.static_graph(): a = layers.data(name='a', shape=[1], dtype='int64') b = layers.data(name='b', shape=[1], dtype='int64') - cond = layers.less_than(x=a, y=b) + cond = paddle.less_than(x=a, y=b) static_ret = self.get_static_graph_result( feed={"a": value_a, "b": value_b}, fetch_list=[cond] )[0] @@ -2472,14 +2472,14 @@ def test_compare(self): with _test_eager_guard(): da = base.to_variable(value_a) db = base.to_variable(value_b) - dcond = layers.less_than(x=da, y=db) + dcond = paddle.less_than(x=da, y=db) for i in range(len(static_ret)): self.assertTrue(dcond.numpy()[i] == static_ret[i]) da = base.to_variable(value_a) db = base.to_variable(value_b) - dcond = layers.less_than(x=da, y=db) + dcond = paddle.less_than(x=da, y=db) for i in range(len(static_ret)): self.assertTrue(dcond.numpy()[i] == static_ret[i]) @@ -2696,8 +2696,8 @@ def fn_3(): y = layers.fill_constant(shape=[1], dtype='float32', value=0.1) z = layers.fill_constant(shape=[1], dtype='float32', value=0.2) - pred_1 = layers.less_than(z, x) # true: 0.2 < 0.3 - pred_2 = layers.less_than(x, y) # false: 0.3 < 0.1 + pred_1 = paddle.less_than(z, x) # true: 0.2 < 0.3 + pred_2 = paddle.less_than(x, y) # false: 0.3 < 0.1 pred_3 = paddle.equal(x, y) # false: 0.3 == 0.1 out_1 = layers.case( @@ -2719,8 +2719,8 @@ def fn_3(): y = layers.fill_constant(shape=[1], dtype='float32', value=0.1) z = layers.fill_constant(shape=[1], dtype='float32', value=0.2) - pred_1 = layers.less_than(z, x) # true: 0.2 < 0.3 - pred_2 = layers.less_than(x, y) # false: 0.3 < 0.1 + pred_1 = paddle.less_than(z, x) # true: 0.2 < 0.3 + pred_2 = paddle.less_than(x, y) # false: 0.3 < 0.1 pred_3 = paddle.equal(x, y) # false: 0.3 == 0.1 out_1 = layers.case( @@ -2736,8 +2736,8 @@ def fn_3(): y = layers.fill_constant(shape=[1], dtype='float32', value=0.1) z = layers.fill_constant(shape=[1], dtype='float32', value=0.2) - pred_1 = layers.less_than(z, x) # true: 0.2 < 0.3 - pred_2 = layers.less_than(x, y) # false: 0.3 < 0.1 + pred_1 = paddle.less_than(z, x) # true: 0.2 < 0.3 + pred_2 = paddle.less_than(x, y) # false: 0.3 < 0.1 pred_3 = paddle.equal(x, y) # false: 0.3 == 0.1 out_1 = layers.case( diff --git a/python/paddle/fluid/tests/unittests/test_profiler.py b/python/paddle/fluid/tests/unittests/test_profiler.py index 33034811650653..6b414afbe4a7fb 100644 --- a/python/paddle/fluid/tests/unittests/test_profiler.py +++ b/python/paddle/fluid/tests/unittests/test_profiler.py @@ -45,13 +45,13 @@ def build_program(self, compile_program=True): ) until = layers.fill_constant([1], dtype='int64', value=10) data_arr = layers.array_write(hidden1, i) - cond = fluid.layers.less_than(x=counter, y=until) + cond = paddle.less_than(x=counter, y=until) while_op = fluid.layers.While(cond=cond) with while_op.block(): hidden_n = fluid.layers.fc(input=hidden1, size=64, act='relu') layers.array_write(hidden_n, i, data_arr) fluid.layers.increment(x=counter, value=1, in_place=True) - layers.less_than(x=counter, y=until, cond=cond) + paddle.assign(paddle.less_than(x=counter, y=until), cond) hidden_n = layers.array_read(data_arr, i) hidden2 = 
fluid.layers.fc(input=hidden_n, size=64, act='relu') diff --git a/python/paddle/fluid/tests/unittests/test_program_code.py b/python/paddle/fluid/tests/unittests/test_program_code.py index 449f97c22f0309..e60706794f5b1c 100644 --- a/python/paddle/fluid/tests/unittests/test_program_code.py +++ b/python/paddle/fluid/tests/unittests/test_program_code.py @@ -14,6 +14,7 @@ import unittest +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers @@ -44,7 +45,7 @@ def false_func(): with fluid.program_guard(program): x = layers.fill_constant(shape=[1], dtype='float32', value=0.1) y = layers.fill_constant(shape=[1], dtype='float32', value=0.23) - pred = layers.less_than(y, x) + pred = paddle.less_than(y, x) out = layers.cond(pred, true_func, false_func) def test_program_code(self): diff --git a/python/paddle/fluid/tests/unittests/test_switch.py b/python/paddle/fluid/tests/unittests/test_switch.py index 250d844a3fab6a..5fe69ee420a78a 100644 --- a/python/paddle/fluid/tests/unittests/test_switch.py +++ b/python/paddle/fluid/tests/unittests/test_switch.py @@ -14,6 +14,7 @@ import unittest +import paddle import paddle.fluid.core as core import paddle.fluid.framework as framework import paddle.fluid.layers as layers @@ -34,11 +35,11 @@ def check_switch(self, value): ) with layers.Switch() as switch: - with switch.case(layers.less_than(x, zero_var)): + with switch.case(paddle.less_than(x, zero_var)): layers.assign(zero_var, result) - with switch.case(layers.less_than(x, one_var)): + with switch.case(paddle.less_than(x, one_var)): layers.assign(one_var, result) - with switch.case(layers.less_than(x, two_var)): + with switch.case(paddle.less_than(x, two_var)): layers.assign(two_var, result) with switch.default(): layers.assign(three_var, result) diff --git a/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py index 123a920af7cc6c..e662e1488c50e9 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py @@ -281,7 +281,7 @@ def test_while_loop_case(self): fluid.layers.array_write(x0, zero, array) def cond(i, end, array): - return fluid.layers.less_than(i, end) + return paddle.less_than(i, end) def body(i, end, array): prev = fluid.layers.array_read(array, i - 1) diff --git a/python/paddle/fluid/tests/unittests/test_while_loop_op.py b/python/paddle/fluid/tests/unittests/test_while_loop_op.py index d69f114f64b07c..8e733ef9208b31 100644 --- a/python/paddle/fluid/tests/unittests/test_while_loop_op.py +++ b/python/paddle/fluid/tests/unittests/test_while_loop_op.py @@ -29,7 +29,7 @@ class TestApiWhileLoop(unittest.TestCase): def test_var_tuple(self): def cond(i): - return layers.less_than(i, ten) + return paddle.less_than(i, ten) def body(i): return paddle.add(x=i, y=one) @@ -55,7 +55,7 @@ def body(i): def test_var_list(self): def cond(i, mem): - return layers.less_than(i, ten) + return paddle.less_than(i, ten) def body(i, mem): mem = paddle.add(x=mem, y=one) @@ -87,7 +87,7 @@ def body(i, mem): def test_var_dict(self): def cond(i, ten, test_dict, test_list, test_list_dict): - return layers.less_than(i, ten) + return paddle.less_than(i, ten) def body(i, ten, test_dict, test_list, test_list_dict): test_dict["test_key"] = i @@ -159,11 +159,11 @@ def body(i, ten, test_dict, test_list, test_list_dict): class TestApiWhileLoop_Nested(unittest.TestCase): def test_nested_net(self): def external_cond(i, j, init, sums): - 
return layers.less_than(i, loop_len1) + return paddle.less_than(i, loop_len1) def external_body(i, j, init, sums): def internal_cond(j, init, sums): - return layers.less_than(j, loop_len2) + return paddle.less_than(j, loop_len2) def internal_body(j, init, sums): init = paddle.add(x=init, y=ones) @@ -219,7 +219,7 @@ def internal_body(j, init, sums): class TestApiWhileLoop_Backward(unittest.TestCase): def test_while_loop_backward(self): def cond(i, x): - return layers.less_than(i, eleven) + return paddle.less_than(i, eleven) def body(i, x): x = paddle.multiply(x=i, y=i) @@ -307,11 +307,11 @@ def body(i, x): class TestApiWhileLoop_NestedWithBackwardAndLoDTensorArray(unittest.TestCase): def test_nested_net_with_backward_and_lodtensor(self): def external_cond(i, j, x, mem_array): - return layers.less_than(i, array_len) + return paddle.less_than(i, array_len) def external_body(i, j, x, mem_array): def internal_cond(j, x, mem_array): - return layers.less_than(j, array_len2) + return paddle.less_than(j, array_len2) def internal_body(j, x, mem_array): inner_data = layers.array_read(array=data_array, i=j) @@ -390,7 +390,7 @@ def internal_body(j, x, mem_array): class TestApiWhileLoopWithSwitchCase(unittest.TestCase): def test_with_switch_case(self): def cond(i): - return layers.less_than(i, ten) + return paddle.less_than(i, ten) def body(i): def fn_add_three(): @@ -441,13 +441,13 @@ def cond_returns_not_bool_tensor(i): return layers.increment(i) def cond_returns_bool_tensor(i): - return layers.less_than(i, ten) + return paddle.less_than(i, ten) def cond_returns_2d_tensor(i): - return layers.less_than(i, ten_2d) + return paddle.less_than(i, ten_2d) def cond_receives_two_args(i, ten): - return layers.less_than(i, ten) + return paddle.less_than(i, ten) def body(i): return layers.increment(i) diff --git a/python/paddle/fluid/tests/unittests/test_while_op.py b/python/paddle/fluid/tests/unittests/test_while_op.py index cb5f1e3664f7df..f77d9767f3c8b9 100644 --- a/python/paddle/fluid/tests/unittests/test_while_op.py +++ b/python/paddle/fluid/tests/unittests/test_while_op.py @@ -50,12 +50,12 @@ def simple_net(self): i.stop_gradient = True array_len = layers.fill_constant(shape=[1], dtype='int64', value=1) array_len.stop_gradient = True - cond = layers.less_than(x=i, y=array_len) + cond = paddle.less_than(x=i, y=array_len) j = layers.fill_constant(shape=[1], dtype='int64', value=1) j.stop_gradient = True array_len2 = layers.fill_constant(shape=[1], dtype='int64', value=3) array_len2.stop_gradient = True - cond2 = layers.less_than(x=j, y=array_len2) + cond2 = paddle.less_than(x=j, y=array_len2) while_op = layers.While(cond=cond) while_op2 = layers.While(cond=cond2) with while_op.block(): @@ -65,7 +65,7 @@ def simple_net(self): i = layers.increment(x=i, in_place=True) layers.array_write(result, i=i, array=mem_array) - layers.less_than(x=i, y=array_len, cond=cond) + paddle.assign(paddle.less_than(x=i, y=array_len), cond) with while_op2.block(): d2 = layers.array_read(array=data_array, i=j) @@ -74,7 +74,7 @@ def simple_net(self): j = layers.increment(x=j, in_place=True) layers.array_write(result2, i=j, array=mem_array) - layers.less_than(x=j, y=array_len2, cond=cond2) + paddle.assign(paddle.less_than(x=j, y=array_len2), cond2) sum_result = layers.array_read(array=mem_array, i=j) loss = paddle.mean(sum_result) return loss, sum_result @@ -120,7 +120,7 @@ def test_simple_net_forward(self): def test_exceptions(self): i = layers.zeros(shape=[2], dtype='int64') array_len = layers.fill_constant(shape=[2], dtype='int64', 
value=1) - cond = layers.less_than(x=i, y=array_len) + cond = paddle.less_than(x=i, y=array_len) with self.assertRaises(TypeError): layers.While(cond=cond) cond = layers.cast(cond, dtype='float64') diff --git a/python/paddle/fluid/tests/unittests/xpu/test_device_guard_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_device_guard_xpu.py index ca4824f554bcc0..3e126318df2ae0 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_device_guard_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_device_guard_xpu.py @@ -162,7 +162,7 @@ def test_without_kernel_op(self): while_op = fluid.layers.While(cond=cond) with while_op.block(): i = paddle.increment(x=i, value=1) - fluid.layers.less_than(x=i, y=loop_len, cond=cond) + paddle.assign(paddle.less_than(x=i, y=loop_len), cond) warning = "The Op(while) is not support to set device." warning_num = get_vaild_warning_num(warning, w) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_while_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_while_op_xpu.py index e0ee57d2bf9404..e52e8fdceb7ed8 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_while_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_while_op_xpu.py @@ -49,12 +49,12 @@ def simple_net(self): i.stop_gradient = True array_len = layers.fill_constant(shape=[1], dtype='int64', value=1) array_len.stop_gradient = True - cond = layers.less_than(x=i, y=array_len) + cond = paddle.less_than(x=i, y=array_len) j = layers.fill_constant(shape=[1], dtype='int64', value=1) j.stop_gradient = True array_len2 = layers.fill_constant(shape=[1], dtype='int64', value=3) array_len2.stop_gradient = True - cond2 = layers.less_than(x=j, y=array_len2) + cond2 = paddle.less_than(x=j, y=array_len2) while_op = layers.While(cond=cond) while_op2 = layers.While(cond=cond2) with while_op.block(): @@ -64,7 +64,7 @@ def simple_net(self): i = layers.increment(x=i, in_place=True) layers.array_write(result, i=i, array=mem_array) - layers.less_than(x=i, y=array_len, cond=cond) + paddle.assign(paddle.less_than(x=i, y=array_len), cond) with while_op2.block(): d2 = layers.array_read(array=data_array, i=j) @@ -73,7 +73,7 @@ def simple_net(self): j = layers.increment(x=j, in_place=True) layers.array_write(result2, i=j, array=mem_array) - layers.less_than(x=j, y=array_len2, cond=cond2) + paddle.assign(paddle.less_than(x=j, y=array_len2), cond2) sum_result = layers.array_read(array=mem_array, i=j) loss = paddle.mean(sum_result) return loss, sum_result @@ -119,7 +119,7 @@ def test_simple_net_forward(self): def test_exceptions(self): i = layers.zeros(shape=[2], dtype='int64') array_len = layers.fill_constant(shape=[2], dtype='int64', value=1) - cond = layers.less_than(x=i, y=array_len) + cond = paddle.less_than(x=i, y=array_len) with self.assertRaises(TypeError): layers.While(cond=cond) cond = layers.cast(cond, dtype='float64') diff --git a/python/paddle/jit/dy2static/convert_operators.py b/python/paddle/jit/dy2static/convert_operators.py index 02cafb77bbd30a..fa622b14094de0 100644 --- a/python/paddle/jit/dy2static/convert_operators.py +++ b/python/paddle/jit/dy2static/convert_operators.py @@ -36,7 +36,6 @@ from paddle.fluid.layers.control_flow import ( cond, while_loop, - less_than, increment, ) from .return_transformer import ( @@ -782,7 +781,7 @@ def _run_paddle_pop(array, *args): assert isinstance(idx, int) def cond(i, new_array): - return less_than(i, arr_len) + return paddle.less_than(i, arr_len) def body(i, new_array): item = array_read(array=array, i=i) From 
ef575d6af426d592ce0a844d869dd6f7dc068302 Mon Sep 17 00:00:00 2001 From: Bo Zhang <105368690+zhangbopd@users.noreply.github.com> Date: Fri, 2 Dec 2022 16:36:00 +0800 Subject: [PATCH 117/154] Split common funcs from reduction and structure modification (#46970) * profile reduce kernel for fp16 and reduceHigherdim * use reinterpret_cast * fix for CI on ROCm * add Macro for ROCm * ROCm CI config * ROCm CI config * unit test repair * pull * add common_funcs.h * reduceType * Update reduce_function.h * not higher * rename --- paddle/phi/kernels/funcs/index_calculator.h | 112 +++++++++ paddle/phi/kernels/funcs/reduce_function.h | 243 +++++++------------- paddle/phi/kernels/gpu/cross_grad_kernel.cu | 2 +- paddle/phi/kernels/gpu/cross_kernel.cu | 2 +- 4 files changed, 192 insertions(+), 167 deletions(-) create mode 100644 paddle/phi/kernels/funcs/index_calculator.h diff --git a/paddle/phi/kernels/funcs/index_calculator.h b/paddle/phi/kernels/funcs/index_calculator.h new file mode 100644 index 00000000000000..13697e443e16de --- /dev/null +++ b/paddle/phi/kernels/funcs/index_calculator.h @@ -0,0 +1,112 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +// CUDA, XPU and HIP use same api +#if defined(__NVCC__) || defined(__HIPCC__) || defined(__xpu__) + +#include +#include +#include +#include +#include + +#include "paddle/phi/kernels/primitive/kernel_primitives.h" +namespace kps = phi::kps; + +namespace phi { +namespace funcs { + +constexpr int kMaxRank = phi::DDim::kMaxRank; + +namespace details { +// Convert dims from vector to array +template +static inline phi::Array VectorToArray( + const VectorLikeType& vec) { + PADDLE_ENFORCE_LE( + vec.size(), + ElementCount, + phi::errors::InvalidArgument("Vector to Array: size not match. 
Received " + "vec.size() %d > ElementCount %d.", + vec.size(), + ElementCount)); + size_t n = static_cast(vec.size()); + phi::Array ret; + for (size_t i = 0; i < n; ++i) { + ret[i] = vec[i]; + } + return ret; +} +} // namespace details +struct IndexCalculator { + IndexCalculator(int dim, + const std::vector& cal_dims, + const std::vector& cal_strides, + const std::vector& full_strides) + : dim(dim) { + dims = details::VectorToArray(cal_dims); + strides = details::VectorToArray(full_strides); + reduce_strides = details::VectorToArray(cal_strides); +#ifndef PADDLE_WITH_XPU_KP + std::vector cal_divmoders; + // fast divmod + for (auto i : cal_strides) { + cal_divmoders.push_back(kps::details::FastDivMod(i)); + } + divmoders = details::VectorToArray( + cal_divmoders); +#endif + } + + __device__ inline int operator()(int offset) const { +#ifdef PADDLE_WITH_XPU_KP + int index = 0; +#pragma unroll + for (int i = 0; i < kMaxRank; ++i) { + if (i == dim) { + break; + } + index += (offset / reduce_strides[i]) * strides[dims[i]]; + offset = offset % reduce_strides[i]; + } + return index; +#else + int index = 0; +#pragma unroll + for (int i = 0; i < kMaxRank; ++i) { + if (i == dim) { + break; + } + auto divmod = divmoders[i].Divmod(offset); + index += (divmod.val[0] * strides[dims[i]]); + offset = divmod.val[1]; + } + return index; +#endif + } + + int dim; + phi::Array dims; + phi::Array strides; + phi::Array reduce_strides; +#ifndef PADDLE_WITH_XPU_KP + phi::Array divmoders; +#endif +}; + +#endif +} // namespace funcs +} // namespace phi diff --git a/paddle/phi/kernels/funcs/reduce_function.h b/paddle/phi/kernels/funcs/reduce_function.h index be64e3c7db7ddc..b48f2eb4cdf2b2 100644 --- a/paddle/phi/kernels/funcs/reduce_function.h +++ b/paddle/phi/kernels/funcs/reduce_function.h @@ -42,6 +42,7 @@ namespace cub = hipcub; #include "paddle/phi/kernels/cast_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" +#include "paddle/phi/kernels/funcs/index_calculator.h" #include "paddle/phi/kernels/primitive/kernel_primitives.h" #include "paddle/utils/string/string_helper.h" @@ -69,40 +70,7 @@ namespace funcs { #if defined(__NVCC__) || defined(__HIPCC__) || defined(__xpu__) namespace details { -static inline int GetLastPow2(int n) { - n |= (n >> 1); - n |= (n >> 2); - n |= (n >> 4); - n |= (n >> 8); - n |= (n >> 16); - return std::max(1, n - (n >> 1)); -} - -static inline int64_t AlignUp(int64_t a, int64_t b) { return (a + b - 1) / b; } - -// get strides of x_dim, reduce_dim and left_dim for reduceLastDim and reduceAny -static inline std::vector GetDimStrides(const std::vector& dims, - const std::vector& idx) { - int n = static_cast(idx.size()); - if (n == 0) return std::vector(); - std::vector strides(n); - strides.back() = 1; - for (int i = n - 2; i >= 0; --i) { - strides[i] = strides[i + 1] * dims[idx[i + 1]]; - } - return strides; -} - -#ifndef PADDLE_WITH_XPU_KP -// get blockDim for reduceLastDim and reduceAny -static inline int GetBlockDim(int block_dim) { - return block_dim >= kps::details::kReduceMaxThread - ? 
kps::details::kReduceMaxThread - : GetLastPow2(block_dim); -} -#endif - -// check reduce rand is valid +// Check if reduce rand is valid static inline void CheckReduceRank(int reduce_rank, int rank) { if (rank % 2 == 0) { PADDLE_ENFORCE_EQ(reduce_rank, @@ -129,25 +97,6 @@ static inline void CheckReduceRank(int reduce_rank, int rank) { } } -// convert dims from vector to array -template -static inline phi::Array VectorToArray( - const VectorLikeType& vec) { - PADDLE_ENFORCE_LE( - vec.size(), - ElementCount, - phi::errors::InvalidArgument("Cub reduce Array: size not match. Received " - "vec.size() %d > ElementCount %d.", - vec.size(), - ElementCount)); - size_t n = static_cast(vec.size()); - phi::Array ret; - for (size_t i = 0; i < n; ++i) { - ret[i] = vec[i]; - } - return ret; -} - static inline std::vector GetReduceDim(const std::vector& dims, int dim_size, bool reduce_all) { @@ -173,9 +122,33 @@ static inline std::vector GetReduceDim(const std::vector& dims, return reduce_dims; } -} // namespace details +// Return 2^[floor(log2(n))] +static inline int GetLastPow2(int n) { + n |= (n >> 1); + n |= (n >> 2); + n |= (n >> 4); + n |= (n >> 8); + n |= (n >> 16); + return std::max(1, n - (n >> 1)); +} + +static inline int64_t CeilingDiv(int64_t a, int64_t b) { + return (a + b - 1) / b; +} -constexpr int kMaxRank = phi::DDim::kMaxRank; +// Get strides of x_dim, reduce_dim and left_dim for reduceLastDim and reduceAny +static inline std::vector GetDimStrides(const std::vector& dims, + const std::vector& idx) { + int n = static_cast(idx.size()); + if (n == 0) return std::vector(); + std::vector strides(n); + strides.back() = 1; + for (int i = n - 2; i >= 0; --i) { + strides[i] = strides[i + 1] * dims[idx[i + 1]]; + } + return strides; +} +} // namespace details enum ReduceType { kReduceLastDim = 0x01, // when reduce_dim[0] == x_dim.size() - 1; @@ -183,62 +156,6 @@ enum ReduceType { kReduceAny = 0x03, // when reduce_dim.size() > 1 }; -struct IndexCalculator { - IndexCalculator(int dim, - const std::vector& cal_dims, - const std::vector& cal_strides, - const std::vector& full_strides) - : dim(dim) { - dims = details::VectorToArray(cal_dims); - strides = details::VectorToArray(full_strides); - reduce_strides = details::VectorToArray(cal_strides); -#ifndef PADDLE_WITH_XPU_KP - std::vector cal_divmoders; - // fast divmod - for (auto i : cal_strides) { - cal_divmoders.push_back(kps::details::FastDivMod(i)); - } - divmoders = details::VectorToArray( - cal_divmoders); -#endif - } - - __device__ inline int operator()(int offset) const { -#ifdef PADDLE_WITH_XPU_KP - int index = 0; -#pragma unroll - for (int i = 0; i < kMaxRank; ++i) { - if (i == dim) { - break; - } - index += (offset / reduce_strides[i]) * strides[dims[i]]; - offset = offset % reduce_strides[i]; - } - return index; -#else - int index = 0; -#pragma unroll - for (int i = 0; i < kMaxRank; ++i) { - if (i == dim) { - break; - } - auto divmod = divmoders[i].Divmod(offset); - index += (divmod.val[0] * strides[dims[i]]); - offset = divmod.val[1]; - } - return index; -#endif - } - - int dim; - phi::Array dims; - phi::Array strides; - phi::Array reduce_strides; -#ifndef PADDLE_WITH_XPU_KP - phi::Array divmoders; -#endif -}; - template struct ReduceIndexMapping { const kps::DimConfig dim; @@ -311,7 +228,6 @@ struct ReduceIndexMapping { // for higher performance struct OneDimIndexCal { explicit OneDimIndexCal(int num) : stride(num) {} - __device__ inline int operator()(int index) const { return index * stride; } int stride; }; @@ -323,7 +239,22 @@ struct 
ReduceConfig { const std::vector& origin_x_dim) : reduce_dims_origin(origin_reduce_dims), x_dim(origin_x_dim) {} - // get the parameters of reduceKernel + std::vector reduce_dims_origin; + std::vector reduce_dim, x_dim, left_dim; + std::vector reduce_strides, x_strides, left_strides; + + int reduce_type; + int reduce_num; + int left_num = 1; + int blocking_size; + bool should_reduce_again = false; + bool reduce_last_dim = false; + bool vectorize_input = false; + Ty* output_data; + dim3 block; + dim3 grid; + + // Get the parameters of reduceKernel void Run(const KPDevice& dev_ctx) { // step1: update the reduce_dim left_dim and x_dim SetReduceDim(); @@ -336,13 +267,23 @@ struct ReduceConfig { // step4: set the block and grid for launch kernel SetBlockDim(); + #ifndef PADDLE_WITH_XPU_KP // step5: limit the grid to prevent thead overflow phi::backends::gpu::LimitGridDim(dev_ctx, &grid); -#endif +#endif // PADDLE_WITH_XPU_KP } - // when should_reduce_again is true, we need malloc temp space for temp data +#ifndef PADDLE_WITH_XPU_KP + // Get blockDim for reduceLastDim and reduceAny + int GetBlockDim(int block_dim) { + return block_dim >= kps::details::kReduceMaxThread + ? kps::details::kReduceMaxThread + : details::GetLastPow2(block_dim); + } +#endif // PADDLE_WITH_XPU_KP + + // If should_reduce_again, we need malloc temp space for temp data void SetOutputData(Ty* y_data, const KPDevice& dev_ctx, phi::DenseTensor* tmp) { @@ -458,7 +399,6 @@ struct ReduceConfig { left_strides = details::GetDimStrides(x_dim, left_dim); reduce_num = reduce_strides[0] * x_dim[reduce_dim[0]]; - left_num = 1; if (left_dim.size()) { left_num = left_strides[0] * x_dim[left_dim[0]]; } @@ -478,11 +418,10 @@ struct ReduceConfig { int device_id = paddle::platform::GetCurrentDeviceId(); int max_grid_z = phi::backends::gpu::GetGpuMaxGridDimSize(device_id)[2]; bool not_higher = x_dim[0] >= max_grid_z; -#endif +#endif // PADDLE_WITH_XPU_KP + reduce_type = static_cast(ReduceType::kReduceAny); if (reduce_last_dim && (reduce_rank == 1)) { -#ifdef PADDLE_WITH_XPU_KP - reduce_type = static_cast(ReduceType::kReduceAny); -#else +#ifndef PADDLE_WITH_XPU_KP reduce_type = static_cast(ReduceType::kReduceLastDim); #endif } else if (reduce_rank == 1) { @@ -490,8 +429,6 @@ struct ReduceConfig { if (rank == 3 && not_higher) { reduce_type = static_cast(ReduceType::kReduceAny); } - } else { - reduce_type = static_cast(ReduceType::kReduceAny); } } @@ -501,7 +438,7 @@ struct ReduceConfig { constexpr int max_reduce_num_per_thread = 256; constexpr int max_num_threads = kps::details::kReduceMaxThread; - // set block size. + // Set block size. // 1. If reduce_last_dim == true, all the threads whose threadIdx.y are same // will process the reduction for one output. 
// The number of output for one block is blockDim.y; @@ -512,23 +449,23 @@ struct ReduceConfig { int block_x, block_y; int grid_num, reduce_num_per_thread; if (reduce_last_dim) { - block_x = details::GetBlockDim(reduce_num); - block_y = details::GetBlockDim(left_num); + block_x = GetBlockDim(reduce_num); + block_y = GetBlockDim(left_num); block_dim->x = block_x; block_dim->y = std::min(block_y, static_cast(max_num_threads / block_dim->x)); - grid_num = details::AlignUp(left_num, block_dim->y); - reduce_num_per_thread = details::AlignUp(reduce_num, block_dim->x); + grid_num = details::CeilingDiv(left_num, block_dim->y); + reduce_num_per_thread = details::CeilingDiv(reduce_num, block_dim->x); } else { - block_x = details::GetBlockDim(left_num); - block_y = details::GetBlockDim(reduce_num); + block_x = GetBlockDim(left_num); + block_y = GetBlockDim(reduce_num); block_dim->x = std::min(block_x, 32); block_dim->y = std::min(block_y, static_cast(max_num_threads / block_dim->x)); block_dim->x = std::min(block_x, static_cast(max_num_threads / block_dim->y)); - grid_num = details::AlignUp(left_num, block_dim->x); - reduce_num_per_thread = details::AlignUp(reduce_num, block_dim->y); + grid_num = details::CeilingDiv(left_num, block_dim->x); + reduce_num_per_thread = details::CeilingDiv(reduce_num, block_dim->y); } int device_id = paddle::platform::GetCurrentDeviceId(); int max_mp = paddle::platform::GetGPUMultiProcessors(device_id); @@ -538,7 +475,7 @@ struct ReduceConfig { int num_threads = block_dim->x * block_dim->y; int max_num_blocks = max_threads / num_threads; - // set grid size. + // Set grid size. // Whether to set grid.y larger than 1, there are 3 following rules: // 1. The number that each thread process should no less than // min_reduce_num_per_threadbut no more than max_reduce_num_per_thread; @@ -548,10 +485,10 @@ struct ReduceConfig { // the number cannot be larger than max_reduce_num_per_thread, so we // choose the maximum between the result above and input_split_num_2. 
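      // Illustrative note (editorial, not part of the original patch): a worked
      // example of the rules above using hypothetical numbers. Suppose
      // reduce_num_per_thread = 2048, min_reduce_num_per_thread = 16,
      // max_reduce_num_per_thread = 256 and CeilingDiv(max_num_blocks, grid_num) = 8.
      // Then input_split_num_1 = CeilingDiv(2048, 16)  = 128,
      //      input_split_num_2 = CeilingDiv(2048, 256) = 8,
      //      input_split_num_3 = 8,
      // and grid_dim->y = max(min(128, 8), 8) = 8: the split along grid.y is capped
      // by how many blocks the device can keep busy (input_split_num_3), but never
      // falls below the minimum needed to respect max_reduce_num_per_thread.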
int input_split_num_1 = - details::AlignUp(reduce_num_per_thread, min_reduce_num_per_thread); + details::CeilingDiv(reduce_num_per_thread, min_reduce_num_per_thread); int input_split_num_2 = - details::AlignUp(reduce_num_per_thread, max_reduce_num_per_thread); - int input_split_num_3 = details::AlignUp(max_num_blocks, grid_num); + details::CeilingDiv(reduce_num_per_thread, max_reduce_num_per_thread); + int input_split_num_3 = details::CeilingDiv(max_num_blocks, grid_num); grid_dim->x = grid_num; grid_dim->y = std::max(std::min(input_split_num_1, input_split_num_3), @@ -562,13 +499,13 @@ struct ReduceConfig { } } - // set block and grid for launch kernel + // Set block and grid for launch kernel // for ReduceHigherDim: if block is enough -> splite reduce_num // else init block(32, 1) grid(block_num, 1) // for others: block(block_num, 1) , grid(left_num, 1) void SetBlockDimForHigher(dim3* block_dim, dim3* grid_dim) { int last_dim_num = x_dim.back(); - // update left_num + // Update left_num int grid_z = left_num / last_dim_num; left_num = last_dim_num; grid_dim->z = grid_z; @@ -579,8 +516,8 @@ struct ReduceConfig { int max_threads = max_threads_per_mp * max_mp; // init int num_block = (max_threads / left_num); - block_dim->x = details::GetBlockDim(left_num); - grid_dim->x = details::AlignUp(left_num, block_dim->x); + block_dim->x = GetBlockDim(left_num); + grid_dim->x = details::CeilingDiv(left_num, block_dim->x); blocking_size = reduce_num; if (num_block > 1 && reduce_num >= REDUCE_SPLIT_BOUNDARY) { @@ -591,14 +528,12 @@ struct ReduceConfig { blocking_size *= 2; } should_reduce_again = true; - grid_dim->y = details::AlignUp(reduce_num, blocking_size); + grid_dim->y = details::CeilingDiv(reduce_num, blocking_size); } } #endif void SetBlockDim() { - // init - should_reduce_again = false; dim3 block_dim(1, 1, 1); dim3 grid_dim(left_num, 1, 1); blocking_size = reduce_num; @@ -626,25 +561,6 @@ struct ReduceConfig { block = block_dim; grid = grid_dim; } - - public: - std::vector reduce_dims_origin; - std::vector reduce_dim; - std::vector x_dim; - std::vector left_dim; - std::vector x_strides; - std::vector left_strides; - std::vector reduce_strides; - - int reduce_type; - int reduce_num; - int left_num; - int blocking_size; - bool should_reduce_again; - bool reduce_last_dim; - Ty* output_data; - dim3 block; - dim3 grid; }; // when reduce_dim.size() == 1 and reduce_dim[0] == x_dim.size() - 1, or @@ -901,7 +817,6 @@ static void LaunchReduceKernel(const Tx* x_data, left_index_calculator, dim, is_mean && (!config.should_reduce_again)); - } else { int reduce_rank = config.reduce_strides.size(); int left_rank = config.left_strides.size(); @@ -948,14 +863,12 @@ static void LaunchReduceKernel(const Tx* x_data, dim3 grid; if (config.reduce_last_dim) { block = dim3(32, 1, 1); - grid = dim3(details::AlignUp(config.left_num, 32), 1, 1); + grid = dim3(details::CeilingDiv(config.left_num, 32), 1, 1); } else { block = dim3(config.block.x, 1, 1); grid = dim3(config.grid.x, 1, config.grid.z); } - auto last_index = OneDimIndexCal(1); - auto first_index = OneDimIndexCal(config.left_num); kps::DimConfig dim = kps::DimConfig(grid.x, grid.y, grid.z, block.x, config.grid.y, 0); dim.SetRem(config.left_num % block.x, 0, 0); diff --git a/paddle/phi/kernels/gpu/cross_grad_kernel.cu b/paddle/phi/kernels/gpu/cross_grad_kernel.cu index a6399ba39dcaec..4a27a8e2b05f97 100644 --- a/paddle/phi/kernels/gpu/cross_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/cross_grad_kernel.cu @@ -18,7 +18,7 @@ #include 
"paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/funcs/reduce_function.h" +#include "paddle/phi/kernels/funcs/index_calculator.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/cross_kernel.cu b/paddle/phi/kernels/gpu/cross_kernel.cu index 0e1e7b3a42568b..875c043188d4ce 100644 --- a/paddle/phi/kernels/gpu/cross_kernel.cu +++ b/paddle/phi/kernels/gpu/cross_kernel.cu @@ -18,7 +18,7 @@ #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/funcs/reduce_function.h" +#include "paddle/phi/kernels/funcs/index_calculator.h" namespace phi { From 23299c70f70c8c350c7c041b5b4d3503bfe424ba Mon Sep 17 00:00:00 2001 From: heyanru <81976792+heyanru01@users.noreply.github.com> Date: Fri, 2 Dec 2022 16:40:54 +0800 Subject: [PATCH 118/154] [Fluid Clean] remove argsort, linspace, diag (#48456) --- python/paddle/fluid/layers/tensor.py | 267 ------------------ .../unittests/ipu/test_argsort_op_ipu.py | 2 +- .../fluid/tests/unittests/test_argsort_op.py | 11 +- .../paddle/fluid/tests/unittests/test_diag.py | 3 +- .../fluid/tests/unittests/test_layers.py | 13 +- .../fluid/tests/unittests/test_linspace.py | 16 +- 6 files changed, 19 insertions(+), 293 deletions(-) diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 4c00061ae7608b..e131744cd8685e 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -58,10 +58,7 @@ 'fill_constant', 'argmin', 'argmax', - 'argsort', 'zeros', - 'linspace', - 'diag', ] @@ -1140,102 +1137,6 @@ def argmax(x, axis=0): return out -def argsort(input, axis=-1, descending=False, name=None): - """ - :alias_main: paddle.argsort - :alias: paddle.argsort,paddle.tensor.argsort,paddle.tensor.search.argsort - :old_api: paddle.fluid.layers.argsort - - This OP sorts the input along the given axis, and returns sorted output - data Varibale and its corresponding index Variable with the same shape as - :attr:`input`. - - Args: - input(Variable): An input N-D Tensor with type float32, float64, int16, - int32, int64, uint8. - axis(int, optional): Axis to compute indices along. The effective range - is [-R, R), where R is Rank(x). when axis<0, it works the same way - as axis+R. Default is 0. - descending(bool, optional) : Descending is a flag, if set to true, - algorithm will sort by descending order, else sort by - ascending order. Default is false. - name(str, optional): The default value is None. Normally there is no - need for user to set this property. For more information, please - refer to :ref:`api_guide_Name`. - - Returns: - tuple: A tuple of sorted data Variable(with the same shape and data - type as input) and the sorted indices(with the same shape as input's - and with data type int64). - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import numpy as np - - in1 = np.array([[[5,8,9,5], - [0,0,1,7], - [6,9,2,4]], - [[5,2,4,2], - [4,7,7,9], - [1,7,0,6]]]).astype(np.float32) - with fluid.dygraph.guard(): - x = fluid.dygraph.to_variable(in1) - out1 = fluid.layers.argsort(input=x, axis=-1) - out2 = fluid.layers.argsort(input=x, axis=0) - out3 = fluid.layers.argsort(input=x, axis=1) - print(out1[0].numpy()) - # [[[5. 5. 8. 9.] - # [0. 0. 1. 7.] - # [2. 4. 6. 9.]] - # [[2. 2. 4. 5.] - # [4. 7. 7. 9.] - # [0. 1. 6. 
7.]]] - print(out1[1].numpy()) - # [[[0 3 1 2] - # [0 1 2 3] - # [2 3 0 1]] - # [[1 3 2 0] - # [0 1 2 3] - # [2 0 3 1]]] - print(out2[0].numpy()) - # [[[5. 2. 4. 2.] - # [0. 0. 1. 7.] - # [1. 7. 0. 4.]] - # [[5. 8. 9. 5.] - # [4. 7. 7. 9.] - # [6. 9. 2. 6.]]] - print(out3[0].numpy()) - # [[[0. 0. 1. 4.] - # [5. 8. 2. 5.] - # [6. 9. 9. 7.]] - # [[1. 2. 0. 2.] - # [4. 7. 4. 6.] - # [5. 7. 7. 9.]]] - """ - check_variable_and_dtype( - input, - 'input', - ['float32', 'float64', 'int16', 'int32', 'int64', 'uint8'], - 'argsort', - ) - helper = LayerHelper("argsort", **locals()) - out = helper.create_variable_for_type_inference( - dtype=input.dtype, stop_gradient=True - ) - ids = helper.create_variable_for_type_inference( - VarDesc.VarType.INT64, stop_gradient=True - ) - helper.append_op( - type='argsort', - inputs={'X': input}, - outputs={'Out': out, 'Indices': ids}, - attrs={'axis': axis, 'descending': descending}, - ) - return out, ids - - def zeros(shape, dtype, force_cpu=False, name=None): """ The OP creates a tensor of specified :attr:`shape` and :attr:`dtype`, and fills it with 0. @@ -1265,171 +1166,3 @@ def zeros(shape, dtype, force_cpu=False, name=None): data1 = fluid.layers.zeros(shape=shape, dtype='int32') #[[0, 0], [0, 0]] """ return fill_constant(value=0.0, **locals()) - - -def linspace(start, stop, num, dtype=None, name=None): - r""" - This OP return fixed number of evenly spaced values within a given interval. - - Args: - start(int|float|Tensor): The input :attr:`start` is start variable of range. It is a scalar, \ - or a Tensor of shape [1] with input data type int32, int64, float32 or float64. - stop(int|float|Tensor): The input :attr:`stop` is start variable of range. It is a scalar, \ - or a Tensor of shape [1] with input data type int32, int64, float32 or float64. - num(int|Tensor): The input :attr:`num` is given num of the sequence. It is an int scalar, \ - or a Tensor of shape [1] with data type int32. - dtype(np.dtype|str, optional): The data type of output tensor, it could be - int32, int64, float32 and float64. Default: if None, the data type is float32. - name(str, optional): Normally there is no need for user to set this property. - For more information, please refer to :ref:`api_guide_Name`.Default: None. - - Returns: - Tensor: the output data type will be float32, float64. The 1-D tensor with fixed number of evenly spaced values, \ - the data shape of this tensor is :math:`[num]` . If the :attr:`num` is set 1, the output tensor just has \ - the value with input :attr:`start`. - - Examples: - .. 
code-block:: python - - import paddle - data = paddle.linspace(0, 10, 5, 'float32') # [0.0, 2.5, 5.0, 7.5, 10.0] - data = paddle.linspace(0, 10, 1, 'float32') # [0.0] - - """ - if dtype is None: - dtype = 'float32' - tensor_num = num - tensor_start = start - tensor_stop = stop - if not isinstance(num, Variable): - check_type(num, 'num', (int), 'linspace') - if not isinstance(dtype, core.VarDesc.VarType): - dtype = convert_np_dtype_to_dtype_(dtype) - if not isinstance(start, Variable): - with device_guard("cpu"): - tensor_start = fill_constant([1], dtype, start) - if not isinstance(stop, Variable): - with device_guard("cpu"): - tensor_stop = fill_constant([1], dtype, stop) - if not isinstance(num, Variable): - with device_guard("cpu"): - tensor_num = fill_constant([1], 'int32', num) - if in_dygraph_mode(): - return _C_ops.linspace( - tensor_start, - tensor_stop, - tensor_num, - dtype, - _current_expected_place(), - ) - if _in_legacy_dygraph(): - return _legacy_C_ops.linspace( - tensor_start, tensor_stop, tensor_num, 'dtype', dtype - ) - helper = LayerHelper("linspace", **locals()) - - start_dtype = convert_dtype(tensor_start.dtype) - stop_dtype = convert_dtype(tensor_stop.dtype) - out_dtype = convert_dtype(dtype) - if isinstance(start, Variable): - check_dtype( - start.dtype, - 'start', - ['float32', 'float64', 'int32', 'int64'], - 'linspace', - ) - else: - check_type(start, 'start', (int, float), 'linspace') - - if isinstance(stop, Variable): - check_dtype( - stop.dtype, - 'stop', - ['float32', 'float64', 'int32', 'int64'], - 'linspace', - ) - else: - check_type(stop, 'stop', (int, float), 'linspace') - if isinstance(num, Variable): - check_dtype(num.dtype, 'num', ['int32'], 'linspace') - check_dtype( - dtype, 'dtype', ['int32', 'int64', 'float32', 'float64'], 'linspace' - ) - if ( - (stop_dtype == "float64" or start_dtype == "float64") - and out_dtype in ["float32", "int32"] - ) or ( - (stop_dtype == "int64" or start_dtype == "int64") - and out_dtype == "int32" - ): - raise ValueError( - "The dtype of start/stop is {}/{} but the attr(dtype) of linspace is {}, " - "which may cause data type overflows. Please reset attr(dtype) of linspace.".format( - start_dtype, stop_dtype, dtype - ) - ) - - out = helper.create_variable_for_type_inference(dtype=dtype) - - helper.append_op( - type='linspace', - inputs={'Start': tensor_start, 'Stop': tensor_stop, 'Num': tensor_num}, - attrs={'dtype': dtype}, - outputs={'Out': [out]}, - ) - if isinstance(num, int): - out.desc.set_shape((num,)) - return out - - -@deprecated(since="2.0.0", update_to="paddle.diag") -def diag(diagonal): - r""" - :alias_main: paddle.diag - :alias: paddle.diag,paddle.tensor.diag,paddle.tensor.creation.diag - :old_api: paddle.fluid.layers.diag - - This OP creates a square matrix which has diagonal values specified by input :attr:`diagonal`. - - Args: - diagonal(Variable|numpy.ndarray): The input tensor should be 1D tensor, the input shape is :math:`[ N]` , \ - specifying diagonal values by this input tensor. The input data type should be float32, float64, int32, int64. - - Returns: - Variable, the output data type is the same as input data type.: The tensor variable storing the square matrix, \ - the diagonal values specified by input :attr:`diagonal`. the output shape is :math:`[N, N]` with two dims. - - Examples: - .. 
code-block:: python - - # [[3, 0, 0] - # [0, 4, 0] - # [0, 0, 5] - - import paddle.fluid as fluid - import numpy as np - diagonal = np.arange(3, 6, dtype='int32') - data = fluid.layers.diag(diagonal) - # diagonal.shape=(3,) data.shape=(3, 3) - - """ - check_type(diagonal, 'diagonal', (Variable, numpy.ndarray), 'diag') - check_dtype( - diagonal.dtype, - 'diagonal', - ['float32', 'float64', 'int32', 'int64'], - 'diag', - ) - helper = LayerHelper("diag", **locals()) - - if not isinstance(diagonal, Variable): - diagonal = assign(diagonal) - - out = helper.create_variable_for_type_inference(dtype=diagonal.dtype) - - helper.append_op( - type='diag', inputs={'Diagonal': [diagonal]}, outputs={'Out': [out]} - ) - - out.stop_gradient = True - return out diff --git a/python/paddle/fluid/tests/unittests/ipu/test_argsort_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_argsort_op_ipu.py index c0ae5971926d2c..0c7dcf1977e619 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_argsort_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_argsort_op_ipu.py @@ -50,7 +50,7 @@ def build_model(self): x = paddle.static.data( name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32' ) - out, _ = paddle.fluid.layers.argsort(x, **self.attrs) + out, _ = paddle.argsort(x, **self.attrs) self.fetch_list = [out.name] def run_model(self, exec_mode): diff --git a/python/paddle/fluid/tests/unittests/test_argsort_op.py b/python/paddle/fluid/tests/unittests/test_argsort_op.py index 87ab9604678507..17614692f69d8a 100644 --- a/python/paddle/fluid/tests/unittests/test_argsort_op.py +++ b/python/paddle/fluid/tests/unittests/test_argsort_op.py @@ -91,8 +91,11 @@ def setUp(self): label = fluid.layers.data( name="label", shape=self.input_shape, dtype=self.dtype ) - self.sorted_x, self.index = fluid.layers.argsort( - input=x, axis=self.axis, descending=self.descending + self.index = paddle.argsort( + x=x, axis=self.axis, descending=self.descending + ) + self.sorted_x = paddle.sort( + x=x, axis=self.axis, descending=self.descending ) self.sorted_x.stop_gradient = False loss = paddle.multiply(self.sorted_x, label) @@ -350,13 +353,13 @@ def test_error(self): def test_fluid_var_type(): with fluid.program_guard(fluid.Program()): x = [1] - output = fluid.layers.argsort(input=x) + output = paddle.argsort(x=x) self.assertRaises(TypeError, test_fluid_var_type) def test_paddle_var_type(): with fluid.program_guard(fluid.Program()): x = [1] - output = paddle.argsort(input=x) + output = paddle.argsort(x=x) self.assertRaises(TypeError, test_paddle_var_type) diff --git a/python/paddle/fluid/tests/unittests/test_diag.py b/python/paddle/fluid/tests/unittests/test_diag.py index 06969aac8f349c..b270d47316f40c 100644 --- a/python/paddle/fluid/tests/unittests/test_diag.py +++ b/python/paddle/fluid/tests/unittests/test_diag.py @@ -18,7 +18,6 @@ from op_test import OpTest import paddle -import paddle.fluid as fluid from paddle.fluid import Program, program_guard @@ -50,7 +49,7 @@ def test_errors(self): def test_diag_type(): x = [1, 2, 3] - output = fluid.layers.diag(diag=x) + output = paddle.diag(x=x) self.assertRaises(TypeError, test_diag_type) diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 1fad29135853c7..dfa8ee6f9c196a 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -3445,15 +3445,6 @@ def make_l2_normalize(self): output = layers.l2_normalize(x, axis=1) return output - def 
make_argsort(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - data = self._get_data(name='x', shape=[2, 3, 3], dtype="float32") - out, ids = layers.argsort(input=data, axis=1) - return out - return ids - def make_shape(self): with program_guard( fluid.default_main_program(), fluid.default_startup_program() @@ -3757,7 +3748,7 @@ def test_lod_reset(self): def test_affine_grid(self): with self.static_graph(): data = layers.data(name='data', shape=[2, 3, 3], dtype="float32") - out, ids = layers.argsort(input=data, axis=1) + out = paddle.argsort(x=data, axis=1) theta = layers.data(name="theta", shape=[2, 3], dtype="float32") out_shape = layers.data(name="out_shape", shape=[-1], dtype="int32") @@ -3983,7 +3974,7 @@ def test_flatten(self): def test_linspace(self): program = Program() with program_guard(program): - out = layers.linspace(20, 10, 5, 'float64') + out = paddle.linspace(20, 10, 5, 'float64') self.assertIsNotNone(out) print(str(program)) diff --git a/python/paddle/fluid/tests/unittests/test_linspace.py b/python/paddle/fluid/tests/unittests/test_linspace.py index 2e2c2718f9b97b..5905f617d16926 100644 --- a/python/paddle/fluid/tests/unittests/test_linspace.py +++ b/python/paddle/fluid/tests/unittests/test_linspace.py @@ -139,45 +139,45 @@ def test_errors(self): with program_guard(Program(), Program()): def test_dtype(): - fluid.layers.linspace(0, 10, 1, dtype="int8") + paddle.linspace(0, 10, 1, dtype="int8") self.assertRaises(TypeError, test_dtype) def test_dtype1(): - fluid.layers.linspace(0, 10, 1.33, dtype="int32") + paddle.linspace(0, 10, 1.33, dtype="int32") self.assertRaises(TypeError, test_dtype1) def test_start_type(): - fluid.layers.linspace([0], 10, 1, dtype="float32") + paddle.linspace([0], 10, 1, dtype="float32") self.assertRaises(TypeError, test_start_type) def test_end_type(): - fluid.layers.linspace(0, [10], 1, dtype="float32") + paddle.linspace(0, [10], 1, dtype="float32") self.assertRaises(TypeError, test_end_type) def test_step_dtype(): - fluid.layers.linspace(0, 10, [0], dtype="float32") + paddle.linspace(0, 10, [0], dtype="float32") self.assertRaises(TypeError, test_step_dtype) def test_start_dtype(): start = fluid.data(shape=[1], dtype="float64", name="start") - fluid.layers.linspace(start, 10, 1, dtype="float32") + paddle.linspace(start, 10, 1, dtype="float32") self.assertRaises(ValueError, test_start_dtype) def test_end_dtype(): end = fluid.data(shape=[1], dtype="float64", name="end") - fluid.layers.linspace(0, end, 1, dtype="float32") + paddle.linspace(0, end, 1, dtype="float32") self.assertRaises(ValueError, test_end_dtype) def test_num_dtype(): num = fluid.data(shape=[1], dtype="int32", name="step") - fluid.layers.linspace(0, 10, num, dtype="float32") + paddle.linspace(0, 10, num, dtype="float32") self.assertRaises(TypeError, test_step_dtype) From 6af7b42b8883bc90227f6c81cef284b51d486b32 Mon Sep 17 00:00:00 2001 From: wangzhen38 <41941775+wangzhen38@users.noreply.github.com> Date: Fri, 2 Dec 2022 17:27:15 +0800 Subject: [PATCH 119/154] [remove fluid] drop_out API (#48586) * [remove fluid] drop_out PI * [remove fluid] drop_out PI * [remove fluid] drop_out layernorm * [remove fluid] drop_out layernorm * [remove fluid] drop_out layernorm * [remove fluid] drop_out layernorm --- python/paddle/fluid/dygraph/nn.py | 328 ------------------ .../fleet/parallel_dygraph_transformer.py | 12 +- .../transformer_dygraph_model.py | 6 +- .../unittests/mlu/test_layer_norm_op_mlu.py | 2 +- 
.../fluid/tests/unittests/test_dropout_op.py | 2 +- .../test_imperative_load_static_param.py | 13 +- ..._imperative_transformer_sorted_gradient.py | 6 +- .../tests/unittests/test_layer_norm_op.py | 2 +- .../tests/unittests/test_layer_norm_op_v2.py | 10 +- .../fluid/tests/unittests/test_layers.py | 145 -------- .../test_paddle_fluid_modelaverage.py | 4 +- 11 files changed, 23 insertions(+), 507 deletions(-) diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index bff3d5aacb935b..abef927af86fd3 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -53,10 +53,8 @@ 'Pool2D', 'Linear', 'BatchNorm', - 'Dropout', 'Embedding', 'GRUUnit', - 'LayerNorm', 'NCE', 'PRelu', 'BilinearTensorProduct', @@ -1184,124 +1182,6 @@ def forward(self, input): return self._helper.append_activation(batch_norm_out, self._act) -class Dropout(layers.Layer): - """ - This interface is used to construct a callable object of the ``Dropout`` class. - For more details, refer to code examples. - - Drop or keep each element of input independently. Dropout is a regularization - technique for reducing overfitting by preventing neuron co-adaption during - training. The dropout operator randomly sets (according to the given dropout - probability) the outputs of some units to zero, while others are remain - unchanged. - - Dropout layer can be removed for efficiency concern. - - Parameters: - p (float, optional): Probability of setting units to zero. Default: 0.5 - seed (int, optional): A Python integer used to create random seeds. If this - parameter is set to None, a random seed is used. - NOTE: If an integer seed is given, always the same output - units will be dropped. DO NOT use a fixed seed in training. Default: None. - dropout_implementation(string, optional): ['downgrade_in_infer'(default)|'upscale_in_train'] - - 1. downgrade_in_infer(default), downgrade the outcome at inference - - - train: out = input * mask - - inference: out = input * (1.0 - p) - - (mask is a tensor same shape with input, value is 0 or 1 - ratio of 0 is dropout_prob) - 2. upscale_in_train, upscale the outcome at training time - - - train: out = input * mask / ( 1.0 - p ) - - inference: out = input - - (mask is a tensor same shape with input, value is 0 or 1 - ratio of 0 is p) - is_test (bool, optional): A flag indicating whether it is in test phrase or not. - This flag only has effect on static graph mode. For dygraph mode, please use ``eval()``. - Default: False. - - Returns: - None - - Examples: - - .. 
code-block:: python - - import paddle.fluid as fluid - from paddle.fluid.dygraph.base import to_variable - import numpy as np - - x = np.random.random(size=(3, 10, 3, 7)).astype('float32') - with fluid.dygraph.guard(): - x = to_variable(x) - m = fluid.dygraph.Dropout(p=0.5) - droped_train = m(x) - # switch to eval mode - m.eval() - droped_eval = m(x) - """ - - def __init__( - self, - p=0.5, - seed=None, - dropout_implementation="downgrade_in_infer", - is_test=False, - ): - super().__init__() - assert isinstance(p, (float, int)), "p argument should be a number" - assert 0 <= p <= 1, "p argument should between 0 and 1" - self._dropout_prob = p - assert seed is None or isinstance( - seed, int - ), "seed argument should be None or a integer" - self._seed = seed - assert dropout_implementation in ( - 'downgrade_in_infer', - 'upscale_in_train', - ), "dropout_implementation argument should be 'downgrade_in_infer' or 'upscale_in_train'" - self._dropout_implementation = dropout_implementation - self._is_test = is_test - - def forward(self, input): - # fast return for p == 0 - if self._dropout_prob == 0: - return input - prog = default_main_program() - if (self._seed is None or self._seed == 0) and prog.random_seed != 0: - self._seed = prog.random_seed - attrs = { - 'dropout_prob': self._dropout_prob, - 'is_test': not self.training - if _non_static_mode() - else self._is_test, - 'fix_seed': self._seed is not None, - 'seed': self._seed if self._seed is not None else 0, - 'dropout_implementation': self._dropout_implementation, - } - - if _non_static_mode(): - attrs = sum(attrs.items(), ()) - out, mask = _legacy_C_ops.dropout(input, *attrs) - return out - - out = self._helper.create_variable_for_type_inference(dtype=input.dtype) - mask = self._helper.create_variable_for_type_inference( - dtype=core.VarDesc.VarType.UINT8, stop_gradient=True - ) - - self._helper.append_op( - type='dropout', - inputs={'X': [input]}, - outputs={'Out': [out], 'Mask': [mask]}, - attrs=attrs, - ) - return out - - class Embedding(layers.Layer): r""" :alias_main: paddle.nn.Embedding @@ -1483,214 +1363,6 @@ def forward(self, input): return out -class LayerNorm(layers.Layer): - r""" - :alias_main: paddle.nn.LayerNorm - :alias: paddle.nn.LayerNorm,paddle.nn.layer.LayerNorm,paddle.nn.layer.norm.LayerNorm - :old_api: paddle.fluid.dygraph.LayerNorm - - This interface is used to construct a callable object of the ``LayerNorm`` class. - For more details, refer to code examples. - It implements the function of the Layer Normalization Layer and can be applied to mini-batch input data. - Refer to `Layer Normalization `_ - - The formula is as follows: - - .. math:: - - \\mu & = \\frac{1}{H}\\sum_{i=1}^{H} x_i - - \\sigma & = \\sqrt{\\frac{1}{H}\sum_{i=1}^{H}{(x_i - \\mu)^2} + \\epsilon} - - y & = f(\\frac{g}{\\sigma}(x - \\mu) + b) - - - :math:`x`: the vector representation of the summed inputs to the neurons in that layer. - - :math:`H`: the number of hidden units in a layers - - :math:`\\epsilon`: the small value added to the variance to prevent division by zero. - - :math:`g`: the trainable scale parameter. - - :math:`b`: the trainable bias parameter. - - Parameters: - normalized_shape(int or list or tuple): Input shape from an expected input of - size :math:`[*, normalized_shape[0], normalized_shape[1], ..., normalized_shape[-1]]`. - If it is a single integer, this module will normalize over the last dimension - which is expected to be of that specific size. 
- scale(bool, optional): Whether to learn the adaptive gain :math:`g` after - normalization. Default: True. - shift(bool, optional): Whether to learn the adaptive bias :math:`b` after - normalization. Default: True. - epsilon(float, optional): The small value added to the variance to prevent - division by zero. Default: 1e-05. - param_attr(ParamAttr, optional): The parameter attribute for the learnable - gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is - omitted. If :attr:`scale` is True and :attr:`param_attr` is None, - a default :code:`ParamAttr` would be added as scale. The - :attr:`param_attr` is initialized as 1 if it is added. Default: None. - bias_attr(ParamAttr, optional): The parameter attribute for the learnable - bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is - omitted. If :attr:`shift` is True and :attr:`param_attr` is None, - a default :code:`ParamAttr` would be added as bias. The - :attr:`bias_attr` is initialized as 0 if it is added. Default: None. - act(str, optional): Activation to be applied to the output of layer normalization. - Default: None. - dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32". - - Returns: - None - - Examples: - - .. code-block:: python - - import paddle.fluid as fluid - from paddle.fluid.dygraph.base import to_variable - import numpy - - x = numpy.random.random((3, 32, 32)).astype('float32') - with fluid.dygraph.guard(): - x = to_variable(x) - layerNorm = fluid.LayerNorm([32, 32]) - ret = layerNorm(x) - - """ - - def __init__( - self, - normalized_shape, - scale=True, - shift=True, - epsilon=1e-05, - param_attr=None, - bias_attr=None, - act=None, - dtype='float32', - ): - super().__init__() - if isinstance(normalized_shape, numbers.Integral): - normalized_shape = [normalized_shape] - - self._normalized_shape = list(normalized_shape) - self._scale = scale - self._shift = shift - self._epsilon = epsilon - self._param_attr = param_attr - self._bias_attr = bias_attr - self._act = act - self._dtype = dtype - param_shape = [np.prod(self._normalized_shape)] - if self._scale: - self.weight = self.create_parameter( - attr=self._param_attr, - shape=param_shape, - dtype=self._dtype, - default_initializer=Constant(1.0), - ) - else: - if self._param_attr: - logging.warn("param_attr are only available with scale is True") - self.weight = None - - if self._shift: - assert self._bias_attr is not False - self.bias = self.create_parameter( - attr=self._bias_attr, - shape=param_shape, - dtype=self._dtype, - is_bias=True, - ) - else: - if self._bias_attr: - logging.warn("bias_attr are only available with shift is True") - self.bias = None - - def forward(self, input): - input_shape = list(input.shape) - input_ndim = len(input_shape) - normalized_ndim = len(self._normalized_shape) - self._begin_norm_axis = input_ndim - normalized_ndim - if ( - input_ndim < normalized_ndim - or input_shape[self._begin_norm_axis :] != self._normalized_shape - ): - str_normalized_shape = str(self._normalized_shape) - raise ValueError( - 'Given normalized_shape is ' - + str_normalized_shape - + ', expected input with shape [*, ' - + str_normalized_shape[1:] - + ', but got input shape ' - + str(input_shape) - ) - - if _non_static_mode(): - if in_dygraph_mode(): - pre_act, _, _, = _C_ops.layer_norm( - input, - self.weight, - self.bias, - self._epsilon, - self._begin_norm_axis, - ) - return dygraph_utils._append_activation_in_dygraph( - pre_act, act=self._act - ) - else: - pre_act, _, _ = _legacy_C_ops.layer_norm( - input, - 
self.weight, - self.bias, - 'epsilon', - self._epsilon, - 'begin_norm_axis', - self._begin_norm_axis, - ) - return dygraph_utils._append_activation_in_dygraph( - pre_act, act=self._act - ) - - check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'LayerNorm' - ) - - inputs = dict() - inputs['X'] = [input] - if self._scale: - inputs['Scale'] = [self.weight] - if self._shift: - inputs['Bias'] = [self.bias] - attrs = { - "epsilon": self._epsilon, - "begin_norm_axis": self._begin_norm_axis, - } - - # create output - mean_out = self._helper.create_variable_for_type_inference( - dtype=self._dtype, stop_gradient=True - ) - variance_out = self._helper.create_variable_for_type_inference( - dtype=self._dtype, stop_gradient=True - ) - layer_norm_out = self._helper.create_variable_for_type_inference( - self._dtype - ) - - self._helper.append_op( - type="layer_norm", - inputs=inputs, - outputs={ - "Y": layer_norm_out, - "Mean": mean_out, - "Variance": variance_out, - }, - attrs={ - "epsilon": self._epsilon, - "begin_norm_axis": self._begin_norm_axis, - }, - ) - - return self._helper.append_activation(layer_norm_out, act=self._act) - - class GRUUnit(layers.Layer): """ **GRU unit layer** diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py index 6792cf2877fe53..5cfd8a60786a9e 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py @@ -18,13 +18,7 @@ import paddle import paddle.fluid as fluid import paddle.nn.functional as F -from paddle.fluid.dygraph import ( - Embedding, - Layer, - LayerNorm, - Linear, - to_variable, -) +from paddle.fluid.dygraph import Embedding, Layer, Linear, to_variable from paddle.optimizer.lr import NoamDecay """ @@ -245,9 +239,9 @@ def __init__(self, d_model, process_cmd, shape_len=None): super().__init__() for cmd in process_cmd: if cmd == "n": - self._layer_norm = LayerNorm( + self._layer_norm = paddle.nn.LayerNorm( normalized_shape=d_model, - param_attr=fluid.ParamAttr( + weight_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(1.0) ), bias_attr=fluid.ParamAttr( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index 79ec89c008261e..f4c0815884a52c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -18,7 +18,7 @@ import paddle.fluid as fluid import paddle.fluid.layers as layers import paddle.nn.functional as F -from paddle.fluid.dygraph import Embedding, Layer, LayerNorm, to_variable +from paddle.fluid.dygraph import Embedding, Layer, to_variable from paddle.fluid.layers.utils import map_structure from paddle.jit.api import dygraph_to_static_func from paddle.nn import Linear @@ -59,9 +59,9 @@ def __init__(self, process_cmd, d_model, dropout_rate): self.add_sublayer( "layer_norm_%d" % len([layer for layer in self.children()]), - LayerNorm( + paddle.nn.LayerNorm( normalized_shape=d_model, - param_attr=fluid.ParamAttr( + weight_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(1.0) ), bias_attr=fluid.ParamAttr( diff --git a/python/paddle/fluid/tests/unittests/mlu/test_layer_norm_op_mlu.py 
b/python/paddle/fluid/tests/unittests/mlu/test_layer_norm_op_mlu.py index 1a0eac49eccfb0..870aacd411bbfe 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_layer_norm_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_layer_norm_op_mlu.py @@ -286,7 +286,7 @@ def test_errors(self): with program_guard(Program(), Program()): paddle.enable_static() - layer_norm = fluid.LayerNorm([32, 32]) + layer_norm = paddle.nn.LayerNorm([32, 32]) # the input of LayerNorm must be Variable. x1 = np.random.random((3, 32, 32)).astype('float32') self.assertRaises(TypeError, layer_norm, x1) diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py index 6c886acea0d8b2..8a47deb34c0d38 100644 --- a/python/paddle/fluid/tests/unittests/test_dropout_op.py +++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py @@ -538,7 +538,7 @@ def test_dygraph(self): res10 = paddle.nn.functional.dropout( x=input, p=1.0, training=True ) - dropout = paddle.fluid.dygraph.Dropout( + dropout = paddle.nn.Dropout( p=0, ) res11 = dropout(input) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py index 2d80a3a1ee6fab..169269cc03e31e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -21,14 +21,7 @@ import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework -from paddle.fluid.dygraph.nn import ( - NCE, - BatchNorm, - Embedding, - GroupNorm, - LayerNorm, - PRelu, -) +from paddle.fluid.dygraph.nn import NCE, BatchNorm, Embedding, GroupNorm, PRelu from paddle.nn import Linear @@ -212,8 +205,8 @@ def __init__(self): self.emb1 = Embedding([1000, 100]) self.emb2 = Embedding([2000, 200]) - self.layer_norm_1 = LayerNorm([10]) - self.layer_norm_2 = LayerNorm(10) + self.layer_norm_1 = paddle.nn.LayerNorm([10]) + self.layer_norm_2 = paddle.nn.LayerNorm(10) self.nce1 = NCE(10000, 100) self.nce2 = NCE(10000, 100) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py index c99fbcf4e9a84a..e850905141b188 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py @@ -20,7 +20,7 @@ import paddle import paddle.fluid as fluid import paddle.nn.functional as F -from paddle.fluid import Embedding, Layer, LayerNorm, core +from paddle.fluid import Embedding, Layer, core from paddle.fluid.dygraph import guard, to_variable from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard from paddle.jit import TracedLayer @@ -399,9 +399,9 @@ def __init__(self, d_model, process_cmd, shape_len=None): super().__init__() for cmd in process_cmd: if cmd == "n": - self._layer_norm = LayerNorm( + self._layer_norm = paddle.nn.LayerNorm( normalized_shape=d_model, - param_attr=fluid.ParamAttr( + weight_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(1.0) ), bias_attr=fluid.ParamAttr( diff --git a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py index 200372d2d143f6..f1c16a56ef3c57 100644 --- a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py +++ 
b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py @@ -375,7 +375,7 @@ def test_errors(self): with program_guard(Program(), Program()): paddle.enable_static() - layer_norm = fluid.LayerNorm([32, 32]) + layer_norm = paddle.nn.LayerNorm([32, 32]) # the input of LayerNorm must be Variable. x1 = np.random.random((3, 32, 32)).astype('float32') self.assertRaises(TypeError, layer_norm, x1) diff --git a/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py index 8b258c75661655..68a233c1af0764 100644 --- a/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_layer_norm_op_v2.py @@ -33,7 +33,7 @@ def test_dygraph(self): def compute_v1(x): with fluid.dygraph.guard(p): - ln = fluid.dygraph.LayerNorm(shape[1:]) + ln = paddle.nn.LayerNorm(shape[1:]) y = ln(paddle.to_tensor(x)) return y.numpy() @@ -57,7 +57,7 @@ def test_eager(self): def compute_v1(x): with fluid.dygraph.guard(p): - ln = fluid.dygraph.LayerNorm(shape[1:]) + ln = paddle.nn.LayerNorm(shape[1:]) x1 = paddle.to_tensor(x) x1.stop_gradient = False y = ln(x1) @@ -91,7 +91,7 @@ def test_static(self): def compute_v1(x_np): with program_guard(Program(), Program()): - ln = fluid.dygraph.LayerNorm(shape[1:]) + ln = paddle.nn.LayerNorm(shape[1:]) x = fluid.data(name='x', shape=x_np.shape, dtype=x_np.dtype) y = ln(x) exe.run(fluid.default_startup_program()) @@ -123,7 +123,7 @@ def test_dygraph(self): def compute_v0(x): with fluid.dygraph.guard(p): - ln = fluid.dygraph.LayerNorm(shape[1:]) + ln = paddle.nn.LayerNorm(shape[1:]) y = ln(paddle.to_tensor(x)) return y.numpy() @@ -141,7 +141,7 @@ def compute_v2(x): def compute_v3(x): with fluid.dygraph.guard(p): - ln = fluid.dygraph.LayerNorm(shape[-1]) + ln = paddle.nn.LayerNorm(shape[-1]) y = ln(paddle.to_tensor(x)) return y.numpy() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index dfa8ee6f9c196a..40508fcd52e285 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -120,50 +120,6 @@ def forward(self, x, do_linear2=False): ret = custom(x, do_linear2=True) np.testing.assert_array_equal(ret.numpy().shape, [3, 1]) - def test_dropout(self): - inp = np.ones([3, 32, 32], dtype='float32') - with self.static_graph(): - t = layers.data( - name='data', - shape=[3, 32, 32], - dtype='float32', - append_batch_size=False, - ) - dropout = nn.Dropout(p=0.35, seed=1, is_test=False) - ret = dropout(t) - ret2 = fluid.layers.dropout( - t, dropout_prob=0.35, seed=1, is_test=False - ) - static_ret, static_ret2 = self.get_static_graph_result( - feed={'data': inp}, fetch_list=[ret, ret2] - ) - with self.dynamic_graph(): - with _test_eager_guard(): - t = base.to_variable(inp) - dropout = nn.Dropout(p=0.35, seed=1, is_test=False) - dy_eager_ret = dropout(t) - dy_eager_ret2 = fluid.layers.dropout( - t, dropout_prob=0.35, seed=1, is_test=False - ) - dy_eager_ret_value = dy_eager_ret.numpy() - dy_eager_ret2_value = dy_eager_ret2.numpy() - - t = base.to_variable(inp) - dropout = nn.Dropout(p=0.35, seed=1, is_test=False) - dy_ret = dropout(t) - dy_ret2 = fluid.layers.dropout( - t, dropout_prob=0.35, seed=1, is_test=False - ) - dy_ret_value = dy_ret.numpy() - dy_ret2_value = dy_ret2.numpy() - - np.testing.assert_array_equal(dy_eager_ret_value, dy_eager_ret2_value) - np.testing.assert_array_equal(static_ret, dy_eager_ret_value) - - 
np.testing.assert_array_equal(static_ret, static_ret2) - np.testing.assert_array_equal(dy_ret_value, dy_ret2_value) - np.testing.assert_array_equal(static_ret, dy_ret_value) - def test_linear(self): inp = np.ones([3, 32, 32], dtype='float32') with self.static_graph(): @@ -284,107 +240,6 @@ def test_type(): self.assertRaises(TypeError, test_type) - def test_layer_norm(self): - inp = np.ones([3, 32, 32], dtype='float32') - with self.static_graph(): - t = layers.data( - name='data', - shape=[3, 32, 32], - dtype='float32', - append_batch_size=False, - ) - ret = layers.layer_norm( - t, - bias_attr=fluid.initializer.ConstantInitializer(value=1), - act='sigmoid', - ) - static_ret = self.get_static_graph_result( - feed={'data': inp}, fetch_list=[ret] - )[0] - with self.static_graph(): - t = layers.data( - name='data', - shape=[3, 32, 32], - dtype='float32', - append_batch_size=False, - ) - lm = nn.LayerNorm( - normalized_shape=[32, 32], - bias_attr=fluid.initializer.ConstantInitializer(value=1), - act='sigmoid', - ) - ret = lm(t) - static_ret2 = self.get_static_graph_result( - feed={'data': inp}, fetch_list=[ret] - )[0] - with self.dynamic_graph(): - with _test_eager_guard(): - lm = nn.LayerNorm( - normalized_shape=[32, 32], - bias_attr=fluid.initializer.ConstantInitializer(value=1), - act='sigmoid', - ) - dy_eager_ret = lm(base.to_variable(inp)) - dy_eager_ret_value = dy_eager_ret.numpy() - - lm = nn.LayerNorm( - normalized_shape=[32, 32], - bias_attr=fluid.initializer.ConstantInitializer(value=1), - act='sigmoid', - ) - dy_ret = lm(base.to_variable(inp)) - dy_ret_value = dy_ret.numpy() - - with self.dynamic_graph(): - with _test_eager_guard(): - lm = nn.LayerNorm( - normalized_shape=[32, 32], - shift=False, - scale=False, - param_attr=fluid.initializer.ConstantInitializer(value=1), - bias_attr=fluid.initializer.ConstantInitializer(value=1), - act='sigmoid', - ) - lm(base.to_variable(inp)) - - self.assertFalse(hasattr(lm, "_scale_w")) - self.assertFalse(hasattr(lm, "_bias_w")) - - lm = nn.LayerNorm( - normalized_shape=[32, 32], - shift=False, - scale=False, - param_attr=fluid.initializer.ConstantInitializer(value=1), - bias_attr=fluid.initializer.ConstantInitializer(value=1), - act='sigmoid', - ) - lm(base.to_variable(inp)) - - self.assertFalse(hasattr(lm, "_scale_w")) - self.assertFalse(hasattr(lm, "_bias_w")) - - np.testing.assert_array_equal(static_ret, static_ret2) - np.testing.assert_array_equal(dy_eager_ret_value, static_ret2) - np.testing.assert_array_equal(dy_ret_value, static_ret2) - - with self.dynamic_graph(): - with _test_eager_guard(): - lm = nn.LayerNorm( - normalized_shape=[16, 32], - bias_attr=fluid.initializer.ConstantInitializer(value=1), - act='sigmoid', - ) - with self.assertRaises(ValueError): - lm(base.to_variable(inp)) - - lm = nn.LayerNorm( - normalized_shape=[16, 32], - bias_attr=fluid.initializer.ConstantInitializer(value=1), - act='sigmoid', - ) - with self.assertRaises(ValueError): - lm(base.to_variable(inp)) - def test_SyncBatchNorm(self): if core.is_compiled_with_cuda(): with self.static_graph(): diff --git a/python/paddle/fluid/tests/unittests/test_paddle_fluid_modelaverage.py b/python/paddle/fluid/tests/unittests/test_paddle_fluid_modelaverage.py index 7be33d31fd483a..f467d17ee20abe 100644 --- a/python/paddle/fluid/tests/unittests/test_paddle_fluid_modelaverage.py +++ b/python/paddle/fluid/tests/unittests/test_paddle_fluid_modelaverage.py @@ -13,9 +13,11 @@ # limitations under the License. 
import unittest + import numpy as np -import paddle.fluid as fluid + import paddle +import paddle.fluid as fluid class TestModelAverage(unittest.TestCase): From 49656af3e96b9e04a6dce85d06350eaa9cc115ac Mon Sep 17 00:00:00 2001 From: zqw_1997 <118182234+zhengqiwen1997@users.noreply.github.com> Date: Fri, 2 Dec 2022 17:32:17 +0800 Subject: [PATCH 120/154] [fluid remove]: remove paddle.fluid.layers.nn.temporal_shift and paddle.fluid.layers.sigmoid_focal_loss (#48553) * remove paddle.fluid.layers.nn.temporal_shift * code check * rm unittest * remove paddle.fluid.layers.sigmoid_focal_loss --- python/paddle/fluid/layers/detection.py | 148 ------------------ python/paddle/fluid/layers/nn.py | 40 ----- .../unittests/dygraph_to_static/test_tsm.py | 4 +- .../fluid/tests/unittests/test_layers.py | 32 ---- .../unittests/test_sigmoid_focal_loss_op.py | 83 +--------- .../tests/unittests/test_temporal_shift_op.py | 3 - 6 files changed, 10 insertions(+), 300 deletions(-) diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index a3db0f70a6cc46..f021ab8f3d3605 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -48,7 +48,6 @@ 'ssd_loss', 'rpn_target_assign', 'retinanet_target_assign', - 'sigmoid_focal_loss', 'anchor_generator', 'roi_perspective_transform', 'generate_proposal_labels', @@ -524,153 +523,6 @@ def rpn_target_assign( ) -def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25): - r""" - :alias_main: paddle.nn.functional.sigmoid_focal_loss - :alias: paddle.nn.functional.sigmoid_focal_loss,paddle.nn.functional.loss.sigmoid_focal_loss - :old_api: paddle.fluid.layers.sigmoid_focal_loss - - **Sigmoid Focal Loss Operator.** - - `Focal Loss `_ is used to address the foreground-background - class imbalance existed on the training phase of many computer vision tasks. This OP computes - the sigmoid value for each element in the input tensor :attr:`x`, after which focal loss is - measured between the sigmoid value and target label. - - The focal loss is given as followed: - - .. math:: - - \\mathop{loss_{i,\\,j}}\\limits_{i\\in\\mathbb{[0,\\,N-1]},\\,j\\in\\mathbb{[0,\\,C-1]}}=\\left\\{ - \\begin{array}{rcl} - - \\frac{1}{fg\_num} * \\alpha * {(1 - \\sigma(x_{i,\\,j}))}^{\\gamma} * \\log(\\sigma(x_{i,\\,j})) & & {(j +1) = label_{i,\\,0}} \\\\ - - \\frac{1}{fg\_num} * (1 - \\alpha) * {\sigma(x_{i,\\,j})}^{ \\gamma} * \\log(1 - \\sigma(x_{i,\\,j})) & & {(j +1)!= label_{i,\\,0}} - \\end{array} \\right. - - - We know that - - .. math:: - \\sigma(x_j) = \\frac{1}{1 + \\exp(-x_j)} - - - Args: - x(Variable): A 2-D tensor with shape :math:`[N, C]` represents the predicted categories of - all samples. :math:`N` is the number of all samples responsible for optimization in - a mini-batch, for example, samples are anchor boxes for object detection and :math:`N` - is the total number of positive and negative samples in a mini-batch; Samples are images - for image classification and :math:`N` is the number of images in a mini-batch. :math:`C` - is the number of classes (**Notice: excluding background**). The data type of :attr:`x` is - float32 or float64. - label(Variable): A 2-D tensor with shape :math:`[N, 1]` represents the target labels for - classification. :math:`N` is the number of all samples responsible for optimization in a - mini-batch, each sample has one target category. The values for positive samples are in the - range of :math:`[1, C]`, and the values for negative samples are 0. 
The data type of :attr:`label` - is int32. - fg_num(Variable): A 1-D tensor with shape [1] represents the number of positive samples in a - mini-batch, which should be obtained before this OP. The data type of :attr:`fg_num` is int32. - gamma(int|float): Hyper-parameter to balance the easy and hard examples. Default value is - set to 2.0. - alpha(int|float): Hyper-parameter to balance the positive and negative example. Default value - is set to 0.25. - - Returns: - Variable(the data type is float32 or float64): - A 2-D tensor with shape :math:`[N, C]`, which is the focal loss of each element in the input - tensor :attr:`x`. - - Examples: - .. code-block:: python - - import paddle - import numpy as np - import paddle.fluid as fluid - - num_classes = 10 # exclude background - image_width = 16 - image_height = 16 - batch_size = 32 - max_iter = 20 - - paddle.enable_static() - def gen_train_data(): - x_data = np.random.uniform(0, 255, (batch_size, 3, image_height, - image_width)).astype('float64') - label_data = np.random.randint(0, num_classes, - (batch_size, 1)).astype('int32') - return {"x": x_data, "label": label_data} - - - def get_focal_loss(pred, label, fg_num, num_classes): - pred = paddle.reshape(pred, [-1, num_classes]) - label = paddle.reshape(label, [-1, 1]) - label.stop_gradient = True - loss = fluid.layers.sigmoid_focal_loss( - pred, label, fg_num, gamma=2.0, alpha=0.25) - loss = paddle.sum(loss) - return loss - - - def build_model(mode='train'): - x = fluid.data(name="x", shape=[-1, 3, -1, -1], dtype='float64') - output = fluid.layers.pool2d(input=x, pool_type='avg', global_pooling=True) - output = fluid.layers.fc( - input=output, - size=num_classes, - # Notice: size is set to be the number of target classes (excluding backgorund) - # because sigmoid activation will be done in the sigmoid_focal_loss op. - act=None) - if mode == 'train': - label = fluid.data(name="label", shape=[-1, 1], dtype='int32') - # Obtain the fg_num needed by the sigmoid_focal_loss op: - # 0 in label represents background, >=1 in label represents foreground, - # find the elements in label which are greater or equal than 1, then - # computed the numbers of these elements. - data = fluid.layers.fill_constant(shape=[1], value=1, dtype='int32') - fg_label = fluid.layers.greater_equal(label, data) - fg_label = fluid.layers.cast(fg_label, dtype='int32') - fg_num = paddle.sum(fg_label, dtype='int32') - fg_num.stop_gradient = True - avg_loss = get_focal_loss(output, label, fg_num, num_classes) - return avg_loss - else: - # During evaluating or testing phase, - # output of the final fc layer should be connected to a sigmoid layer. 
- pred = fluid.layers.sigmoid(output) - return pred - - - loss = build_model('train') - moment_optimizer = fluid.optimizer.MomentumOptimizer( - learning_rate=0.001, momentum=0.9) - moment_optimizer.minimize(loss) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - for i in range(max_iter): - outs = exe.run(feed=gen_train_data(), fetch_list=[loss.name]) - print(outs) - """ - - check_variable_and_dtype( - x, 'x', ['float32', 'float64'], 'sigmoid_focal_loss' - ) - check_variable_and_dtype(label, 'label', ['int32'], 'sigmoid_focal_loss') - check_variable_and_dtype(fg_num, 'fg_num', ['int32'], 'sigmoid_focal_loss') - - helper = LayerHelper("sigmoid_focal_loss", **locals()) - - out = helper.create_variable_for_type_inference(dtype=x.dtype) - - helper.append_op( - type="sigmoid_focal_loss", - inputs={"X": x, "Label": label, "FgNum": fg_num}, - attrs={"gamma": gamma, 'alpha': alpha}, - outputs={"Out": out}, - ) - return out - - def detection_output( loc, scores, diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 98d63c9fd0bdf7..717c965727fdb2 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -109,7 +109,6 @@ 'bilinear_tensor_product', 'merge_selected_rows', 'get_tensor_from_selected_rows', - 'temporal_shift', 'continuous_value_model', 'unfold', 'deformable_roi_pooling', @@ -6375,45 +6374,6 @@ def get_tensor_from_selected_rows(x, name=None): return out -@templatedoc() -def temporal_shift(x, seg_num, shift_ratio=0.25, name=None, data_format="NCHW"): - """ - - **Temporal Shift Operator** - - ${comment} - - Args: - x(Tensor): ${x_comment} - seg_num(int): ${seg_num_comment} - shift_ratio(float): ${shift_ratio_comment} - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - data_format(str, optional): Data format that specifies the layout of input. - It can be "NCHW" or "NHWC". Default: "NCHW". - - Returns: - out(Tensor): The temporal shifting result is a tensor with the - same shape and same data type as the input. - - Raises: - TypeError: seg_num must be int type. - - Examples: - .. 
code-block:: python - - import paddle - import paddle.nn.functional as F - - input = paddle.randn([6, 4, 2, 2]) - out = F.temporal_shift(x=input, seg_num=2, shift_ratio=0.2) - """ - return paddle.nn.functional.temporal_shift( - x, seg_num, shift_ratio, name, data_format - ) - - def continuous_value_model(input, cvm, use_cvm=True): r""" diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py index 7cd74f30256f41..0be42a27feb70e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py @@ -126,7 +126,9 @@ def __init__( self._num_channels_out = int(num_filters * 4) def forward(self, inputs): - shifts = fluid.layers.temporal_shift(inputs, self.seg_num, 1.0 / 8) + shifts = paddle.nn.functional.temporal_shift( + inputs, self.seg_num, 1.0 / 8 + ) y = self.conv0(shifts) conv1 = self.conv1(y) conv2 = self.conv2(conv1) diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 40508fcd52e285..67cfdfeceb2664 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -3519,14 +3519,6 @@ def make_kldiv_loss(self): ) return loss - def make_temporal_shift(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data(name="X", shape=[16, 4, 4], dtype="float32") - out = layers.temporal_shift(x, seg_num=2, shift_ratio=0.2) - return out - def make_pixel_shuffle(self): with program_guard( fluid.default_main_program(), fluid.default_startup_program() @@ -3949,30 +3941,6 @@ def test_retinanet_target_assign(self): 10, ) - def test_sigmoid_focal_loss(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - input = layers.data( - name='data', - shape=[10, 80], - append_batch_size=False, - dtype='float32', - ) - label = layers.data( - name='label', - shape=[10, 1], - append_batch_size=False, - dtype='int32', - ) - fg_num = layers.data( - name='fg_num', shape=[1], append_batch_size=False, dtype='int32' - ) - out = fluid.layers.sigmoid_focal_loss( - x=input, label=label, fg_num=fg_num, gamma=2.0, alpha=0.25 - ) - return out - def test_addmm(self): with program_guard( fluid.default_main_program(), fluid.default_startup_program() diff --git a/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py b/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py index 689fc30b5803f1..1330272ffc77af 100644 --- a/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py @@ -19,8 +19,7 @@ import numpy as np from op_test import OpTest -import paddle.fluid as fluid -from paddle.fluid import Program, core, program_guard +import paddle def sigmoid_focal_loss_forward( @@ -105,15 +104,15 @@ def test_check_grad(self): @unittest.skipIf( - not core.is_compiled_with_cuda(), "core is not compiled with CUDA" + not paddle.is_compiled_with_cuda(), "core is not compiled with CUDA" ) class TestSigmoidFocalLossOp2(TestSigmoidFocalLossOp1): def test_check_output(self): - place = core.CUDAPlace(0) + place = paddle.CUDAPlace(0) self.check_output_with_place(place, atol=2e-3) def test_check_grad(self): - place = core.CUDAPlace(0) + place = paddle.CUDAPlace(0) self.check_grad_with_place( place, ['X'], 'Out', max_relative_error=0.002 ) @@ -128,87 
+127,19 @@ def set_argument(self): @unittest.skipIf( - not core.is_compiled_with_cuda(), "core is not compiled with CUDA" + not paddle.is_compiled_with_cuda(), "core is not compiled with CUDA" ) class TestSigmoidFocalLossOp4(TestSigmoidFocalLossOp3): def test_check_output(self): - place = core.CUDAPlace(0) + place = paddle.CUDAPlace(0) self.check_output_with_place(place, atol=2e-3) def test_check_grad(self): - place = core.CUDAPlace(0) + place = paddle.CUDAPlace(0) self.check_grad_with_place( place, ['X'], 'Out', max_relative_error=0.002 ) -class TestSigmoidFocalLossOpError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - label1 = fluid.layers.fill_constant( - shape=[10, 1], dtype="int32", value=1 - ) - fg_num1 = fluid.layers.fill_constant( - shape=[1], dtype="int32", value=5 - ) - - # The `x` must be Variable and the data type of `x` Tensor must be one of float32 and float64. - def test_x_type(): - x1 = [2] - fluid.layers.sigmoid_focal_loss( - x=x1, label=label1, fg_num=fg_num1, gamma=2.0, alpha=0.25 - ) - - self.assertRaises(TypeError, test_x_type) - - def test_x_tensor_dtype(): - x2 = fluid.layers.data(name='x2', shape=[10, 10], dtype="int16") - fluid.layers.sigmoid_focal_loss( - x=x2, label=label1, fg_num=fg_num1, gamma=2.0, alpha=0.25 - ) - - self.assertRaises(TypeError, test_x_tensor_dtype) - - x3 = fluid.layers.data(name='x3', shape=[10, 10], dtype="float64") - - # The `label` must be Variable and the data type of `label` Tensor must be int32. - def test_label_type(): - label2 = [2] - fluid.layers.sigmoid_focal_loss( - x=x3, label=label2, fg_num=fg_num1, gamma=2.0, alpha=0.25 - ) - - self.assertRaises(TypeError, test_label_type) - - def test_label_tensor_dtype(): - label3 = fluid.layers.fill_constant( - shape=[10, 1], dtype="float32", value=1.0 - ) - fluid.layers.sigmoid_focal_loss( - x=x3, label=label3, fg_num=fg_num1, gamma=2.0, alpha=0.25 - ) - - self.assertRaises(TypeError, test_label_tensor_dtype) - - # The `fg_num` must be Variable and the data type of `fg_num` Tensor must be int32. 
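Migration note (the remaining deleted type-check tests continue below): both ops removed by this patch have functional equivalents under paddle.nn.functional, and the sketch here is a hypothetical migration example rather than part of the patch. In particular, paddle.nn.functional.sigmoid_focal_loss takes binary (0/1) float targets with the same shape as the logits plus an optional normalizer tensor, so it approximates, but is not a drop-in replacement for, the removed op's class-index label and fg_num inputs.

    # Hedged migration sketch; shapes and tensors are illustrative placeholders.
    import paddle
    import paddle.nn.functional as F

    # temporal_shift keeps the same arguments as the removed fluid layer.
    x = paddle.randn([6, 4, 2, 2])  # [N * seg_num, C, H, W]
    shifted = F.temporal_shift(x, seg_num=2, shift_ratio=0.2)

    # sigmoid_focal_loss in 2.x expects one-hot style float targets, not class ids.
    logit = paddle.randn([8, 10])  # [N, num_classes]
    label = paddle.randint(0, 2, [8, 10]).astype('float32')
    fg_num = paddle.clip(paddle.sum(label), min=1.0).reshape([1])  # plays the role of the old fg_num
    loss = F.sigmoid_focal_loss(logit, label, normalizer=fg_num, alpha=0.25, gamma=2.0)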
- def test_fgnum_type(): - fg_num2 = [2] - fluid.layers.sigmoid_focal_loss( - x=x3, label=label1, fg_num=fg_num2, gamma=2.0, alpha=0.25 - ) - - self.assertRaises(TypeError, test_fgnum_type) - - def test_fgnum_tensor_dtype(): - fg_num3 = fluid.layers.fill_constant( - shape=[1], dtype="float32", value=5.0 - ) - fluid.layers.sigmoid_focal_loss( - x=x3, label=label1, fg_num=fg_num3, gamma=2.0, alpha=0.25 - ) - - self.assertRaises(TypeError, test_fgnum_tensor_dtype) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py b/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py index 265cf42934c7b7..ead0b50c1ad0e5 100644 --- a/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py +++ b/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py @@ -127,9 +127,6 @@ def test_check_grad_ignore_uv(self): class TestTemporalShiftAPI(unittest.TestCase): def test_api(self): input = paddle.randn([6, 4, 2, 2]) - out = paddle.fluid.layers.temporal_shift( - x=input, seg_num=2, shift_ratio=0.2 - ) out_from_function = paddle.nn.functional.temporal_shift( x=input, seg_num=2, shift_ratio=0.2 From 2af82190a2f326f68ed58d6a5697f5cd495fbcac Mon Sep 17 00:00:00 2001 From: Jiabin Yang <360788950@qq.com> Date: Fri, 2 Dec 2022 17:34:23 +0800 Subject: [PATCH 121/154] [XPU ]Fix xpu compile error (#48621) * [Eager] Fix paddle.grad interface * [Eager] Support minimum SubGraph for GeneralGrad * Add needed_nodes to prune grad graph more thoroughly * [Eager] Add grad_node_trans_mapping_ to record which grad_node has been transformed to AccumulationNode * [Eager] Fix paddle.grad interface * Polish code * remove potential_stop_node * Add endding_nodes to enhance genSugraph logic * clear endding_nodes_ * polish code * rename endding_nodes to endding_nades_ * Refactor grad interface * Add register_hook case to fix coverage-ci * Fix code format * Refactor general_grad * Add more code comments * call clear directly to release GradSlotMeta * fix a mistake * fix matmul/ multiply kernel logic and optional input in yaml, fill zeros logic and so on. 
* fix batch_norm_double_grad yaml optional config * fix tanh_triple_grad yaml and kernels * fix MultiplyTripleGradKernel optional logic * fix merge mistake * fix compile error * remove legacy attr for bn * polish code * fix some kernel * merge develop * fix error * remote log * fix kernel with full like * hide value log behind * hide value log behind * fix matmul_triple grad * fix xpu compile error * fix xpu compile error * fix xpu ut * fix xpu ut * fix_xpu_compile_error Co-authored-by: Weilong Wu --- paddle/phi/kernels/xpu/full_kernel.cc | 4 +--- .../paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/paddle/phi/kernels/xpu/full_kernel.cc b/paddle/phi/kernels/xpu/full_kernel.cc index ab3e0344478a46..c5fca8881e221e 100644 --- a/paddle/phi/kernels/xpu/full_kernel.cc +++ b/paddle/phi/kernels/xpu/full_kernel.cc @@ -140,8 +140,6 @@ PD_REGISTER_KERNEL(full_like, float, int, int64_t, - phi::dtype::float16, - phi::dtype::complex, - phi::dtype::complex) { + phi::dtype::float16) { kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND); } diff --git a/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py index 1c74bd715a34b7..e2aa8312635113 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py @@ -115,7 +115,7 @@ def test_errors(self): def test_type(): paddle.nonzero([10]) - self.assertRaises(TypeError, test_type) + self.assertRaises(AttributeError, test_type) if __name__ == "__main__": From e8edbb099edb4c064ba9144867ffd9ad3697f399 Mon Sep 17 00:00:00 2001 From: Hulek Date: Fri, 2 Dec 2022 10:47:51 +0100 Subject: [PATCH 122/154] Migrate mul_mkldnn_op to phi matmul_kernel (#48299) * Migrate mul_mkldnn_op to matmul_kernel * Review fixes - changed mutable_data, changed ctx to dev_ctx, fixed namespaces * switched some funcs to phi * Deleted not needed phi:: and changed place checking according to standards --- .../fluid/operators/mkldnn/mul_mkldnn_op.cc | 502 ------------------ paddle/phi/kernels/onednn/matmul_kernel.cc | 430 +++++++++++++++ 2 files changed, 430 insertions(+), 502 deletions(-) delete mode 100644 paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc diff --git a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc deleted file mode 100644 index b8638ab17c7dbc..00000000000000 --- a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc +++ /dev/null @@ -1,502 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" - -namespace phi { -class DenseTensor; -} // namespace phi - -namespace paddle { -namespace operators { - -using framework::DDim; -using framework::ExecutionContext; - -using phi::OneDNNContext; -using platform::MatMulV2MKLDNNHandler; - -using dnnl::inner_product_forward; -using dnnl::memory; -using dnnl::prop_kind; -using dnnl::stream; - -template -class MulPrimitiveFactory { - public: - explicit MulPrimitiveFactory(const dnnl::engine &engine) : engine_(engine) {} - - inner_product_forward CreateMulPrimitive(const Tensor *x_input, - const Tensor *y_input, - Tensor *output, - const ExecutionContext &ctx) { - /* check data format and reorder if need */ - int x_num_col_dims = ctx.Attr("x_num_col_dims"); - int y_num_col_dims = ctx.Attr("y_num_col_dims"); - - // TODO(intel-minghui) : Remove the restriction that only supports Input(Y) - // as weights - PADDLE_ENFORCE_EQ( - (std::is_same::value), - true, - platform::errors::InvalidArgument( - "Input(Y) must be fp32 data type since only fp32 data type is " - "supported in the current design of MKLDNN INT8.")); - - auto x_matrix = UpdateDataFormat(x_input, x_num_col_dims, ctx); - auto y_matrix = UpdateDataFormat(y_input, y_num_col_dims, ctx); - - auto output_dim = output->dims(); - if (output_dim.size() != 2) { - output->Resize({x_matrix.dims()[0], y_matrix.dims()[1]}); - } - - if (mul_) { - UpdateDataPointers(ctx, output, &x_matrix); - Execute(); - return *(mul_); - } - - auto src_desc = CreateMemDescriptor(&x_matrix, OneDNNMemoryFormat::nc); - x_input_ = CreateMemory(src_desc, &x_matrix); - - if (is_int8_) { - const auto trans_y = TransposeInputY(&y_matrix); - auto scale_y = ctx.Attr>("scale_y"); - y_input_ = QuantInputY(trans_y, scale_y); - } else { - y_input_ = TransposeInputY(&y_matrix); - } - - auto dst_desc = CreateMemDescriptor(output, OneDNNMemoryFormat::any); - - mul_ = CreateMulPrimitive(*x_input_, *y_input_, dst_desc, output, ctx); - Execute(); - return *(mul_); - } - - private: - memory ReorderWithScale(const memory::desc &src_desc, - const memory::desc &dst_desc, - void *src_data, - const std::vector &scale) { - auto mask = scale.size() > 1 ? 
1 : 0; - dnnl::primitive_attr attr; - attr.set_output_scales(mask, scale); - - auto src_mem = memory(src_desc, engine_, src_data); - auto dst_mem = memory(dst_desc, engine_); - - auto reorder_pd = dnnl::reorder::primitive_desc(src_mem, dst_mem, attr); - - auto reorder = dnnl::reorder(reorder_pd); - - auto &astream = OneDNNContext::tls().get_stream(); - { - platform::RecordEvent record_reorder( - "int_reorder", - platform::TracerEventType::UserDefined, - 2, - platform::EventRole::kUniqueOp); - reorder.execute(astream, src_mem, dst_mem); - astream.wait(); - } - - return dst_mem; - } - - memory QuantInputY(memory input_y, const std::vector &scale_y) { - const auto &dims = input_y.get_desc().data.dims; - auto ndims = input_y.get_desc().data.ndims; - auto y_dims = std::vector(dims, dims + ndims); - - auto user_y_desc = CreateMemDescriptor(y_dims, OneDNNMemoryFormat::oi); - auto y_desc = CreateMemDescriptor(y_dims, OneDNNMemoryFormat::oi); - - return ReorderWithScale( - user_y_desc, y_desc, input_y.get_data_handle(), scale_y); - } - - dnnl::primitive_attr CreateMulAttr(const ExecutionContext &ctx, - bool force_fp32_output) { - dnnl::primitive_attr mul_attr; - - auto scale_y_data = ctx.Attr>("scale_y"); - auto scale_x_data = ctx.Attr("scale_x"); - auto scale_out_data = - force_fp32_output ? 1.0f : ctx.Attr("scale_out"); - - bool is_multi_channel = scale_y_data.size() > 1; - int count = is_multi_channel ? scale_y_data.size() : 1; - std::vector output_shift_scale(count); - for (int i = 0; i < count; i++) { - if (scale_y_data[i] == 0.0) - output_shift_scale[i] = scale_out_data; - else - output_shift_scale[i] = - scale_out_data / (scale_x_data * scale_y_data[i]); - } - int mul_mask = is_multi_channel ? 1 : 0; - mul_attr.set_output_scales(mul_mask, output_shift_scale); - - return mul_attr; - } - - inner_product_forward CreateMulPrimitive(const memory &x_memory, - const memory &y_memory, - const memory::desc &dst_desc, - Tensor *output, - const ExecutionContext &ctx) { - const auto x_desc = x_memory.get_desc(); - const auto y_desc = y_memory.get_desc(); - inner_product_forward::primitive_desc mul_prim_desc; - - const auto &mul_desc = inner_product_forward::desc( - prop_kind::forward, x_desc, y_desc, dst_desc); - - if (is_int8_) { - bool force_fp32_output = ctx.Attr("force_fp32_output"); - auto mul_attr = CreateMulAttr(ctx, force_fp32_output); - mul_prim_desc = - inner_product_forward::primitive_desc(mul_desc, mul_attr, engine_); - } else { - mul_prim_desc = inner_product_forward::primitive_desc(mul_desc, engine_); - } - - output_ = CreateDstMemory(mul_prim_desc, ctx, output); - - return inner_product_forward(mul_prim_desc); - } - - void Execute() { - auto &astream = OneDNNContext::tls().get_stream(); - (*mul_).execute(astream, - {{DNNL_ARG_SRC, *x_input_}, - {DNNL_ARG_WEIGHTS, *y_input_}, - {DNNL_ARG_DST, *output_}}); - astream.wait(); - } - - template - Tensor UpdateDataFormat(const Tensor *data, - int num_col_dims, - const ExecutionContext &ctx) { - Tensor x_tmp; - Tensor data_matrix; - // This code is enforcing plain (non-blocked) memory arrangement - // in order to flatten (reduce dimensionality) of Tensor later - auto src_mdesc = data->mem_desc(); - auto dst_mdesc = - data->dims().size() >= 4 - ? (data->dims().size() == 5 - ? 
CreateMemDescriptor(data, OneDNNMemoryFormat::ncdhw) - : CreateMemDescriptor(data, OneDNNMemoryFormat::nchw)) - : src_mdesc; - - if (src_mdesc != dst_mdesc) { - x_tmp.mutable_data(ctx.GetPlace(), data->memory_size()); - - Reorder(src_mdesc, - dst_mdesc, - phi::funcs::to_void_cast(data->data()), - phi::funcs::to_void_cast(x_tmp.data())); - - x_tmp.Resize(data->dims()); - x_tmp.set_mem_desc(dst_mdesc); - data_matrix = framework::ReshapeToMatrix(x_tmp, num_col_dims); - } else { - data_matrix = framework::ReshapeToMatrix(*data, num_col_dims); - } - - return data_matrix; - } - - void UpdateDataPointers(const ExecutionContext &ctx, - Tensor *out, - const Tensor *in) { - x_input_->set_data_handle(phi::funcs::to_void_cast(in->data())); - output_->set_data_handle(out->mutable_data(ctx.GetPlace())); - out->set_mem_desc(output_->get_desc()); - } - - template - memory::desc CreateMemDescriptor( - const Tensor *tensor, - OneDNNMemoryFormat format, - memory::data_type type = phi::funcs::OneDNNGetDataType()) { - auto dims = phi::vectorize(tensor->dims()); - return phi::funcs::OneDNNMemDesc(dims, type, format); - } - - template - memory::desc CreateMemDescriptor( - const std::vector &dims, - OneDNNMemoryFormat format, - memory::data_type type = phi::funcs::OneDNNGetDataType()) { - return phi::funcs::OneDNNMemDesc(dims, type, format); - } - - template - memory CreateMemory(const memory::desc &desc, const Tensor *tensor) { - return memory( - desc, engine_, phi::funcs::to_void_cast(tensor->data())); - } - - memory CreateDstMemory( - const inner_product_forward::primitive_desc &mul_prim_desc, - const ExecutionContext &ctx, - Tensor *output) { - auto dst_desc = mul_prim_desc.dst_desc(); - auto buffer_size = dst_desc.get_size(); - - OT *output_data = output->mutable_data(ctx.GetPlace(), buffer_size); - output->set_mem_desc(dst_desc); - return memory(dst_desc, engine_, phi::funcs::to_void_cast(output_data)); - } - - memory Reorder(const memory::desc &src_desc, - const memory::desc &dst_desc, - void *src_data, - void *dst_data = NULL) { - auto src_mem = memory(src_desc, engine_, src_data); - auto dst_mem = dst_data ? 
memory(dst_desc, engine_, dst_data) - : memory(dst_desc, engine_); - - auto reorder = dnnl::reorder(src_mem, dst_mem); - - auto &astream = OneDNNContext::tls().get_stream(); - { - platform::RecordEvent record_reorder( - "int_reorder", - platform::TracerEventType::UserDefined, - 2, - platform::EventRole::kUniqueOp); - reorder.execute(astream, src_mem, dst_mem); - astream.wait(); - } - - return dst_mem; - } - - memory TransposeInputY(const Tensor *input_y) { - auto dims = phi::vectorize(input_y->dims()); - std::swap(dims[0], dims[1]); // Correct output dimensions - auto src_desc = CreateMemDescriptor(dims, OneDNNMemoryFormat::io); - auto dst_desc = CreateMemDescriptor(dims, OneDNNMemoryFormat::oi); - return Reorder( - src_desc, dst_desc, phi::funcs::to_void_cast(input_y->data())); - } - - const dnnl::engine &engine_; - paddle::optional x_input_; - paddle::optional y_input_; - paddle::optional output_; - paddle::optional mul_; - static constexpr bool is_int8_ = - std::is_same::value || std::is_same::value; -}; - -/* OT: output data type */ -template -std::shared_ptr> GetPrimitiveFactory( - const OneDNNContext &dev_ctx, - const ExecutionContext &ctx, - const Tensor *input_x, - const Tensor *input_y, - const dnnl::engine &mkldnn_engine) { - std::string key = - phi::funcs::CreateKey(dev_ctx, - framework::TransToProtoVarType(input_x->dtype()), - phi::vectorize(input_x->dims()), - framework::TransToProtoVarType(input_y->dtype()), - phi::vectorize(input_y->dims()), - ctx.OutputName("Out")); - key = phi::funcs::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key); - - auto prim_creator = std::static_pointer_cast>( - dev_ctx.GetBlob(key)); - - if (prim_creator == nullptr) { - prim_creator = - std::make_shared>(mkldnn_engine); - dev_ctx.SetBlob(key, prim_creator); - } - - return prim_creator; -} - -/* XT: input x data type, YT: input y data type */ -template -inner_product_forward GetMulPrimitive(const OneDNNContext &dev_ctx, - const ExecutionContext &ctx, - const Tensor *input_x, - const Tensor *input_y, - Tensor *output, - const dnnl::engine &mkldnn_engine) { - constexpr bool is_int8 = - std::is_same::value || std::is_same::value; - bool force_fp32_output = ctx.Attr("force_fp32_output"); - - if (is_int8 && !force_fp32_output) { - return GetPrimitiveFactory( - dev_ctx, ctx, input_x, input_y, mkldnn_engine) - ->CreateMulPrimitive(input_x, input_y, output, ctx); - - } else { - return GetPrimitiveFactory( - dev_ctx, ctx, input_x, input_y, mkldnn_engine) - ->CreateMulPrimitive(input_x, input_y, output, ctx); - } -} - -/* XT: input x data type */ -template -class MulMKLDNNINT8Kernel : public framework::OpKernel { - public: - void Compute(const ExecutionContext &ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), - true, - paddle::platform::errors::PreconditionNotMet( - "Operator DNNL Mul must use CPUPlace")); - OneDNNContext::tls().log_lib_version(); - auto &dev_ctx = ctx.template device_context(); - auto &mkldnn_engine = dev_ctx.GetEngine(); - - const Tensor *x = ctx.Input("X"); - const Tensor *y = ctx.Input("Y"); - Tensor *out = ctx.Output("Out"); - auto out_dims = out->dims(); - - auto mul = - GetMulPrimitive(dev_ctx, ctx, x, y, out, mkldnn_engine); - - if (out_dims.size() != 2) { - out->Resize(out_dims); - } - - auto in_md = dnnl::memory::desc(*dnnl_primitive_desc_query_md( - mul.get_primitive_desc(), dnnl_query_dst_md, 0)); - out->set_mem_desc(in_md.reshape(phi::vectorize(out->dims()))); - } -}; - -template -class MulMKLDNNKernel : public framework::OpKernel { - public: - void 
Compute(const ExecutionContext &ctx) const override { RunKernel(ctx); } - - protected: - void ExecuteMatMul(const ExecutionContext &ctx, - const OneDNNContext &dev_ctx, - const dnnl::engine &onednn_engine, - const platform::Place &cpu_place, - const Tensor *x, - const std::vector &x_dims, - bool trans_x, - const Tensor *y, - const std::vector &y_dims, - bool trans_y, - Tensor *out) const { - static const std::vector vec_placeholder; - MatMulV2MKLDNNHandler handler(ctx, - onednn_engine, - ctx.GetPlace(), - x_dims, - trans_x, - y_dims, - trans_y, - false, - vec_placeholder, - vec_placeholder); - - const auto src_memory_p = handler.AcquireSrcMemory(x); - const auto weights_memory_p = handler.AcquireWeightsMemory(y); - const auto dst_memory_p = handler.AcquireDstMemory(out); - - auto matmul_p = handler.AcquireForwardPrimitive(); - - std::unordered_map matmul_args = { - {DNNL_ARG_SRC, *src_memory_p}, - {DNNL_ARG_WEIGHTS, *weights_memory_p}, - {DNNL_ARG_DST, *dst_memory_p}}; - - auto &astream = OneDNNContext::tls().get_stream(); - matmul_p->execute(astream, matmul_args); - astream.wait(); - - // This kernel is flattening dims so then we need to unflattened version - // that should be set in out reshape require plain layout, but - // MatmulV2MKLDNNHanlder enforces one so it should work - out->set_mem_desc( - dst_memory_p->get_desc().reshape(phi::vectorize(out->dims()))); - } - - private: - void RunKernel(const ExecutionContext &ctx) const { - const auto &dev_ctx = ctx.template device_context(); - const auto &onednn_engine = dev_ctx.GetEngine(); - - const auto *x = ctx.Input("X"); - const auto *y = ctx.Input("Y"); - auto *out = ctx.Output("Out"); - - int x_num_col_dims = ctx.Attr("x_num_col_dims"); - int y_num_col_dims = ctx.Attr("y_num_col_dims"); - - const Tensor x_matrix = x->dims().size() > 2 - ? framework::ReshapeToMatrix(*x, x_num_col_dims) - : *x; - const Tensor y_matrix = y->dims().size() > 2 - ? framework::ReshapeToMatrix(*y, y_num_col_dims) - : *y; - - // adding mb dim because MatMulV2 handler needs it - std::vector y_dims(3, 1); - std::vector x_dims(3, 1); - - y_dims[1] = y_matrix.dims()[0]; - y_dims[2] = y_matrix.dims()[1]; - - x_dims[1] = x_matrix.dims()[0]; - x_dims[2] = x_matrix.dims()[1]; - - ExecuteMatMul(ctx, - dev_ctx, - onednn_engine, - ctx.GetPlace(), - &x_matrix, - x_dims, - false, - &y_matrix, - y_dims, - false, - out); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_KERNEL(mul, - MKLDNN, - ::paddle::platform::CPUPlace, - ops::MulMKLDNNINT8Kernel, - ops::MulMKLDNNINT8Kernel, - ops::MulMKLDNNKernel, - ops::MulMKLDNNKernel); diff --git a/paddle/phi/kernels/onednn/matmul_kernel.cc b/paddle/phi/kernels/onednn/matmul_kernel.cc index 30a1735c5184aa..c820e738f09348 100644 --- a/paddle/phi/kernels/onednn/matmul_kernel.cc +++ b/paddle/phi/kernels/onednn/matmul_kernel.cc @@ -12,11 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include + #include "paddle/phi/kernels/matmul_kernel.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/core/kernel_registry.h" +using dnnl::engine; +using dnnl::inner_product_forward; +using dnnl::memory; +using dnnl::prop_kind; +using dnnl::stream; +using paddle::framework::ReshapeToMatrix; + namespace phi { DDim GetDimsForInput(const OneDNNContext &dev_ctx, @@ -152,6 +161,418 @@ void MatmulKernel(const Context &dev_ctx, } } +template +class MulPrimitiveFactory { + public: + explicit MulPrimitiveFactory(const engine &engine) : engine_(engine) {} + + inner_product_forward CreateMulPrimitive(const DenseTensor *x_input, + const DenseTensor *y_input, + DenseTensor *output, + int x_num_col_dims, + int y_num_col_dims, + const OneDNNContext &dev_ctx) { + // TODO(intel-minghui) : Remove the restriction that only supports Input(Y) + // as weights + PADDLE_ENFORCE_EQ( + (std::is_same::value), + true, + errors::InvalidArgument( + "Input(Y) must be fp32 data type since only fp32 data type is " + "supported in the current design of OneDNN INT8.")); + + /* check data format and reorder if need */ + auto x_matrix = UpdateDataFormat(x_input, x_num_col_dims, dev_ctx); + auto y_matrix = UpdateDataFormat(y_input, y_num_col_dims, dev_ctx); + + auto output_dim = output->dims(); + if (output_dim.size() != 2) { + output->Resize({x_matrix.dims()[0], y_matrix.dims()[1]}); + } + + if (mul_) { + UpdateDataPointers(dev_ctx, output, &x_matrix); + Execute(); + return *(mul_); + } + + auto src_desc = + CreateMemDescriptor(&x_matrix, funcs::OneDNNMemoryFormat::nc); + x_input_ = CreateMemory(src_desc, &x_matrix); + + if (is_int8_) { + const auto trans_y = TransposeInputY(&y_matrix); + auto scale_y = dev_ctx.HasDnnAttr("scale_y") + ? PADDLE_GET_CONST(std::vector, + dev_ctx.GetDnnAttr("scale_y")) + : std::vector(); + y_input_ = QuantInputY(trans_y, scale_y); + } else { + y_input_ = TransposeInputY(&y_matrix); + } + + auto dst_desc = + CreateMemDescriptor(output, funcs::OneDNNMemoryFormat::any); + + mul_ = CreateMulPrimitive(*x_input_, *y_input_, dst_desc, output, dev_ctx); + Execute(); + return *(mul_); + } + + private: + memory ReorderWithScale(const memory::desc &src_desc, + const memory::desc &dst_desc, + void *src_data, + const std::vector &scale) { + auto mask = scale.size() > 1 ? 
1 : 0; + dnnl::primitive_attr attr; + attr.set_output_scales(mask, scale); + + auto src_mem = memory(src_desc, engine_, src_data); + auto dst_mem = memory(dst_desc, engine_); + + auto reorder_pd = dnnl::reorder::primitive_desc(src_mem, dst_mem, attr); + + auto reorder = dnnl::reorder(reorder_pd); + + auto &astream = OneDNNContext::tls().get_stream(); + { + paddle::platform::RecordEvent record_reorder( + "int_reorder", + paddle::platform::TracerEventType::UserDefined, + 2, + paddle::platform::EventRole::kUniqueOp); + reorder.execute(astream, src_mem, dst_mem); + astream.wait(); + } + + return dst_mem; + } + + memory QuantInputY(memory input_y, const std::vector &scale_y) { + const auto &dims = input_y.get_desc().data.dims; + auto ndims = input_y.get_desc().data.ndims; + auto y_dims = std::vector(dims, dims + ndims); + + auto user_y_desc = + CreateMemDescriptor(y_dims, funcs::OneDNNMemoryFormat::oi); + auto y_desc = + CreateMemDescriptor(y_dims, funcs::OneDNNMemoryFormat::oi); + + return ReorderWithScale( + user_y_desc, y_desc, input_y.get_data_handle(), scale_y); + } + + dnnl::primitive_attr CreateMulAttr(const OneDNNContext &dev_ctx, + bool force_fp32_output) { + dnnl::primitive_attr mul_attr; + + auto scale_y_data = dev_ctx.HasDnnAttr("scale_y") + ? PADDLE_GET_CONST(std::vector, + dev_ctx.GetDnnAttr("scale_y")) + : std::vector{1.0}; + auto scale_x_data = + dev_ctx.HasDnnAttr("scale_x") + ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("scale_x")) + : 1.0f; + auto scale_out = + dev_ctx.HasDnnAttr("scale_out") + ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("scale_out")) + : 1.0f; + auto scale_out_data = force_fp32_output ? 1.0f : scale_out; + + bool is_multi_channel = scale_y_data.size() > 1; + int count = is_multi_channel ? scale_y_data.size() : 1; + std::vector output_shift_scale(count); + for (int i = 0; i < count; i++) { + if (scale_y_data[i] == 0.0) + output_shift_scale[i] = scale_out_data; + else + output_shift_scale[i] = + scale_out_data / (scale_x_data * scale_y_data[i]); + } + int mul_mask = is_multi_channel ? 1 : 0; + mul_attr.set_output_scales(mul_mask, output_shift_scale); + + return mul_attr; + } + + inner_product_forward CreateMulPrimitive(const memory &x_memory, + const memory &y_memory, + const memory::desc &dst_desc, + DenseTensor *output, + const OneDNNContext &dev_ctx) { + const auto x_desc = x_memory.get_desc(); + const auto y_desc = y_memory.get_desc(); + inner_product_forward::primitive_desc mul_prim_desc; + + const auto &mul_desc = inner_product_forward::desc( + prop_kind::forward, x_desc, y_desc, dst_desc); + + if (is_int8_) { + bool force_fp32_output = + dev_ctx.HasDnnAttr("force_fp32_output") + ? 
PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("force_fp32_output")) + : false; + auto mul_attr = CreateMulAttr(dev_ctx, force_fp32_output); + mul_prim_desc = + inner_product_forward::primitive_desc(mul_desc, mul_attr, engine_); + } else { + mul_prim_desc = inner_product_forward::primitive_desc(mul_desc, engine_); + } + + output_ = CreateDstMemory(mul_prim_desc, dev_ctx, output); + + return inner_product_forward(mul_prim_desc); + } + + void Execute() { + auto &astream = OneDNNContext::tls().get_stream(); + (*mul_).execute(astream, + {{DNNL_ARG_SRC, *x_input_}, + {DNNL_ARG_WEIGHTS, *y_input_}, + {DNNL_ARG_DST, *output_}}); + astream.wait(); + } + + template + DenseTensor UpdateDataFormat(const DenseTensor *data, + int num_col_dims, + const OneDNNContext &dev_ctx) { + DenseTensor x_tmp; + DenseTensor data_matrix; + // This code is enforcing plain (non-blocked) memory arrangement + // in order to flatten (reduce dimensionality) of DenseTensor later + auto src_mdesc = data->mem_desc(); + auto dst_mdesc = data->dims().size() >= 4 + ? (data->dims().size() == 5 + ? CreateMemDescriptor( + data, funcs::OneDNNMemoryFormat::ncdhw) + : CreateMemDescriptor( + data, funcs::OneDNNMemoryFormat::nchw)) + : src_mdesc; + + if (src_mdesc != dst_mdesc) { + dev_ctx.template Alloc(&x_tmp, data->memory_size()); + + Reorder(src_mdesc, + dst_mdesc, + funcs::to_void_cast(data->data()), + funcs::to_void_cast(x_tmp.data())); + + x_tmp.Resize(data->dims()); + x_tmp.set_mem_desc(dst_mdesc); + data_matrix = ReshapeToMatrix(x_tmp, num_col_dims); + } else { + data_matrix = ReshapeToMatrix(*data, num_col_dims); + } + + return data_matrix; + } + + void UpdateDataPointers(const OneDNNContext &dev_ctx, + DenseTensor *out, + const DenseTensor *in) { + x_input_->set_data_handle(funcs::to_void_cast(in->data())); + output_->set_data_handle(dev_ctx.template Alloc(out)); + out->set_mem_desc(output_->get_desc()); + } + + template + memory::desc CreateMemDescriptor( + const DenseTensor *tensor, + funcs::OneDNNMemoryFormat format, + memory::data_type type = funcs::OneDNNGetDataType()) { + auto dims = vectorize(tensor->dims()); + return funcs::OneDNNMemDesc(dims, type, format); + } + + template + memory::desc CreateMemDescriptor( + const std::vector &dims, + funcs::OneDNNMemoryFormat format, + memory::data_type type = funcs::OneDNNGetDataType()) { + return funcs::OneDNNMemDesc(dims, type, format); + } + + template + memory CreateMemory(const memory::desc &desc, const DenseTensor *tensor) { + return memory(desc, engine_, funcs::to_void_cast(tensor->data())); + } + + memory CreateDstMemory( + const inner_product_forward::primitive_desc &mul_prim_desc, + const OneDNNContext &dev_ctx, + DenseTensor *output) { + auto dst_desc = mul_prim_desc.dst_desc(); + auto buffer_size = dst_desc.get_size(); + + OT *output_data = dev_ctx.template Alloc(output, buffer_size); + output->set_mem_desc(dst_desc); + return memory(dst_desc, engine_, funcs::to_void_cast(output_data)); + } + + memory Reorder(const memory::desc &src_desc, + const memory::desc &dst_desc, + void *src_data, + void *dst_data = NULL) { + auto src_mem = memory(src_desc, engine_, src_data); + auto dst_mem = dst_data ? 
memory(dst_desc, engine_, dst_data) + : memory(dst_desc, engine_); + + auto reorder = dnnl::reorder(src_mem, dst_mem); + + auto &astream = OneDNNContext::tls().get_stream(); + { + paddle::platform::RecordEvent record_reorder( + "int_reorder", + paddle::platform::TracerEventType::UserDefined, + 2, + paddle::platform::EventRole::kUniqueOp); + reorder.execute(astream, src_mem, dst_mem); + astream.wait(); + } + + return dst_mem; + } + + memory TransposeInputY(const DenseTensor *input_y) { + auto dims = vectorize(input_y->dims()); + std::swap(dims[0], dims[1]); // Correct output dimensions + auto src_desc = + CreateMemDescriptor(dims, funcs::OneDNNMemoryFormat::io); + auto dst_desc = + CreateMemDescriptor(dims, funcs::OneDNNMemoryFormat::oi); + return Reorder( + src_desc, dst_desc, funcs::to_void_cast(input_y->data())); + } + + const engine &engine_; + paddle::optional x_input_; + paddle::optional y_input_; + paddle::optional output_; + paddle::optional mul_; + static constexpr bool is_int8_ = funcs::is_int8(); +}; + +/* OT: output data type */ +template +std::shared_ptr> GetPrimitiveFactory( + const OneDNNContext &dev_ctx, + const DenseTensor *input_x, + const DenseTensor *input_y, + const engine &onednn_engine) { + std::string key = funcs::CreateKey(dev_ctx, + TransToProtoVarType(input_x->dtype()), + vectorize(input_x->dims()), + TransToProtoVarType(input_y->dtype()), + vectorize(input_y->dims()), + dev_ctx.GetOutputsName("Out")[0]); + key = funcs::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key); + + auto prim_creator = std::static_pointer_cast>( + dev_ctx.GetBlob(key)); + + if (prim_creator == nullptr) { + prim_creator = + std::make_shared>(onednn_engine); + dev_ctx.SetBlob(key, prim_creator); + } + + return prim_creator; +} + +/* XT: input x data type, YT: input y data type */ +template +inner_product_forward GetMulPrimitive(const OneDNNContext &dev_ctx, + const DenseTensor *input_x, + const DenseTensor *input_y, + DenseTensor *output, + int x_num_col_dims, + int y_num_col_dims, + const engine &onednn_engine) { + constexpr bool is_int8 = funcs::is_int8(); + bool force_fp32_output = + dev_ctx.HasDnnAttr("force_fp32_output") + ? 
PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("force_fp32_output")) + : false; + + if (is_int8 && !force_fp32_output) { + return GetPrimitiveFactory( + dev_ctx, input_x, input_y, onednn_engine) + ->CreateMulPrimitive( + input_x, input_y, output, x_num_col_dims, y_num_col_dims, dev_ctx); + + } else { + return GetPrimitiveFactory( + dev_ctx, input_x, input_y, onednn_engine) + ->CreateMulPrimitive( + input_x, input_y, output, x_num_col_dims, y_num_col_dims, dev_ctx); + } +} + +/* XT: input x data type */ +template +void MatmulWithFlattenKernelINT8(const Context &dev_ctx, + const DenseTensor &x, + const DenseTensor &y, + int x_num_col_dims, + int y_num_col_dims, + DenseTensor *out) { + PADDLE_ENFORCE_EQ(dev_ctx.GetPlace().GetType() == AllocationType::CPU, + true, + errors::PreconditionNotMet( + "oneDNN MatmulWithFlatten kernel must use CPUPlace")); + + OneDNNContext::tls().log_lib_version(); + auto &onednn_engine = dev_ctx.GetEngine(); + + auto out_dims = out->dims(); + + auto mul = GetMulPrimitive( + dev_ctx, &x, &y, out, x_num_col_dims, y_num_col_dims, onednn_engine); + + if (out_dims.size() != 2) { + out->Resize(out_dims); + } + + auto in_md = memory::desc(*dnnl_primitive_desc_query_md( + mul.get_primitive_desc(), dnnl_query_dst_md, 0)); + out->set_mem_desc(in_md.reshape(vectorize(out->dims()))); +} + +template +void MatmulWithFlattenKernel(const Context &dev_ctx, + const DenseTensor &x, + const DenseTensor &y, + int x_num_col_dims, + int y_num_col_dims, + DenseTensor *out) { + constexpr bool is_int8 = funcs::is_int8(); + if (is_int8) { + MatmulWithFlattenKernelINT8( + dev_ctx, x, y, x_num_col_dims, y_num_col_dims, out); + return; + } + + const DenseTensor x_matrix = + x.dims().size() > 2 ? ReshapeToMatrix(x, x_num_col_dims) : x; + const DenseTensor y_matrix = + y.dims().size() > 2 ? ReshapeToMatrix(y, y_num_col_dims) : y; + + // adding mb dim because MatMulV2 handler needs it + std::vector x_dims(3, 1); + std::vector y_dims(3, 1); + + x_dims[1] = x_matrix.dims()[0]; + x_dims[2] = x_matrix.dims()[1]; + y_dims[1] = y_matrix.dims()[0]; + y_dims[2] = y_matrix.dims()[1]; + + funcs::ExecuteMul( + dev_ctx, x_matrix, y_matrix, x_dims, y_dims, false, false, out); +} + } // namespace phi PD_REGISTER_KERNEL(matmul, @@ -162,3 +583,12 @@ PD_REGISTER_KERNEL(matmul, phi::dtype::bfloat16, int8_t, uint8_t) {} + +PD_REGISTER_KERNEL(matmul_with_flatten, + OneDNN, + ONEDNN, + phi::MatmulWithFlattenKernel, + float, + phi::dtype::bfloat16, + uint8_t, + int8_t) {} From 493825a5d3ce7b820744dfdd6df2f2acb958bebb Mon Sep 17 00:00:00 2001 From: Piotr Paturej <48731682+piotrekobi@users.noreply.github.com> Date: Fri, 2 Dec 2022 10:48:52 +0100 Subject: [PATCH 123/154] [PHI] Migrate elementwise_sub kernel (#48611) * Add migrations * Fix build errors * Remove elementwise_mul from migration --- .../mkldnn/elementwise_sub_mkldnn_op.cc | 27 ------------------- paddle/phi/kernels/elementwise_kernel.cc | 9 +++++++ .../phi/kernels/onednn/elementwise_kernel.cc | 10 +++++++ 3 files changed, 19 insertions(+), 27 deletions(-) delete mode 100644 paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc deleted file mode 100644 index 91660b79b09ac3..00000000000000 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h" - -namespace ops = paddle::operators; - -REGISTER_OP_KERNEL( - elementwise_sub, - MKLDNN, - paddle::platform::CPUPlace, - ops::EltwiseMKLDNNKernel, - ops::EltwiseMKLDNNKernel, - ops::EltwiseMKLDNNKernel, - ops::EltwiseMKLDNNKernel) diff --git a/paddle/phi/kernels/elementwise_kernel.cc b/paddle/phi/kernels/elementwise_kernel.cc index c0b99b8ddf0368..d4ffd49b5fc48b 100644 --- a/paddle/phi/kernels/elementwise_kernel.cc +++ b/paddle/phi/kernels/elementwise_kernel.cc @@ -416,6 +416,15 @@ PD_REGISTER_KERNEL(elementwise_pow, #endif #if defined PADDLE_WITH_MKLDNN +PD_REGISTER_KERNEL(subtract, + OneDNN, + ONEDNN, + phi::SubtractKernel, + float, + phi::dtype::bfloat16, + int8_t, + uint8_t) {} + PD_REGISTER_KERNEL( divide, OneDNN, ONEDNN, phi::DivideKernel, float, phi::dtype::bfloat16) {} #endif diff --git a/paddle/phi/kernels/onednn/elementwise_kernel.cc b/paddle/phi/kernels/onednn/elementwise_kernel.cc index e103f23df0dc62..29d527a523fbfe 100644 --- a/paddle/phi/kernels/onednn/elementwise_kernel.cc +++ b/paddle/phi/kernels/onednn/elementwise_kernel.cc @@ -133,10 +133,20 @@ void ElementwiseKernel(const OneDNNContext& dev_ctx, ElementwiseKernel(dev_ctx, x, y, axis, out); \ } +DEFINE_ONEDNN_ELEMENTWISE_KERNEL(Subtract, dnnl::algorithm::binary_sub) DEFINE_ONEDNN_ELEMENTWISE_KERNEL(Divide, dnnl::algorithm::binary_div) } // namespace phi +PD_REGISTER_KERNEL(subtract_raw, + OneDNN, + ONEDNN, + phi::SubtractRawKernel, + float, + phi::dtype::bfloat16, + int8_t, + uint8_t) {} + PD_REGISTER_KERNEL(divide_raw, OneDNN, ONEDNN, From d969c309abb72fcf61cee1d8e50dd92e5fbeaeab Mon Sep 17 00:00:00 2001 From: ronnywang Date: Fri, 2 Dec 2022 18:02:47 +0800 Subject: [PATCH 124/154] add custom profier doc (#48629) * add custom profier doc * update * format; test=document_fix Co-authored-by: Ligoml <39876205+Ligoml@users.noreply.github.com> --- python/paddle/profiler/profiler.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/python/paddle/profiler/profiler.py b/python/paddle/profiler/profiler.py index eef6714f2e20c9..e274f007b83b0e 100644 --- a/python/paddle/profiler/profiler.py +++ b/python/paddle/profiler/profiler.py @@ -31,17 +31,14 @@ enable_memory_recorder, enable_op_info_recorder, ) +from paddle.profiler import utils -from .utils import RecordEvent, wrap_optimizers from .profiler_statistic import ( + SortedKeys, StatisticData, _build_table, - SortedKeys, gen_layer_flops, ) -from paddle.profiler import utils - -from .profiler_statistic import SortedKeys, StatisticData, _build_table from .timer import benchmark from .utils import RecordEvent, wrap_optimizers @@ -359,6 +356,7 @@ class Profiler: be timed and profiled. Default: False. record_shapes (bool, optional): If it is True, collect op's input shape information. Default: False. profile_memory (bool, optional): If it is True, collect tensor memory allocation and release information. Default: False. 
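Usage note: the profiler hunk continues below with the new ``custom_device_types`` argument of ``paddle.profiler.Profiler``. For orientation, a hedged usage sketch (not taken from the patch; the device type string 'custom_cpu' is a placeholder and only takes effect when Paddle is built with a matching custom-device plugin):

    import paddle.profiler as profiler

    prof = profiler.Profiler(
        targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.CUSTOM_DEVICE],
        custom_device_types=['custom_cpu'],  # placeholder custom device type
    )
    prof.start()
    # ... run training or inference steps here ...
    prof.stop()
    prof.summary()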
+ custom_device_types (list, optional): If targets contain profiler.ProfilerTarget.CUSTOM_DEVICE, custom_device_types select the custom device type for profiling. The default value represents all custom devices will be selected. with_flops (bool, optional): If it is True, the flops of the op will be calculated. Default: False. Examples: From ea5ca5559bdc6e2e428e3544e28b24c60986572a Mon Sep 17 00:00:00 2001 From: Yuanle Liu Date: Fri, 2 Dec 2022 20:46:39 +0800 Subject: [PATCH 125/154] [Paddle-TRT] Support engine sharing memory of multiple predictors (#47631) --- paddle/fluid/inference/api/analysis_config.cc | 39 ++++++++++++------- .../fluid/inference/api/analysis_predictor.h | 7 +++- .../inference/api/paddle_analysis_config.h | 15 +++++++ paddle/fluid/pybind/inference_api.cc | 5 +++ 4 files changed, 51 insertions(+), 15 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 22b8e4487d1704..7720fab31e29ee 100755 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -679,24 +679,11 @@ void AnalysisConfig::EnableTensorRtEngine( bool use_calib_mode) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) if (!use_gpu()) { - LOG(ERROR) << "To use TensorRT engine, please call EnableGpu() first"; + LOG(ERROR) << "To use TensorRT engine, please call EnableUseGpu() first"; return; } use_tensorrt_ = true; -#ifdef PADDLE_WITH_TENSORRT - // https://forums.developer.nvidia.com/t/nvinfer1-createexecutioncontextwithoutdevicememory-returns-nullptr/111878/2 - // when trt version less than 7.2, - // createExecutionContextWithoutDeviceMemory() has bug. - // so, we cannot enable engine context memory sharing. -#if IS_TRT_VERSION_GE(7200) - trt_engine_memory_sharing_ = true; -#else - LOG(WARNING) - << "TensorRT engine context memory sharing needs version 7.2 and after."; - trt_engine_memory_sharing_ = false; -#endif -#endif tensorrt_workspace_size_ = workspace_size; tensorrt_max_batchsize_ = max_batch_size; tensorrt_min_subgraph_size_ = min_subgraph_size; @@ -711,6 +698,30 @@ void AnalysisConfig::EnableTensorRtEngine( #endif } +void AnalysisConfig::EnableTensorRTMemoryOptim(bool engine_memory_sharing, + int sharing_identifier) { + PADDLE_ENFORCE_EQ( + use_tensorrt_, + true, + platform::errors::InvalidArgument( + "To enable TensorRT memory optim, please call " + "EnableTensorRtEngine or enable_tensorrt_engine first.")); + PADDLE_ENFORCE_GE(sharing_identifier, + 0, + platform::errors::InvalidArgument( + "The value of sharing_identifier must be greater " + "than or equal to 0.")); + if (!engine_memory_sharing) { + PADDLE_ENFORCE_EQ(sharing_identifier, + 0, + platform::errors::InvalidArgument( + "The value of sharing_identifier must be equal to 0 " + "when engine_memory_sharing is false.")); + } + trt_engine_memory_sharing_ = engine_memory_sharing; + trt_engine_memory_sharing_identifier_ = sharing_identifier; +} + void AnalysisConfig::EnableDlnne( int min_subgraph_size, int max_batch_size, diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index 25595d12cb44a5..09e1b43377cdff 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -103,7 +103,12 @@ class AnalysisPredictor : public PaddlePredictor { if (config_.shape_range_info_collected()) { config_.SwitchIrOptim(false); } - predictor_id_ = inference::GetUniqueId(); + auto trt_identifier = 
config_.trt_engine_memory_sharing_identifier_; + if (trt_identifier > 0) { + predictor_id_ = -trt_identifier; + } else { + predictor_id_ = inference::GetUniqueId(); + } } /// /// \brief Destroy the Analysis Predictor object diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index 0fef4f6ced5fdf..a8f645680a962c 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -576,6 +576,20 @@ struct PD_INFER_DECL AnalysisConfig { /// bool tensorrt_engine_enabled() const { return use_tensorrt_; } /// + /// \brief Turn on the TensorRT memory optimization. + /// + /// \param engine_memory_sharing Whether to enable TensorRT memory + /// optimization. + /// \param sharing_identifier This parameter can be set if TensorRT memory + /// optimization is enabled, and the value must be greater than 0. If you have + /// multiple predictors that want to share memory, you can specify a + /// same value for these predictors. NOTE: The predictors specified with the + /// same value must be guaranteed to be executed serially, otherwise undefined + /// behavior will occur. + /// + void EnableTensorRTMemoryOptim(bool engine_memory_sharing = true, + int sharing_identifier = 0); + /// /// \brief A boolean state telling whether the tensorrt engine memory sharing /// is activated. /// @@ -1093,6 +1107,7 @@ struct PD_INFER_DECL AnalysisConfig { // memory reuse related. bool enable_memory_optim_{false}; bool trt_engine_memory_sharing_{false}; + int trt_engine_memory_sharing_identifier_{0}; bool use_mkldnn_{false}; std::unordered_set mkldnn_enabled_op_types_; diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index 83db629dc89f23..1524c1f29d67b8 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -32,6 +32,7 @@ #include "paddle/fluid/inference/api/analysis_predictor.h" #include "paddle/fluid/inference/api/helper.h" +#include "paddle/fluid/inference/api/paddle_analysis_config.h" #include "paddle/fluid/inference/api/paddle_infer_contrib.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/paddle_pass_builder.h" @@ -732,6 +733,10 @@ void BindAnalysisConfig(py::module *m) { py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32, py::arg("use_static") = false, py::arg("use_calib_mode") = true) + .def("enable_tensorrt_memory_optim", + &AnalysisConfig::EnableTensorRTMemoryOptim, + py::arg("engine_memory_sharing") = true, + py::arg("sharing_identifier") = 0) .def("tensorrt_precision_mode", &AnalysisConfig::tensorrt_precision_mode) .def("set_trt_dynamic_shape_info", &AnalysisConfig::SetTRTDynamicShapeInfo, From a3ae080aa1eda28277795f049ba37f2df6b8d68a Mon Sep 17 00:00:00 2001 From: Charles-hit <56987902+Charles-hit@users.noreply.github.com> Date: Fri, 2 Dec 2022 21:50:11 +0800 Subject: [PATCH 126/154] remove softmax api from fluid (#48388) * move softmax to paddle2.0 * fix some bugs * resolve conflict * remove some code * modify code style * fix bugs * fix code * fix move code * fix some bugs * fix code * fix some code * modify the header file * fix bugs * fix some examples * fix mish example * fix code --- python/paddle/fluid/layers/detection.py | 2 +- python/paddle/fluid/layers/nn.py | 164 ++---------------- python/paddle/fluid/layers/rnn.py | 4 +- .../fleet/parallel_dygraph_se_resnext.py | 2 +- .../fleet/parallel_dygraph_transformer.py | 4 +- 
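Usage note for the TensorRT engine memory sharing added in the patch above: the new Python binding ``enable_tensorrt_memory_optim`` mirrors ``EnableTensorRTMemoryOptim``, and predictors configured with the same positive ``sharing_identifier`` share engine memory but must then be executed serially. A hypothetical sketch, not part of the patch; the model directories and GPU settings are placeholders:

    from paddle.inference import Config, create_predictor

    def build_config(model_dir, identifier):
        config = Config(model_dir)  # placeholder model directory
        config.enable_use_gpu(100, 0)  # TensorRT requires the GPU to be enabled first
        config.enable_tensorrt_engine(workspace_size=1 << 30, max_batch_size=1)
        # Same identifier -> these predictors share TensorRT engine memory,
        # so they must not run concurrently.
        config.enable_tensorrt_memory_optim(True, identifier)
        return config

    predictor_a = create_predictor(build_config("./model_a", 1))
    predictor_b = create_predictor(build_config("./model_b", 1))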
.../fluid/tests/unittests/dist_transformer.py | 6 +- .../seq2seq_dygraph_model.py | 6 +- .../unittests/dygraph_to_static/test_dict.py | 4 +- .../dygraph_to_static/test_ifelse.py | 2 +- .../dygraph_to_static/test_mobile_net.py | 2 +- .../test_reinforcement_learning.py | 2 +- .../dygraph_to_static/test_se_resnet.py | 2 +- .../transformer_dygraph_model.py | 4 +- .../unittests/ipu/test_dy2static_fp16_ipu.py | 2 +- .../tests/unittests/ipu/test_dy2static_ipu.py | 2 +- .../unittests/ipu/test_modelruntime_ipu.py | 2 +- .../tests/unittests/ipu/test_print_op_ipu.py | 2 +- .../unittests/ipu/test_softmax_op_ipu.py | 2 +- .../test_mkldnn_inplace_fuse_pass.py | 2 +- .../ir/inference/test_trt_activation_pass.py | 2 +- .../ir/inference/test_trt_fc_fuse_pass.py | 19 +- .../ir/inference/test_trt_gather_op.py | 4 +- .../unittests/ir/test_ir_fc_fuse_pass.py | 3 +- .../unittests/npu/test_softmax_op_npu.py | 2 +- .../test_imperative_ocr_attention_model.py | 2 +- .../test_imperative_reinforcement.py | 2 +- .../unittests/test_imperative_se_resnext.py | 4 +- ..._imperative_transformer_sorted_gradient.py | 4 +- .../fluid/tests/unittests/test_layers.py | 6 +- .../fluid/tests/unittests/test_mean_op.py | 2 +- .../tests/unittests/test_recurrent_op.py | 2 +- .../tests/unittests/test_rnn_decode_api.py | 4 +- .../tests/unittests/xpu/test_mean_op_xpu.py | 2 +- 33 files changed, 69 insertions(+), 205 deletions(-) diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index f021ab8f3d3605..d490b0457d98cc 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -626,7 +626,7 @@ class number, M is number of bounding boxes. target_box=loc, code_type='decode_center_size', ) - scores = nn.softmax(input=scores) + scores = paddle.nn.functional.softmax(scores) scores = paddle.transpose(scores, perm=[0, 2, 1]) scores.stop_gradient = True nmsed_outs = helper.create_variable_for_type_inference( diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 717c965727fdb2..4dab44ebe54feb 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -68,7 +68,6 @@ 'linear_chain_crf', 'crf_decoding', 'conv2d', - 'softmax', 'pool2d', 'batch_norm', 'dropout', @@ -145,7 +144,7 @@ def _get_reduce_dim(dim, input): else: raise TypeError( "The type of dim must be int, list, tuple or range, but received {}".format( - type(axis) + type(dim) ) ) if dim is None: @@ -679,7 +678,7 @@ def _pull_gpups_sparse( size(int|list of int): The embedding size parameter of each input, which indicates the size of each embedding vector respectively. dtype(str): The dtype refers to the data type of output tensor. Only supports - float32 now. + float32 now. Returns: Variable|list of Variable: The tensor variable storing the embeddings of the \ @@ -742,7 +741,7 @@ def _pull_box_sparse( size(int): The embedding size parameter, which indicates the size of each embedding vector respectively. dtype(str): The dtype refers to the data type of output tensor. Only supports - float32 now. + float32 now. Returns: Variable|list of Variable: The tensor variable storing the embeddings of the \ @@ -1123,147 +1122,6 @@ def get_attrs(prog, dropout_prob, is_test, seed): return out -@deprecated(since="2.0.0", update_to="paddle.nn.functional.softmax") -def softmax(input, use_cudnn=True, name=None, axis=-1): - r""" - This operator implements the softmax layer. The calculation process is as follows: - - 1. 
The dimension :attr:`axis` of the ``input`` will be permuted to the last. - - 2. Then the input tensor will be logically flattened to a 2-D matrix. The matrix's - second dimension(row length) is the same as the dimension :attr:`axis` of the input - tensor, and the first dimension(column length) is the product of all other - dimensions of the input tensor. For each row of the matrix, the softmax operator - squashes the K-dimensional(K is the width of the matrix, which is also the size - of the input tensor's dimension :attr:`axis`) vector of arbitrary real values to a - K-dimensional vector of real values in the range [0, 1] that add up to 1. - - 3. After the softmax operation is completed, the inverse operations of steps 1 and 2 - are performed to restore the two-dimensional matrix to the same dimension as the ``input``. - - It computes the exponential of the given dimension and the sum of exponential - values of all the other dimensions in the K-dimensional vector input. - Then the ratio of the exponential of the given dimension and the sum of - exponential values of all the other dimensions is the output of the softmax - operator. - - For each row :math:`i` and each column :math:`j` in the matrix, we have: - - .. math:: - - Out[i, j] = \\frac{\\exp(X[i, j])}{\\sum_j(exp(X[i, j])} - - Example: - - .. code-block:: text - - Case 1: - Input: - X.shape = [2, 3, 4] - X.data = [[[2.0, 3.0, 4.0, 5.0], - [3.0, 4.0, 5.0, 6.0], - [7.0, 8.0, 8.0, 9.0]], - [[1.0, 2.0, 3.0, 4.0], - [5.0, 6.0, 7.0, 8.0], - [6.0, 7.0, 8.0, 9.0]]] - - Attrs: - axis = -1 - - Output: - Out.shape = [2, 3, 4] - Out.data = [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426], - [0.0320586 , 0.08714432, 0.23688282, 0.64391426], - [0.07232949, 0.19661193, 0.19661193, 0.53444665]], - [[0.0320586 , 0.08714432, 0.23688282, 0.64391426], - [0.0320586 , 0.08714432, 0.23688282, 0.64391426], - [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]] - - Case 2: - Input: - X.shape = [2, 3, 4] - X.data = [[[2.0, 3.0, 4.0, 5.0], - [3.0, 4.0, 5.0, 6.0], - [7.0, 8.0, 8.0, 9.0]], - [[1.0, 2.0, 3.0, 4.0], - [5.0, 6.0, 7.0, 8.0], - [6.0, 7.0, 8.0, 9.0]]] - Attrs: - axis = 1 - - Output: - Out.shape = [2, 3, 4] - Out.data = [[[0.00657326, 0.00657326, 0.01714783, 0.01714783], - [0.01786798, 0.01786798, 0.04661262, 0.04661262], - [0.97555875, 0.97555875, 0.93623955, 0.93623955]], - [[0.00490169, 0.00490169, 0.00490169, 0.00490169], - [0.26762315, 0.26762315, 0.26762315, 0.26762315], - [0.72747516, 0.72747516, 0.72747516, 0.72747516]]] - - Args: - input (Tensor): The input tensor. A multi-dimension ``Tensor`` with type float32 or float64. - use_cudnn (bool, optional): Use cudnn kernel or not, it is valid only when the cudnn \ - library is installed. To improve performance, set use_cudnn to True by default. - name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` . Default: None. - will be named automatically. Default: None. - axis (int, optional): The index of dimension to perform softmax calculations, it should - be in range :math:`[-1, rank - 1]`, while :math:`rank` is the rank of - input tensor. Default: -1. -1 means the last dimension. - - Returns: - Tensor: ``Tensor`` indicates the output of softmax. The data type and shape are the same as ``input`` . - - Examples: - - .. 
code-block:: python - - import paddle - import paddle.nn.functional as F - - x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0], - [3.0, 4.0, 5.0, 6.0], - [7.0, 8.0, 8.0, 9.0]], - [[1.0, 2.0, 3.0, 4.0], - [5.0, 6.0, 7.0, 8.0], - [6.0, 7.0, 8.0, 9.0]]], dtype='float32') - y = F.softmax(x, axis=1) - print(y) - # [[[0.00657326, 0.00657326, 0.01714783, 0.01714783], - # [0.01786798, 0.01786798, 0.04661262, 0.04661262], - # [0.97555870, 0.97555870, 0.93623954, 0.93623954]], - # [[0.00490169, 0.00490169, 0.00490169, 0.00490169], - # [0.26762316, 0.26762316, 0.26762316, 0.26762316], - # [0.72747517, 0.72747517, 0.72747517, 0.72747517]]] - - """ - - if in_dygraph_mode(): - return _C_ops.softmax(input, axis) - - if _non_static_mode(): - return _legacy_C_ops.softmax( - input, 'axis', axis, 'use_cudnn', use_cudnn - ) - - inputs = {"X": [input]} - attrs = {"axis": axis, "use_cudnn": use_cudnn} - - helper = LayerHelper('softmax', **locals()) - check_variable_and_dtype( - input, 'input/x', ['float16', 'float32', 'float64'], 'softmax' - ) - - dtype = helper.input_dtype() - softmax_out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="softmax", - inputs={"X": input}, - outputs={"Out": softmax_out}, - attrs=attrs, - ) - return softmax_out - - def conv2d( input, num_filters, @@ -1788,7 +1646,7 @@ def is_list_or_tuple(ele): if pool_padding == "VALID": padding_algorithm = "VALID" pool_padding = [0, 0] - if ceil_mode != False: + if ceil_mode is not False: raise ValueError( "When Attr(pool_padding) is \"VALID\", Attr(ceil_mode) must be False. " "Received ceil_mode: True." @@ -6643,7 +6501,7 @@ def deformable_roi_pooling( ) input_channels = input.shape[1] - if position_sensitive == False: + if position_sensitive is False: output_channels = input_channels else: output_channels = input_channels / pooled_height / pooled_width @@ -6841,11 +6699,11 @@ def mish(x, threshold=20, name=None): .. math:: - out = \\begin{cases} - x \\ast \\tanh(x), \\text{if } x > \\text{threshold} \\\\ - x \\ast \\tanh(e^{x}), \\text{if } x < -\\text{threshold} \\\\ - x \\ast \\tanh(\\ln(1 + e^{x})), \\text{otherwise} - \\end{cases} + out = \\begin{cases} + x \\ast \\tanh(x), \\text{if } x > \\text{threshold} \\\\ + x \\ast \\tanh(e^{x}), \\text{if } x < -\\text{threshold} \\\\ + x \\ast \\tanh(\\ln(1 + e^{x})), \\text{otherwise} + \\end{cases} Args: x (Variable): Input feature, multi-dimensional Tensor. The data type @@ -6867,9 +6725,11 @@ def mish(x, threshold=20, name=None): .. code-block:: python + import paddle import paddle.fluid as fluid import numpy as np + paddle.enable_static() DATATYPE='float32' x_data = np.array([i for i in range(1,5)]).reshape([1,1,4]).astype(DATATYPE) diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 83c4d6c2cbcb1c..60ac537ffc6d40 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -1304,7 +1304,7 @@ def _beam_search_step(self, time, logits, next_cell_states, beam_state): self.noend_mask_tensor, "float64" ) - step_log_probs = paddle.log(nn.softmax(logits)) + step_log_probs = paddle.log(paddle.nn.functional.softmax(logits)) step_log_probs = self._mask_probs(step_log_probs, beam_state.finished) log_probs = nn.elementwise_add( x=step_log_probs, y=beam_state.log_probs, axis=0 @@ -2330,7 +2330,7 @@ def sample(self, time, outputs, states): if self.softmax_temperature is not None else outputs ) - probs = nn.softmax(logits) + probs = paddle.nn.functional.softmax(logits) # TODO: remove this stop_gradient. 
The stop_gradient of sample_ids can # not pass to probs, since sampling_id op does not have corresponding # grad op and thus can not pass.
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py
index 13e83741ea6956..164f1410ed756b 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py
@@ -354,7 +354,7 @@ def run_one_loop(self, model, opt, data): label.stop_gradient = True out = model(img)
- softmax_out = fluid.layers.softmax(out, use_cudnn=False)
+ softmax_out = paddle.nn.functional.softmax(out)
 loss = fluid.layers.cross_entropy(input=softmax_out, label=label) avg_loss = paddle.mean(x=loss) return avg_loss
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py
index 5cfd8a60786a9e..41c8afd6290286 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py
@@ -342,7 +342,7 @@ def forward(self, queries, keys, values, attn_bias): ) if attn_bias is not None: product += attn_bias
- weights = fluid.layers.softmax(product)
+ weights = paddle.nn.functional.softmax(product)
 if self._dropout_rate: weights_droped = fluid.layers.dropout( weights,
@@ -849,7 +849,7 @@ def forward(self, dec_inputs=None, enc_output=None): if dec_inputs is None: # Return probs for independent decoder program.
- predict_out = fluid.layers.softmax(predict)
+ predict_out = paddle.nn.functional.softmax(predict)
 return predict_out return predict
diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py
index 7106c426bcfc8f..cb60e1c599114b 100644
--- a/python/paddle/fluid/tests/unittests/dist_transformer.py
+++ b/python/paddle/fluid/tests/unittests/dist_transformer.py
@@ -1177,7 +1177,7 @@ def scaled_dot_product_attention(q, k, v, attn_bias, d_model, dropout_rate): product = layers.matmul(x=scaled_q, y=k, transpose_y=True) if attn_bias: product += attn_bias
- weights = layers.softmax(product)
+ weights = paddle.nn.functional.softmax(product)
 if dropout_rate: weights = layers.dropout( weights,
@@ -1715,7 +1715,7 @@ def wrap_decoder( bias_attr=const_bias_attr, ) if dec_inputs is None:
- predict = layers.softmax(predict)
+ predict = paddle.nn.functional.softmax(predict)
 return predict
@@ -1834,7 +1834,7 @@ def beam_search(): logits = paddle.reshape(logits, (-1, trg_vocab_size)) topk_scores, topk_indices = layers.topk(
- input=layers.softmax(logits), k=beam_size
+ input=paddle.nn.functional.softmax(logits), k=beam_size
 ) accu_scores = layers.elementwise_add( x=paddle.log(topk_scores),
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py
index bf1dfdcad23887..d364b8a1a5d602 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py
@@ -435,7 +435,9 @@ def beam_search(self, inputs): cell_outputs = self._split_batch_beams(step_input) cell_outputs = self.fc(cell_outputs)
- step_log_probs =
paddle.log(fluid.layers.softmax(cell_outputs)) + step_log_probs = paddle.log( + paddle.nn.functional.softmax(cell_outputs) + ) noend_array = [-self.kinf] * self.tar_vocab_size noend_array[self.beam_end_token] = 0 noend_mask_tensor = to_variable( @@ -703,7 +705,7 @@ def attention(self, query, enc_output, mask=None): attn = paddle.transpose(attn, [1, 0, 2]) attn = paddle.add(attn, mask * 1000000000) attn = paddle.transpose(attn, [1, 0, 2]) - weight = fluid.layers.softmax(attn) + weight = paddle.nn.functional.softmax(attn) weight_memory = fluid.layers.matmul(weight, memory) return weight_memory diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py index 57bd7c2936e8e0..742e828aa9acb4 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py @@ -67,7 +67,7 @@ def forward(self, input, cache=None): cache["k"], cache["v"] = k, v weight = fluid.layers.matmul(x=q, y=k, transpose_y=True) - weight = fluid.layers.softmax(weight) + weight = paddle.nn.functional.softmax(weight) out = fluid.layers.matmul(weight, v) return out @@ -113,7 +113,7 @@ def forward(self, input, max_len=4): # Test to call function defined outside of class. def update_cache(cache): for k, val in cache.items(): - cache[k] = fluid.layers.softmax(val) + cache[k] = paddle.nn.functional.softmax(val) return cache diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py index c17bfd2508b3de..8cc543a19f94d2 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py @@ -308,7 +308,7 @@ def forward(self, x, label=None): # Test to call function behind caller. 
def softmax(x): - return fluid.layers.softmax(x) + return paddle.nn.functional.softmax(x) class TestNetWithExternalFunc(TestDygraphIfElseNet): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index 4c5e306718d223..8358c12edc4b6c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -535,7 +535,7 @@ def train_mobilenet(args, to_static): out = net(img) t_end = time.time() - softmax_out = fluid.layers.softmax(out, use_cudnn=False) + softmax_out = paddle.nn.functional.softmax(out) loss = fluid.layers.cross_entropy( input=softmax_out, label=label ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py index b98d9c304dc9a3..13aace20031b38 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py @@ -48,7 +48,7 @@ def forward(self, x): x = fluid.layers.relu(x) action_scores = self.affine2(x) - log_prob = fluid.layers.softmax(action_scores, axis=1) + log_prob = paddle.nn.functional.softmax(action_scores, axis=1) return log_prob diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py index 109fc9975488d9..70ee21713c7edc 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py @@ -343,7 +343,7 @@ def forward(self, inputs, label): y = paddle.reshape(y, shape=[-1, self.pool2d_avg_output]) out = self.out(y) - softmax_out = fluid.layers.softmax(out) + softmax_out = paddle.nn.functional.softmax(out) loss = fluid.layers.cross_entropy(input=softmax_out, label=label) avg_loss = paddle.mean(x=loss) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index f4c0815884a52c..e6f03170b4734c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -153,7 +153,7 @@ def forward(self, queries, keys, values, attn_bias, cache=None): ) if attn_bias is not None: product += attn_bias - weights = layers.softmax(product) + weights = paddle.nn.functional.softmax(product) if self.dropout_rate: weights = layers.dropout(weights, dropout_prob=self.dropout_rate) out = layers.matmul(weights, v) @@ -840,7 +840,7 @@ def gather(input, indices, batch_pos): ) caches = map_structure(split_batch_beams, caches) step_log_probs = split_batch_beams( - paddle.log(fluid.layers.softmax(logits)) + paddle.log(paddle.nn.functional.softmax(logits)) ) step_log_probs = mask_probs( diff --git a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_fp16_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_fp16_ipu.py index 8a13e5abb55237..f685eac6d3c787 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_fp16_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_fp16_ipu.py @@ -33,7 +33,7 @@ def forward(self, x, target=None): x = self.conv(x) x = paddle.flatten(x, 1, -1) 
if target is not None: - x = paddle.fluid.layers.softmax(x) + x = paddle.nn.functional.softmax(x) loss = paddle.fluid.layers.cross_entropy(x, target) if self.use_ipu: loss = paddle.incubate.identity_loss(loss, 1) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py index dbdfab28825036..4ca25992175599 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py @@ -48,7 +48,7 @@ def forward(self, x, target=None): x = paddle.flatten(x, 1, -1) if target is not None: if self.use_softmax: - x = paddle.fluid.layers.softmax(x) + x = paddle.nn.functional.softmax(x) if self.loss_op: loss = self.loss_op(x, target) else: diff --git a/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py index 9fda7f780e86d4..2e13687df14f0a 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py @@ -32,7 +32,7 @@ def forward(self, x, target=None): x = self.conv(x) x = paddle.flatten(x, 1, -1) if target is not None: - x = paddle.fluid.layers.softmax(x) + x = paddle.nn.functional.softmax(x) loss = paddle.fluid.layers.cross_entropy(x, target) return x, loss return x diff --git a/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py index ccf0a38bbf88c7..782c195c5d658d 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py @@ -119,7 +119,7 @@ def forward(self, x, target=None): print(x) x = paddle.flatten(x, 1, -1) if target is not None: - x = paddle.fluid.layers.softmax(x) + x = paddle.nn.functional.softmax(x) loss = paddle.fluid.layers.cross_entropy(x, target) loss = paddle.incubate.identity_loss(loss, 1) return x, loss diff --git a/python/paddle/fluid/tests/unittests/ipu/test_softmax_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_softmax_op_ipu.py index 485515d7d7f12b..53c7e1ad927183 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_softmax_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_softmax_op_ipu.py @@ -47,7 +47,7 @@ def build_model(self): x = paddle.static.data( name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32' ) - out = paddle.fluid.layers.softmax(x, **self.attrs) + out = paddle.nn.functional.softmax(x, **self.attrs) self.fetch_list = [out.name] def run_model(self, exec_mode): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py index 47668a42ec6de9..386dcf7b4075b7 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py @@ -32,7 +32,7 @@ def setUp(self): conv_out_1 = fluid.layers.conv2d( data, num_filters=3, filter_size=3, bias_attr=False ) - softmax_out = fluid.layers.softmax(conv_out_1) + softmax_out = paddle.nn.functional.softmax(conv_out_1) relu_out = fluid.layers.relu(conv_out_1) eltwise_out = fluid.layers.elementwise_add( softmax_out, relu_out, axis=-1 diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py index 
3597f11c550827..29393ff96ca2bc 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py @@ -77,7 +77,7 @@ def append_act(self, x): class TensorRTSubgraphPassSoftMaxTest(TensorRTSubgraphPassActivationTest): def append_act(self, x): - return fluid.layers.softmax(x) + return paddle.nn.functional.softmax(x) class TensorRTSubgraphPassSigmoidTest(TensorRTSubgraphPassActivationTest): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py index 3f5daf0d924cb9..a3b297a268fadd 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py @@ -17,6 +17,7 @@ import numpy as np from inference_pass_test import InferencePassTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import AnalysisConfig @@ -31,7 +32,7 @@ def setUp(self): fc_out1 = fluid.layers.fc( input=data, size=128, num_flatten_dims=1, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = { "data": np.random.random((32, 128, 2, 2)).astype("float32") @@ -61,7 +62,7 @@ def setUp(self): fc_out1 = fluid.layers.fc( input=data, size=64, num_flatten_dims=1, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = { "data": np.random.random((32, 128, 32, 8)).astype("float32") @@ -89,7 +90,7 @@ def setUp(self): fc_out1 = fluid.layers.fc( input=data, size=32, num_flatten_dims=2, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = { "data": np.random.random((3, 24, 16, 16)).astype("float32") @@ -115,7 +116,7 @@ def setUp(self): fc_out1 = fluid.layers.fc( input=data, size=64, num_flatten_dims=1, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = {"data": np.random.random((32, 128)).astype("float32")} self.enable_trt = True @@ -147,7 +148,7 @@ def setUp(self): fc_out1 = fluid.layers.fc( input=data, size=64, num_flatten_dims=1, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")} self.enable_trt = True @@ -179,7 +180,7 @@ def setUp(self): fc_out1 = fluid.layers.fc( input=data, size=64, num_flatten_dims=2, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")} self.enable_trt = True @@ -213,7 +214,7 @@ def setUp(self): fc_out1 = fluid.layers.fc( input=data, size=64, num_flatten_dims=1, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = { "data": np.random.random((32, 12, 4, 6)).astype("float32") @@ -249,7 +250,7 @@ def setUp(self): fc_out1 = fluid.layers.fc( input=data, size=64, num_flatten_dims=2, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = { "data": np.random.random((32, 128, 32, 32)).astype("float32") @@ -285,7 +286,7 @@ def setUp(self): fc_out1 = fluid.layers.fc( input=data, size=64, num_flatten_dims=3, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) 
+ out = paddle.nn.functional.softmax(fc_out1) self.feeds = { "data": np.random.random((32, 128, 32, 32)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py index c8b01107ebfac7..3b73ae07441c7e 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py @@ -30,7 +30,7 @@ def setUp(self): data = fluid.data(name='data', shape=[-1, 128], dtype='float32') index = fluid.data(name='index', shape=[-1, 1], dtype='int32') scale_out = paddle.gather(data, index=index) - out = fluid.layers.softmax(input=scale_out) + out = paddle.nn.functional.softmax(scale_out) self.feeds = { "data": np.random.random([self.bs, 128]).astype("float32"), @@ -69,7 +69,7 @@ def setUp(self): data = fluid.data(name='data', shape=[16, 64], dtype='float32') index = fluid.data(name='index', shape=[2], dtype='int32') scale_out = paddle.gather(data, index=index) - out = fluid.layers.softmax(input=scale_out) + out = paddle.nn.functional.softmax(scale_out) self.feeds = { "data": np.random.random([self.bs, 64]).astype("float32"), diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py index 5659ecf3b4a2d1..3e958d9d191c9c 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py @@ -17,6 +17,7 @@ import numpy as np from pass_test import PassTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core @@ -31,7 +32,7 @@ def setUp(self): input=data, size=128, num_flatten_dims=1, act="relu" ) tmp_1 = fluid.layers.fc(input=tmp_0, size=32, num_flatten_dims=1) - tmp_2 = fluid.layers.softmax(input=tmp_1) + tmp_2 = paddle.nn.functional.softmax(tmp_1) self.feeds = {"data": np.random.random((32, 128)).astype("float32")} self.fetch_list = [tmp_0, tmp_1, tmp_2] diff --git a/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py index 41ccda3dba7622..2ad4b930f0dcf6 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py @@ -79,7 +79,7 @@ def _test(self, run_npu=True): prediction = fluid.layers.fc(input=fc_1, size=2) # 4 x 2 - prob = fluid.layers.softmax(prediction, axis=1) + prob = paddle.nn.functional.softmax(prediction, axis=1) cost = fluid.layers.cross_entropy(input=prob, label=label) loss = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index 657774b7298a36..8c46a64162299c 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -310,7 +310,7 @@ def forward(self, encoder_vec, encoder_proj, decoder_state): shape=[attention_weight.shape[0], attention_weight.shape[1]], ) - weights_reshape = fluid.layers.softmax(weights_reshape) + weights_reshape = paddle.nn.functional.softmax(weights_reshape) scaled = fluid.layers.elementwise_mul( x=encoder_vec, y=weights_reshape, axis=0 ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py index 
dfbaae4926d0ef..06982a0fc3da9e 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py
@@ -41,7 +41,7 @@ def forward(self, inputs): x = fluid.layers.dropout(x, self.dropout_ratio) x = fluid.layers.relu(x) action_scores = self.affine2(x)
- return fluid.layers.softmax(action_scores, axis=1)
+ return paddle.nn.functional.softmax(action_scores, axis=1)
 class TestImperativeMnist(unittest.TestCase):
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py
index 95f912d8227711..6eb5ab1874d524 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py
@@ -376,7 +376,7 @@ def run_dygraph(): label.stop_gradient = True out = se_resnext(img)
- softmax_out = fluid.layers.softmax(out, use_cudnn=False)
+ softmax_out = paddle.nn.functional.softmax(out)
 loss = fluid.layers.cross_entropy( input=softmax_out, label=label )
@@ -456,7 +456,7 @@ def run_dygraph(): ) label = fluid.layers.data(name='label', shape=[1], dtype='int64') out = se_resnext(img)
- softmax_out = fluid.layers.softmax(out, use_cudnn=False)
+ softmax_out = paddle.nn.functional.softmax(out)
 loss = fluid.layers.cross_entropy(input=softmax_out, label=label) avg_loss = paddle.mean(x=loss) optimizer.minimize(avg_loss)
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
index e850905141b188..a88c31dd3f5ee3 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
@@ -503,7 +503,7 @@ def forward(self, queries, keys, values, attn_bias): ) if attn_bias is not None: product += attn_bias
- weights = fluid.layers.softmax(product)
+ weights = paddle.nn.functional.softmax(product)
 if self._dropout_rate: weights_droped = fluid.layers.dropout( weights,
@@ -1013,7 +1013,7 @@ def forward(self, dec_inputs=None, enc_output=None): if dec_inputs is None: # Return probs for independent decoder program.
- predict_out = fluid.layers.softmax(predict) + predict_out = paddle.nn.functional.softmax(predict) return predict_out return predict diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 67cfdfeceb2664..25b6d0513d1c87 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -2748,7 +2748,7 @@ def test_accuracy(self): data = fluid.data(name="input", shape=[-1, 32, 32], dtype="float32") label = fluid.data(name="label", shape=[-1, 1], dtype="int") fc_out = fluid.layers.fc(input=data, size=10) - predict = fluid.layers.softmax(input=fc_out) + predict = paddle.nn.functional.softmax(fc_out) result = paddle.static.accuracy(input=predict, label=label, k=5) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -2764,7 +2764,7 @@ def test_accuracy(self): data = base.to_variable(x) label = base.to_variable(y) fc_out = fluid.layers.fc(data, size=10) - predict = fluid.layers.softmax(fc_out) + predict = paddle.nn.functional.softmax(fc_out) dynamic_out = paddle.static.accuracy( input=predict, label=label, k=5 ) @@ -3056,7 +3056,7 @@ def make_softmax(self): ): data = self._get_data(name='data', shape=[10], dtype='float32') hid = layers.fc(input=data, size=20) - return layers.softmax(hid, axis=1) + return paddle.nn.functional.softmax(hid, axis=1) @prog_scope() def make_nce(self): diff --git a/python/paddle/fluid/tests/unittests/test_mean_op.py b/python/paddle/fluid/tests/unittests/test_mean_op.py index 83f07bf747c7ee..33f95b439c1789 100644 --- a/python/paddle/fluid/tests/unittests/test_mean_op.py +++ b/python/paddle/fluid/tests/unittests/test_mean_op.py @@ -89,7 +89,7 @@ def test_errors(self): input3 = fluid.layers.data( name='input3', shape=[4], dtype="float16" ) - fluid.layers.softmax(input3) + paddle.nn.functional.softmax(input3) @unittest.skipIf( diff --git a/python/paddle/fluid/tests/unittests/test_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_recurrent_op.py index 6e01ee1d4f0f79..2b06de33f2cc07 100644 --- a/python/paddle/fluid/tests/unittests/test_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_recurrent_op.py @@ -617,7 +617,7 @@ def create_rnn_op(self): def dot_attention(query, memory): attn = layers.matmul(query, memory, transpose_y=True) - weight = layers.softmax(attn) + weight = paddle.nn.functional.softmax(attn) weight_memory = layers.matmul(weight, memory) return weight_memory, weight diff --git a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py index a557fb9df00131..3b3539c4861f12 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py @@ -76,7 +76,7 @@ def attention(self, hidden, encoder_output, encoder_padding_mask): ) if encoder_padding_mask is not None: attn_scores = paddle.add(attn_scores, encoder_padding_mask) - attn_scores = layers.softmax(attn_scores) + attn_scores = paddle.nn.functional.softmax(attn_scores) attn_out = paddle.squeeze( layers.matmul(attn_scores, encoder_output), [1] ) @@ -295,7 +295,7 @@ def __call__(self, src, src_length, trg=None, trg_length=None): decoder_output.sample_ids, dec_seq_lengths, ) - probs = layers.softmax(logits) + probs = paddle.nn.functional.softmax(logits) return probs, samples, sample_length diff --git a/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py index 
6021256f6962cf..22f759b46f0c75 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py @@ -99,7 +99,7 @@ def test_errors(self): input3 = fluid.layers.data( name='input3', shape=[4], dtype="float16" ) - fluid.layers.softmax(input3) + paddle.nn.functional.softmax(input3) support_types = get_xpu_op_support_types('mean') From f9815bfee7f74d08ebcd0e3c9e588a3261326121 Mon Sep 17 00:00:00 2001 From: Yuang Liu Date: Sat, 3 Dec 2022 09:32:18 +0800 Subject: [PATCH 127/154] Scatter 0D index for gather, 0D index and 0D updates for scatter. (#48452) --- paddle/phi/infermeta/binary.cc | 78 ++++++++---- paddle/phi/infermeta/ternary.cc | 49 ++++---- paddle/phi/kernels/funcs/gather.cu.h | 9 +- paddle/phi/kernels/funcs/gather.h | 28 +++-- paddle/phi/kernels/funcs/scatter.cu.h | 26 ++-- paddle/phi/kernels/funcs/scatter.h | 91 ++++++++------ paddle/phi/kernels/xpu/gather_grad_kernel.cc | 10 +- paddle/phi/kernels/xpu/gather_kernel.cc | 23 ++-- paddle/phi/kernels/xpu/scatter_kernel.cc | 36 +++--- .../tests/unittests/test_zero_dim_tensor.py | 117 ++++++++++++++++++ .../unittests/xpu/test_zero_dim_tensor_xpu.py | 49 ++++++++ python/paddle/tensor/manipulation.py | 8 +- 12 files changed, 377 insertions(+), 147 deletions(-) diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc index c48388a03173d6..532aed7f66d9ea 100644 --- a/paddle/phi/infermeta/binary.cc +++ b/paddle/phi/infermeta/binary.cc @@ -1268,37 +1268,69 @@ void GatherInferMeta(const MetaTensor& x, index_dims[1])); } else { PADDLE_ENFORCE_EQ( - index_dims.size(), - 1, + index_dims.size() == 1 || index_dims.size() == 0, + true, phi::errors::InvalidArgument( - "The index should be 1D, when it is not 2D, but we get %d", + "The index should be 0D or 1D, when it is not 2D, but we get %d", index_dims.size())); } auto input_dim = x.dims(); auto axis_v = axis.to(); - if (axis.FromTensor() || axis_v == 0) { - // if axis.FromTensor(), we can not obtain correct shape of output - int batch_size = index_dims[0]; - phi::DDim output_dims(input_dim); - output_dims[0] = batch_size; - out->set_dims(output_dims); - out->set_dtype(x.dtype()); - out->share_lod(x); - } else { - int index_size = index_dims[0]; - std::vector out_dim_vec; - for (int i = 0; i < axis_v; i++) { - out_dim_vec.push_back(input_dim[i]); + if (index_dims.size() == 0) { + // 0D index will decrease the dimension + if (input_dim.size() == 1) { + // the index is a 0D tensor and the x is a 1D tensor + out->set_dims(phi::DDim(phi::Dim<0>())); + } else { + if (axis.FromTensor() || axis_v == 0) { + // decrease the output dimension + std::vector out_dim_vec; + for (int i = 1; i < input_dim.size(); ++i) { + out_dim_vec.emplace_back(input_dim[i]); + } + auto output_dims = phi::make_ddim(out_dim_vec); + out->set_dims(output_dims); + out->set_dtype(x.dtype()); + out->share_lod(x); + } else { + std::vector out_dim_vec; + for (int i = 0; i < axis_v; i++) { + out_dim_vec.push_back(input_dim[i]); + } + for (int i = axis_v + 1; i < input_dim.size(); i++) { + out_dim_vec.push_back(input_dim[i]); + } + auto output_dims = phi::make_ddim(out_dim_vec); + out->set_dims(output_dims); + out->set_dtype(x.dtype()); + out->share_lod(x); + } } - out_dim_vec.push_back(index_size); - for (int i = axis_v + 1; i < input_dim.size(); i++) { - out_dim_vec.push_back(input_dim[i]); + } else { + if (axis.FromTensor() || axis_v == 0) { + // if axis.FromTensor(), we can not obtain correct shape of output + int batch_size = index_dims[0]; + 
phi::DDim output_dims(input_dim); + output_dims[0] = batch_size; + out->set_dims(output_dims); + out->set_dtype(x.dtype()); + out->share_lod(x); + } else { + int index_size = index_dims[0]; + std::vector out_dim_vec; + for (int i = 0; i < axis_v; i++) { + out_dim_vec.push_back(input_dim[i]); + } + out_dim_vec.push_back(index_size); + for (int i = axis_v + 1; i < input_dim.size(); i++) { + out_dim_vec.push_back(input_dim[i]); + } + auto output_dims = phi::make_ddim(out_dim_vec); + out->set_dims(output_dims); + out->set_dtype(x.dtype()); + out->share_lod(x); } - auto output_dims = phi::make_ddim(out_dim_vec); - out->set_dims(output_dims); - out->set_dtype(x.dtype()); - out->share_lod(x); } } diff --git a/paddle/phi/infermeta/ternary.cc b/paddle/phi/infermeta/ternary.cc index 1b945c0254fb38..f7bae3690991f5 100644 --- a/paddle/phi/infermeta/ternary.cc +++ b/paddle/phi/infermeta/ternary.cc @@ -995,31 +995,34 @@ void ScatterInferMeta(const MetaTensor& x, "index is a 2D tensor, but we get %d.", index_dims[1])); } else { + PADDLE_ENFORCE_EQ(index_dims.size() == 1 || index_dims.size() == 0, + true, + phi::errors::InvalidArgument( + "The index should be a 0D or 1D tensor when the " + "index is not a 2D tensor, but we get %d.", + index_dims.size())); + } + if (index_dims.size() != 0) { PADDLE_ENFORCE_EQ( - index_dims.size(), - 1, - phi::errors::InvalidArgument("The index should be a 1D tensor when the " - "index is not a 2D tensor, but we get %d.", - index_dims.size())); + (ref_dims.size() == updates_dims.size()), + true, + phi::errors::InvalidArgument( + "When the Input(Updates) is not a 0D tensor, the " + "Input(X) and Input(Updates) should have the same shape size, " + "but received the size of Input(x)'s shape is %d, the size of " + "Input(Updates)'s shape is %d.", + ref_dims.size(), + updates_dims.size())); + PADDLE_ENFORCE_EQ( + updates_dims[0], + index_dims[0], + phi::errors::InvalidArgument( + "Input(Updates) and Input(Ids) should have same batch-size, but" + " received Input(Updates)'s batch-size is %d, Input(Ids)'s " + "batch-size is %d.", + updates_dims[0], + index_dims[0])); } - PADDLE_ENFORCE_EQ( - ref_dims.size(), - updates_dims.size(), - phi::errors::InvalidArgument( - "Input(X) and Input(Updates) should have the same shape size, " - "but received the size of Input(x)'s shape is %d, the size of " - "Input(Updates)'s shape is %d.", - ref_dims.size(), - updates_dims.size())); - PADDLE_ENFORCE_EQ( - updates_dims[0], - index_dims[0], - phi::errors::InvalidArgument( - "Input(Updates) and Input(Ids) should have same batch-size, but" - " received Input(Updates)'s batch-size is %d, Input(Ids)'s " - "batch-size is %d.", - updates_dims[0], - index_dims[0])); out->set_dims(ref_dims); out->share_lod(x); out->set_dtype(x.dtype()); diff --git a/paddle/phi/kernels/funcs/gather.cu.h b/paddle/phi/kernels/funcs/gather.cu.h index ac8487db8f62e6..2b1822ece2627d 100644 --- a/paddle/phi/kernels/funcs/gather.cu.h +++ b/paddle/phi/kernels/funcs/gather.cu.h @@ -94,12 +94,9 @@ void GPUGather(const phi::GPUContext& ctx, } // index size - int64_t index_size = index.dims()[0]; - if (index_size == 0) return; + int64_t index_size = index.dims().size() == 0 ? 
1 : index.dims()[0]; auto src_dims = src.dims(); - phi::DDim output_dims(src_dims); - output_dims[0] = index_size; // slice size int64_t slice_size = 1; @@ -246,7 +243,9 @@ void GatherV2CUDAFunction(const DenseTensor* input, inner_dim_size *= input_dim[i]; out_dim_vec.push_back(input_dim[i]); } - out_dim_vec.push_back(index_size); + if (index->dims().size() != 0) { + out_dim_vec.push_back(index_size); + } for (int i = axis_index + 1; i < input_dim.size(); i++) { outer_dim_size *= input_dim[i]; out_dim_vec.push_back(input_dim[i]); diff --git a/paddle/phi/kernels/funcs/gather.h b/paddle/phi/kernels/funcs/gather.h index 094bc46cb6f45c..f1ab1a16f1224b 100644 --- a/paddle/phi/kernels/funcs/gather.h +++ b/paddle/phi/kernels/funcs/gather.h @@ -38,7 +38,6 @@ void CPUGather(const phi::CPUContext& ctx, const DenseTensor& src, const DenseTensor& index, DenseTensor* output) { - // check index of shape 1-D if (index.dims().size() == 2) { PADDLE_ENFORCE_EQ( index.dims()[1], @@ -48,14 +47,15 @@ void CPUGather(const phi::CPUContext& ctx, "in gather_op, but received value is [%d].", index.dims()[1])); } else { - PADDLE_ENFORCE_EQ(index.dims().size(), - 1, - phi::errors::InvalidArgument( - "index.dims().size() should be 1 or 2 in gather_op," - "but received shape's size is [%d].", - index.dims().size())); + PADDLE_ENFORCE_EQ( + index.dims().size() == 1 || index.dims().size() == 0, + true, + phi::errors::InvalidArgument( + "The index should be 0D or 1D, when it is not 2D, but we get %d", + index.dims().size())); } - int64_t index_size = index.dims()[0]; + + int64_t index_size = index.dims().size() == 0 ? 1 : index.dims()[0]; auto src_dims = src.dims(); @@ -188,7 +188,9 @@ void GatherV2Function(const phi::CPUContext& ctx, inner_dim_size *= input_dim[i]; out_dim_vec.push_back(input_dim[i]); } - out_dim_vec.push_back(index_size); + if (index->dims().size() != 0) { + out_dim_vec.push_back(index_size); + } for (int i = axis_index + 1; i < input_dim.size(); i++) { outer_dim_size *= input_dim[i]; out_dim_vec.push_back(input_dim[i]); @@ -224,7 +226,13 @@ void GatherV2GradFunction(const phi::CPUContext& ctx, if (input->numel() == 0) return; int axis_index = axis; - int64_t input_index_dim_size = input_dim[axis_index]; + int64_t input_index_dim_size; + if (input_dim.size() == out->dims().size()) { + input_index_dim_size = input_dim[axis_index]; + } else { + // 0d index + input_index_dim_size = 1; + } int64_t inner_dim_size = 1; int64_t outer_dim_size = 1; diff --git a/paddle/phi/kernels/funcs/scatter.cu.h b/paddle/phi/kernels/funcs/scatter.cu.h index 6aeb09b232bd51..c03dcba1e2e7f0 100644 --- a/paddle/phi/kernels/funcs/scatter.cu.h +++ b/paddle/phi/kernels/funcs/scatter.cu.h @@ -122,7 +122,6 @@ void GPUScatterAssign(const phi::GPUContext& ctx, const DenseTensor& index, DenseTensor* output, bool overwrite = true) { - // check index of shape 1-D if (index.dims().size() == 2) { PADDLE_ENFORCE_EQ( index.dims()[1], @@ -132,26 +131,33 @@ void GPUScatterAssign(const phi::GPUContext& ctx, "But received value is [%d]", index.dims()[1])); } else { - PADDLE_ENFORCE_EQ(index.dims().size(), - 1, - phi::errors::InvalidArgument( - "index.dims().size() should be 1 or 2 in scatter_op." - "But received value is [%d]", - index.dims().size())); + PADDLE_ENFORCE_EQ( + index.dims().size() == 1 || index.dims().size() == 0, + true, + phi::errors::InvalidArgument( + "index.dims().size() should be 0, 1 or 2 in scatter_op." 
+ "But received value is [%d]", + index.dims().size())); } - int64_t index_size = index.dims()[0]; + + int64_t index_size = index.dims().size() == 0 ? 1 : index.dims()[0]; auto src_dims = src.dims(); phi::DDim output_dims(src_dims); output_dims[0] = index_size; // slice size - int64_t slice_size = 1; - for (int i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + size_t slice_size = 1; + if (index.dims().size() != 0) { + for (int i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + } else { + for (int i = 0; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + } const T* p_src = src.data(); const IndexT* p_index = index.data(); T* p_output = output->data(); + const size_t& slice_bytes = slice_size * sizeof(T); // set block and grid num diff --git a/paddle/phi/kernels/funcs/scatter.h b/paddle/phi/kernels/funcs/scatter.h index 0b381e5710651c..9ee73a08b06c52 100644 --- a/paddle/phi/kernels/funcs/scatter.h +++ b/paddle/phi/kernels/funcs/scatter.h @@ -76,7 +76,6 @@ void ScatterAssign(const phi::CPUContext& ctx, const DenseTensor& src, const DenseTensor& index, DenseTensor* output) { - // check index of shape 1-D if (index.dims().size() == 2) { PADDLE_ENFORCE_EQ( index.dims()[1], @@ -86,14 +85,15 @@ void ScatterAssign(const phi::CPUContext& ctx, "But received value is [%d]", index.dims()[1])); } else { - PADDLE_ENFORCE_EQ(index.dims().size(), - 1, + PADDLE_ENFORCE_EQ(index.dims().size() == 1 || index.dims().size() == 0, + true, phi::errors::InvalidArgument( - "index.dims().size() should be 1 or 2 in scatter_op." - "But received value is [%d]", + "index.dims().size() should be 0, 1 or 2 in " + "scatter_op. But received value is [%d]", index.dims().size())); } - int64_t index_size = index.dims()[0]; + + int64_t index_size = index.dims().size() == 0 ? 
1 : index.dims()[0]; auto src_dims = src.dims(); auto dst_dims = output->dims(); @@ -102,23 +102,29 @@ void ScatterAssign(const phi::CPUContext& ctx, const IndexT* p_index = index.data(); T* p_output = output->data(); - // check src shape and dst shape should match - for (int i = 1; i < src_dims.size(); i++) - PADDLE_ENFORCE_EQ( - src_dims[i], - dst_dims[i], - phi::errors::InvalidArgument( - "The dimensions of the source tensor and target tensor should" - " match, but received source tensor's %d-th dimension is %d," - "target tensor's %d-th dimension is %d.", - i, - src_dims[i], - i, - dst_dims[i])); + if (index.dims().size() != 0) { + // check src shape and dst shape should match + for (int i = 1; i < src_dims.size(); i++) + PADDLE_ENFORCE_EQ( + src_dims[i], + dst_dims[i], + phi::errors::InvalidArgument( + "The dimensions of the source tensor and target tensor should" + " match, but received source tensor's %d-th dimension is %d," + "target tensor's %d-th dimension is %d.", + i, + src_dims[i], + i, + dst_dims[i])); + } // slice size size_t slice_size = 1; - for (int i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + if (index.dims().size() != 0) { + for (int i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + } else { + for (int i = 0; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + } const size_t slice_bytes = slice_size * sizeof(T); @@ -143,43 +149,48 @@ void ScatterAssignAdd(const phi::CPUContext& ctx, const DenseTensor& src, const DenseTensor& index, DenseTensor* output) { - // check index of shape 1-D PADDLE_ENFORCE_EQ( - index.dims().size() == 1 || + index.dims().size() == 1 || index.dims().size() == 0 || (index.dims().size() == 2 && index.dims()[1] == 1), true, phi::errors::InvalidArgument( "index's shape is error, " - "expect index'dims shape is 1 or 2 and index.dims[1] is 1" - "but got index'dims shape is %d", + "expect index'dims shape is 0, 1, 2 (index.dims[1] should " + "be 1), but got index'dims shape is %d", index.dims().size())); - int64_t index_size = index.dims()[0]; + + int64_t index_size = index.dims().size() == 0 ? 
1 : index.dims()[0]; auto src_dims = src.dims(); auto dst_dims = output->dims(); const T* p_src = src.data(); const IndexT* p_index = index.data(); - T* p_output = output->data(); - // check src shape and dst shape should match - for (int i = 1; i < src_dims.size(); i++) - PADDLE_ENFORCE_EQ( - src_dims[i], - dst_dims[i], - phi::errors::InvalidArgument( - "The dimensions of the source tensor and target tensor should" - " match, but received source tensor's %d-th dimension is %d," - "target tensor's %d-th dimension is %d.", - i, - src_dims[i], - i, - dst_dims[i])); + if (index.dims().size() != 0) { + // check src shape and dst shape should match + for (int i = 1; i < src_dims.size(); i++) + PADDLE_ENFORCE_EQ( + src_dims[i], + dst_dims[i], + phi::errors::InvalidArgument( + "The dimensions of the source tensor and target tensor should" + " match, but received source tensor's %d-th dimension is %d," + "target tensor's %d-th dimension is %d.", + i, + src_dims[i], + i, + dst_dims[i])); + } // slice size size_t slice_size = 1; - for (int i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + if (index.dims().size() != 0) { + for (int i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + } else { + for (int i = 0; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + } const size_t& slice_bytes = slice_size * sizeof(T); diff --git a/paddle/phi/kernels/xpu/gather_grad_kernel.cc b/paddle/phi/kernels/xpu/gather_grad_kernel.cc index 7be22a86d00194..86a6a39f87cf5d 100644 --- a/paddle/phi/kernels/xpu/gather_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/gather_grad_kernel.cc @@ -44,10 +44,10 @@ void GatherGradKernel(const Context& dev_ctx, index_dims[1])); } else { PADDLE_ENFORCE_EQ( - index_dims.size(), - 1, + index_dims.size() == 1 || index_dims.size() == 0, + true, phi::errors::InvalidArgument( - "The index should be 1D, when it is not 2D, but we get %d", + "The index should be 0D or 1D, when it is not 2D, but we get %d", index_dims.size())); } std::vector xshape(x_grad->dims().size()); @@ -66,7 +66,7 @@ void GatherGradKernel(const Context& dev_ctx, index.data(), reinterpret_cast(x_grad->data()), xshape, - index.dims()[0], + index.dims().size() == 0 ? 1 : index.dims()[0], axis_v, overwrite); } else { @@ -84,7 +84,7 @@ void GatherGradKernel(const Context& dev_ctx, index_int_ptr_l3, reinterpret_cast(x_grad->data()), xshape, - index.dims()[0], + index.dims().size() == 0 ? 1 : index.dims()[0], axis_v, overwrite); } diff --git a/paddle/phi/kernels/xpu/gather_kernel.cc b/paddle/phi/kernels/xpu/gather_kernel.cc index c3520178d1804b..76b2f04ee52bab 100644 --- a/paddle/phi/kernels/xpu/gather_kernel.cc +++ b/paddle/phi/kernels/xpu/gather_kernel.cc @@ -41,10 +41,10 @@ void GatherKernel(const Context& dev_ctx, index_dims[1])); } else { PADDLE_ENFORCE_EQ( - index_dims.size(), - 1, + index_dims.size() == 1 || index_dims.size() == 0, + true, phi::errors::InvalidArgument( - "The index should be 1D, when it is not 2D, but we get %d", + "The index should be 0D, 1D, when it is not 2D, but we get %d", index_dims.size())); } std::vector xshape(x.dims().size()); @@ -56,13 +56,14 @@ void GatherKernel(const Context& dev_ctx, int r = XPU_SUCCESS; if (index_type == DataType::INT32) { - r = xpu::gather(dev_ctx.x_context(), - reinterpret_cast(x.data()), - index.data(), - reinterpret_cast(out->data()), - xshape, - index.dims()[0], - axis_v); + r = xpu::gather( + dev_ctx.x_context(), + reinterpret_cast(x.data()), + index.data(), + reinterpret_cast(out->data()), + xshape, + index.dims().size() == 0 ? 
1 : index.dims()[0], + axis_v); } else { r = xpu::gather( dev_ctx.x_context(), @@ -70,7 +71,7 @@ void GatherKernel(const Context& dev_ctx, index.data(), reinterpret_cast(out->data()), xshape, - index.dims()[0], + index.dims().size() == 0 ? 1 : index.dims()[0], axis_v); } PADDLE_ENFORCE_EQ( diff --git a/paddle/phi/kernels/xpu/scatter_kernel.cc b/paddle/phi/kernels/xpu/scatter_kernel.cc index a1db2669e619b1..988b8a71568e96 100644 --- a/paddle/phi/kernels/xpu/scatter_kernel.cc +++ b/paddle/phi/kernels/xpu/scatter_kernel.cc @@ -43,30 +43,34 @@ void ScatterKernel(const Context &ctx, // check index of shape 1-D PADDLE_ENFORCE_EQ( - index.dims().size() == 1 || + index.dims().size() == 1 || index.dims().size() == 0 || (index.dims().size() == 2 && index.dims()[1] == 1), true, phi::errors::InvalidArgument( "index's shape is error, " - "expect index'dims shape is 1 or 2 and index.dims[1] is 1" - "but got index'dims shape is %d", + "expect index'dims shape is 0, 1, 2 (index.dims[1] should " + "be 1), 0 but got index'dims shape is %d", index.dims().size())); - int index_size = static_cast(index.dims()[0]); + int index_size = + static_cast(index.dims().size() == 0 ? 1 : index.dims()[0]); auto x_dims = x.dims(); auto update_dims = updates.dims(); - for (int i = 1; i < x_dims.size(); i++) - PADDLE_ENFORCE_EQ( - x_dims[i], - update_dims[i], - phi::errors::InvalidArgument( - "The dimensions of the source tensor and target tensor should" - " match, but received source tensor's %d-th dimension is %d," - "target tensor's %d-th dimension is %d.", - i, - x_dims[i], - i, - update_dims[i])); + if (index.dims().size() != 0) { + // only check when the updates tensor is not a 0D tensor + for (int i = 1; i < x_dims.size(); i++) + PADDLE_ENFORCE_EQ( + x_dims[i], + update_dims[i], + phi::errors::InvalidArgument( + "The dimensions of the source tensor and target tensor should" + " match, but received source tensor's %d-th dimension is %d," + "target tensor's %d-th dimension is %d.", + i, + x_dims[i], + i, + update_dims[i])); + } int dim0 = static_cast(x.dims()[0]); int dim1 = diff --git a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py index e854b8489af140..e7381350624b9b 100644 --- a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py @@ -598,6 +598,61 @@ def test_searchsorted(self): self.assertEqual(out.shape, []) self.assertEqual(out.numpy(), 0) + def test_gather_1D(self): + x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False) + index = paddle.full([], 2, 'int64') + out = paddle.gather(x, index) + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(out.numpy(), 5) + self.assertEqual(out.grad.shape, []) + + def test_gather_xD_axis_0(self): + x = paddle.to_tensor( + [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False + ) + index = paddle.full([], 1, 'int64') + out = paddle.gather(x, index) + out.backward() + + self.assertEqual(out.shape, [3]) + for i in range(3): + self.assertEqual(out.numpy()[i], x.numpy()[1][i]) + self.assertEqual(out.grad.shape, [3]) + + def test_gather_xD_axis_1(self): + x = paddle.to_tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + index = paddle.full([], 1, 'int64') + out = paddle.gather(x, index, axis=1) + + self.assertEqual(out.shape, [2]) + for i in range(2): + self.assertEqual(out.numpy()[i], x.numpy()[i][1]) + + def test_scatter_1D(self): + x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False) + 
index = paddle.full([], 2, 'int64') + updates = paddle.full([], 4.0) + out = paddle.scatter(x, index, updates) + out.backward() + + self.assertEqual(out.grad.shape, [5]) + self.assertEqual(out.numpy()[2], 4) + + def test_scatter_XD(self): + x = paddle.to_tensor( + [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False + ) + index = paddle.full([], 1, 'int64') + updates = paddle.to_tensor([1.0, 2.0, 3.0]) + out = paddle.scatter(x, index, updates) + out.backward() + + for i in range(3): + self.assertEqual(out.numpy()[1][i], updates.numpy()[i]) + self.assertEqual(out.grad.shape, [2, 3]) + class TestSundryAPIStatic(unittest.TestCase): def setUp(self): @@ -679,6 +734,68 @@ def test_searchsorted(self): self.assertEqual(res[0].shape, ()) self.assertEqual(res[0], 0) + @prog_scope() + def test_gather_1D(self): + x = paddle.full([10], 1.0, 'float32') + index = paddle.full([], 2, 'int64') + out = paddle.gather(x, index) + paddle.static.append_backward(out) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out]) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[0], 1) + + @prog_scope() + def test_gather_XD_axis_0(self): + x = paddle.full([2, 3], 1.0, 'float32') + index = paddle.full([], 1, 'int64') + out = paddle.gather(x, index) + paddle.static.append_backward(out) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out]) + self.assertEqual(res[0].shape, (3,)) + for i in range(3): + self.assertEqual(res[0][i], 1) + + @prog_scope() + def test_gather_XD_axis_1(self): + x = paddle.full([2, 3], 1.0, 'float32') + index = paddle.full([], 1, 'int64') + out = paddle.gather(x, index, axis=1) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out]) + self.assertEqual(res[0].shape, (2,)) + for i in range(2): + self.assertEqual(res[0][i], 1) + + @prog_scope() + def test_scatter_1D(self): + x = paddle.full([10], 1.0, 'float32') + index = paddle.full([], 2, 'int64') + updates = paddle.full([], 4, 'float32') + out = paddle.scatter(x, index, updates) + paddle.static.append_backward(out) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out]) + self.assertEqual(res[0][2], 4) + + @prog_scope() + def test_scatter_XD(self): + x = paddle.full([2, 3], 1.0, 'float32') + index = paddle.full([], 1, 'int64') + updates = paddle.full([3], 4, 'float32') + out = paddle.scatter(x, index, updates) + paddle.static.append_backward(out) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out]) + for i in range(3): + self.assertEqual(res[0][1][i], 4) + # Use to test API whose zero-dim input tensors don't have grad and not need to test backward in OpTest. 
class TestNoBackwardAPI(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py index 6bde8ef947d7c8..b07043689f7fef 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py @@ -426,6 +426,55 @@ def test_searchsorted(self): self.assertEqual(out.shape, []) self.assertEqual(out.numpy(), 0) + def test_gather_1D(self): + x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False) + index = paddle.full([], 2, 'int64') + out = paddle.gather(x, index) + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(out.numpy(), 5) + self.assertEqual(out.grad.shape, []) + + def test_gather_xD_axis_0(self): + x = paddle.to_tensor( + [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False + ) + index = paddle.full([], 1, 'int64') + out = paddle.gather(x, index) + out.backward() + + self.assertEqual(out.shape, [3]) + for i in range(3): + self.assertEqual(out.numpy()[i], x.numpy()[1][i]) + self.assertEqual(out.grad.shape, [3]) + + def test_gather_xD_axis_1(self): + x = paddle.to_tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + index = paddle.full([], 1, 'int64') + out = paddle.gather(x, index, axis=1) + + self.assertEqual(out.shape, [2]) + for i in range(2): + self.assertEqual(out.numpy()[i], x.numpy()[i][1]) + + def test_scatter_1D(self): + x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0]) + index = paddle.full([], 2, 'int64') + updates = paddle.full([], 4.0) + out = paddle.scatter(x, index, updates) + + self.assertEqual(out.numpy()[2], 4) + + def test_scatter_XD(self): + x = paddle.to_tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + index = paddle.full([], 1, 'int64') + updates = paddle.to_tensor([1.0, 2.0, 3.0]) + out = paddle.scatter(x, index, updates) + + for i in range(3): + self.assertEqual(out.numpy()[1][i], updates.numpy()[i]) + # Use to test API whose zero-dim input tensors don't have grad and not need to test backward in OpTest. class TestNoBackwardAPI(unittest.TestCase): diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index fceae51e14564b..8c47809d222a90 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -2728,13 +2728,13 @@ def gather(x, index, axis=None, name=None): x (Tensor): The source input tensor with rank>=1. Supported data type is int32, int64, float32, float64 and uint8 (only for CPU), float16 (only for GPU). - index (Tensor): The index input tensor with rank=1. Data type is int32 or int64. + index (Tensor): The index input tensor with rank=0 or rank=1. Data type is int32 or int64. axis (Tensor|int, optional): The axis of input to be gathered, it's can be int or a Tensor with data type is int32 or int64. The default value is None, if None, the ``axis`` is 0. name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` . Returns: - output (Tensor), The output is a tensor with the same rank as ``x``. + output (Tensor), If the index is a 1-D tensor, the output is a tensor with the same shape as ``x``. If the index is a 0-D tensor, the output will reduce the dimension where the axis pointing. Examples: @@ -2888,8 +2888,8 @@ def scatter(x, index, updates, overwrite=True, name=None): Args: x (Tensor): The input N-D Tensor with ndim>=1. Data type can be float32, float64. 
- index (Tensor): The index 1-D Tensor. Data type can be int32, int64. The length of index cannot exceed updates's length, and the value in index cannot exceed input's length.
- updates (Tensor): update input with updates parameter based on index. shape should be the same as input, and dim value with dim > 1 should be the same as input.
+ index (Tensor): The index is a 1-D or 0-D Tensor. Data type can be int32, int64. The length of index cannot exceed updates's length, and the value in index cannot exceed input's length.
+ updates (Tensor): Update input with updates parameter based on index. When the index is a 1-D tensor, the updates shape should be the same as input, and dim value with dim > 1 should be the same as input. When the index is a 0-D tensor, the updates should be a (N-1)-D tensor, and the ith dim of the updates should be equal to the (i+1)th dim of the input.
 overwrite (bool): The mode that updating the output when there are same indices. If True, use the overwrite mode to update the output of the same index,

From 4552be4842b6febb2acbc3b2f4e59e09884c80c8 Mon Sep 17 00:00:00 2001
From: Wen Sun <35923278+HermitSun@users.noreply.github.com>
Date: Sat, 3 Dec 2022 10:41:11 +0800
Subject: [PATCH 128/154] Refactor collective communication static check (#48646)

* refactor: classify static check
* refactor: rename to static_check & use forward decl
* refactor: switch to unary & binary funcs
---
 .../distributed/collective/CMakeLists.txt | 2 +-
 .../fluid/distributed/collective/NCCLTools.cc | 104 ------------
 .../fluid/distributed/collective/NCCLTools.h | 27 ---
 .../collective/ProcessGroupNCCL.cc | 55 +++++--
 .../distributed/collective/static_check.cc | 155 ++++++++++++++++++
 .../distributed/collective/static_check.h | 77 +++++++++
 6 files changed, 273 insertions(+), 147 deletions(-)
 create mode 100644 paddle/fluid/distributed/collective/static_check.cc
 create mode 100644 paddle/fluid/distributed/collective/static_check.h

diff --git a/paddle/fluid/distributed/collective/CMakeLists.txt b/paddle/fluid/distributed/collective/CMakeLists.txt
index e5dc51c63f07a7..83b42fd4320706 100644
--- a/paddle/fluid/distributed/collective/CMakeLists.txt
+++ b/paddle/fluid/distributed/collective/CMakeLists.txt
@@ -21,7 +21,7 @@ endif() if(WITH_NCCL OR WITH_RCCL) cc_library( processgroup_nccl
- SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc
+ SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc static_check.cc
 DEPS processgroup processgroup_stream place
diff --git a/paddle/fluid/distributed/collective/NCCLTools.cc b/paddle/fluid/distributed/collective/NCCLTools.cc
index 988232b6171947..a8c437bb12225d 100644
--- a/paddle/fluid/distributed/collective/NCCLTools.cc
+++ b/paddle/fluid/distributed/collective/NCCLTools.cc
@@ -44,109 +44,5 @@ std::string SerializeNCCLUniqueId(const ncclUniqueId& ncclID) { return oss.str(); }
-void StaticCheckTensor(const phi::DenseTensor& tensor,
- int rank,
- int world_size) {
- // place check
- PADDLE_ENFORCE_EQ(
- platform::is_gpu_place(tensor.place()),
- true,
- platform::errors::InvalidArgument("Tensor should be in GPU place."));
- // rank check
- PADDLE_ENFORCE_GE(rank,
- 0,
- platform::errors::InvalidArgument(
- "Rank should be greater than or equal to 0."));
- PADDLE_ENFORCE_LT(
- rank,
- world_size,
- platform::errors::InvalidArgument("Rank is out of the process group."));
-}
-
-// static check for collective
-void StaticCheckTensors(const phi::DenseTensor& out_tensor,
- const phi::DenseTensor& in_tensor,
- int rank,
- int world_size,
- int out_size_factor,
- int in_size_factor)
{ - // place check - PADDLE_ENFORCE_EQ(platform::is_gpu_place(out_tensor.place()), - true, - platform::errors::InvalidArgument( - "Output tensor should be in GPU place.")); - PADDLE_ENFORCE_EQ(platform::is_gpu_place(in_tensor.place()), - true, - platform::errors::InvalidArgument( - "Input tensor should be in GPU place.")); - // rank check - PADDLE_ENFORCE_GE(rank, - 0, - platform::errors::InvalidArgument( - "Rank should be greater than or equal to 0.")); - PADDLE_ENFORCE_LT( - rank, - world_size, - platform::errors::InvalidArgument("Rank is out of the process group.")); - // shape check - int64_t out_size = out_tensor.numel(); - PADDLE_ENFORCE_GT(out_size, - 0, - platform::errors::InvalidArgument( - "Size of output tensor should be greater than 0.")); - int64_t in_size = in_tensor.numel(); - PADDLE_ENFORCE_GT(in_size, - 0, - platform::errors::InvalidArgument( - "Size of input tensor should be greater than 0.")); - PADDLE_ENFORCE_EQ( - out_size * out_size_factor, - in_size * in_size_factor, - platform::errors::InvalidArgument( - "Input and output tensors should have matching sizes.")); - // dtype check - PADDLE_ENFORCE_EQ( - out_tensor.dtype(), - in_tensor.dtype(), - platform::errors::InvalidArgument( - "Input and output tensors should have the same data type.")); -} - -void StaticCheckTensorsSameShape(const phi::DenseTensor& out_tensor, - const phi::DenseTensor& in_tensor, - int rank, - int world_size) { - StaticCheckTensors(out_tensor, - in_tensor, - rank, - world_size, - /*out_size_factor*/ 1, - /*in_size_factor*/ 1); -} - -void StaticCheckTensorsScatterLikeShape(const phi::DenseTensor& out_tensor, - const phi::DenseTensor& in_tensor, - int rank, - int world_size) { - StaticCheckTensors(out_tensor, - in_tensor, - rank, - world_size, - /*out_size_factor*/ world_size, - /*in_size_factor*/ 1); -} - -void StaticCheckTensorsGatherLikeShape(const phi::DenseTensor& out_tensor, - const phi::DenseTensor& in_tensor, - int rank, - int world_size) { - StaticCheckTensors(out_tensor, - in_tensor, - rank, - world_size, - /*out_size_factor*/ 1, - /*in_size_factor*/ world_size); -} - } // namespace distributed } // namespace paddle diff --git a/paddle/fluid/distributed/collective/NCCLTools.h b/paddle/fluid/distributed/collective/NCCLTools.h index a882dae2e990da..37b1e0f114c3d4 100644 --- a/paddle/fluid/distributed/collective/NCCLTools.h +++ b/paddle/fluid/distributed/collective/NCCLTools.h @@ -63,32 +63,5 @@ ncclRedOp_t ToNCCLRedType(ReduceOp reduction); std::string SerializeNCCLUniqueId(const ncclUniqueId& ncclID); -// static check for p2p -void StaticCheckTensor(const phi::DenseTensor& tensor, - int rank, - int world_size); - -// static check for collective -void StaticCheckTensors(const phi::DenseTensor& out_tensor, - const phi::DenseTensor& in_tensor, - int rank, - int world_size, - int out_size_factor, - int in_size_factor); - -void StaticCheckTensorsSameShape(const phi::DenseTensor& out_tensor, - const phi::DenseTensor& in_tensor, - int rank, - int world_size); - -void StaticCheckTensorsScatterLikeShape(const phi::DenseTensor& out_tensor, - const phi::DenseTensor& in_tensor, - int rank, - int world_size); - -void StaticCheckTensorsGatherLikeShape(const phi::DenseTensor& out_tensor, - const phi::DenseTensor& in_tensor, - int rank, - int world_size); } // namespace distributed } // namespace paddle diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc index e995161cf304d2..b5c44962dd3a52 100644 --- 
a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc @@ -16,6 +16,7 @@ #include "paddle/fluid/distributed/collective/Common.h" #include "paddle/fluid/distributed/collective/NCCLTools.h" +#include "paddle/fluid/distributed/collective/static_check.h" #include "paddle/fluid/distributed/collective/utils.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #include "paddle/fluid/platform/place.h" @@ -138,8 +139,11 @@ std::shared_ptr ProcessGroupNCCL::AllGather( // numel > 0 indicates the tensor need to be sliced const phi::DenseTensor& in_tensor_maybe_partial = numel > 0 ? GetPartialTensor(in_tensor, offset, numel) : in_tensor; - StaticCheckTensorsGatherLikeShape( - *out_tensor, in_tensor_maybe_partial, rank_, size_); + CommStaticCheck::GatherLikeShape(*out_tensor, + in_tensor_maybe_partial, + /*dst_rank*/ rank_, + /*cur_rank*/ rank_, + size_); return RunFnInNCCLEnv( [&](ncclComm_t comm, gpuStream_t stream) { NCCL_CHECK(platform::dynload::ncclAllGather( @@ -162,7 +166,11 @@ std::shared_ptr ProcessGroupNCCL::AllReduce( const AllreduceOptions& opts, bool sync_op, bool use_calc_stream) { - StaticCheckTensorsSameShape(*out_tensor, in_tensor, rank_, size_); + CommStaticCheck::SameShape(*out_tensor, + in_tensor, + /*dst_rank*/ rank_, + /*cur_rank*/ rank_, + size_); return RunFnInNCCLEnv( [&](ncclComm_t comm, gpuStream_t stream) { NCCL_CHECK(platform::dynload::ncclAllReduce( @@ -214,12 +222,13 @@ std::shared_ptr ProcessGroupNCCL::AllToAll( // NOTE: Since `all_to_all` needs other processes's participation, it cannot // simply be covered by static checks. Factors are set to 0 here to skip the // shape check. Its shape check will be done by dynamic checks in debug mode. - StaticCheckTensors(*out_tensor, - in_tensor, - rank_, - size_, - /*out_size_factor*/ 0, - /*in_size_factor*/ 0); + CommStaticCheck::CheckShape(*out_tensor, + in_tensor, + /*dst_rank*/ rank_, + /*cur_rank*/ rank_, + size_, + /*out_size_factor*/ 0, + /*in_size_factor*/ 0); return RunFnInNCCLEnv( [&](ncclComm_t comm, gpuStream_t stream) { int64_t in_row_size = in_tensor.numel() / in_dim[0], @@ -287,7 +296,11 @@ std::shared_ptr ProcessGroupNCCL::Broadcast( const BroadcastOptions& opts, bool sync_op, bool use_calc_stream) { - StaticCheckTensorsSameShape(*out_tensor, in_tensor, rank_, size_); + CommStaticCheck::SameShape(*out_tensor, + in_tensor, + /*dst_rank*/ rank_, + /*cur_rank*/ rank_, + size_); return RunFnInNCCLEnv( [&](ncclComm_t comm, gpuStream_t stream) { int root = opts.source_rank + opts.source_root; @@ -312,7 +325,11 @@ std::shared_ptr ProcessGroupNCCL::Reduce( const ReduceOptions& opts, bool sync_op, bool use_calc_stream) { - StaticCheckTensorsSameShape(*out_tensor, in_tensor, rank_, size_); + CommStaticCheck::SameShape(*out_tensor, + in_tensor, + /*dst_rank*/ opts.root_rank, + /*cur_rank*/ rank_, + size_); return RunFnInNCCLEnv( [&](ncclComm_t comm, gpuStream_t stream) { NCCL_CHECK(platform::dynload::ncclReduce( @@ -337,7 +354,11 @@ std::shared_ptr ProcessGroupNCCL::ReduceScatter( const ReduceScatterOptions& opts, bool sync_op, bool use_calc_stream) { - StaticCheckTensorsScatterLikeShape(*out_tensor, in_tensor, rank_, size_); + CommStaticCheck::ScatterLikeShape(*out_tensor, + in_tensor, + /*dst_rank*/ rank_, + /*cur_rank*/ rank_, + size_); return RunFnInNCCLEnv( [&](ncclComm_t comm, gpuStream_t stream) { NCCL_CHECK(platform::dynload::ncclReduceScatter( @@ -361,7 +382,11 @@ std::shared_ptr ProcessGroupNCCL::Scatter( const ScatterOptions& opts, bool 
sync_op, bool use_calc_stream) { - StaticCheckTensorsScatterLikeShape(*out_tensor, in_tensor, rank_, size_); + CommStaticCheck::ScatterLikeShape(*out_tensor, + in_tensor, + /*dst_rank*/ opts.root_rank, + /*cur_rank*/ rank_, + size_); return RunFnInNCCLEnv( [&](ncclComm_t comm, gpuStream_t stream) { int64_t numel = in_tensor.numel() / size_; @@ -418,7 +443,7 @@ std::shared_ptr ProcessGroupNCCL::Recv( tensor = &partial_tensor; } - StaticCheckTensor(*tensor, rank_, size_); + CommStaticCheck::SingleTensor(*tensor, rank_, size_); return RunFnInNCCLEnv( [&](ncclComm_t comm, gpuStream_t stream) { NCCL_CHECK(platform::dynload::ncclRecv( @@ -446,7 +471,7 @@ std::shared_ptr ProcessGroupNCCL::Send( const phi::DenseTensor& tensor_maybe_partial = numel > 0 ? GetPartialTensor(tensor, offset, numel) : tensor; - StaticCheckTensor(tensor_maybe_partial, rank_, size_); + CommStaticCheck::SingleTensor(tensor_maybe_partial, rank_, size_); return RunFnInNCCLEnv( [&](ncclComm_t comm, gpuStream_t stream) { NCCL_CHECK(platform::dynload::ncclSend( diff --git a/paddle/fluid/distributed/collective/static_check.cc b/paddle/fluid/distributed/collective/static_check.cc new file mode 100644 index 00000000000000..98336db90d1e29 --- /dev/null +++ b/paddle/fluid/distributed/collective/static_check.cc @@ -0,0 +1,155 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
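+
+// CommStaticCheck (declared in static_check.h) bundles the host-side checks
+// run before every NCCL communication: the rank must lie in [0, world_size),
+// tensors must live on GPU, input/output dtypes must match, and element
+// counts must satisfy out_numel * out_size_factor == in_numel * in_size_factor.
+// For example, the gather-like check used by all_gather reduces to
+// out_numel == in_numel * world_size on the current rank.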
+ +#include "paddle/fluid/distributed/collective/static_check.h" + +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/place.h" +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/errors.h" + +namespace paddle { +namespace distributed { + +void CommStaticCheck::CheckRank(int rank, int world_size) { + PADDLE_ENFORCE_GE(rank, + 0, + phi::errors::InvalidArgument( + "Rank should be greater than or equal to 0.")); + PADDLE_ENFORCE_LT( + rank, + world_size, + phi::errors::InvalidArgument("Rank is out of the process group.")); +} + +void CommStaticCheck::CheckPlace(const phi::DenseTensor& tensor) { + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(tensor.place()), + true, + platform::errors::InvalidArgument("Tensor should be in GPU place.")); +} + +void CommStaticCheck::CheckPlace(const phi::DenseTensor& out_tensor, + const phi::DenseTensor& in_tensor) { + CheckPlace(out_tensor); + CheckPlace(in_tensor); + PADDLE_ENFORCE_EQ( + out_tensor.place(), + in_tensor.place(), + phi::errors::InvalidArgument( + "Input and output tensors should be on the same place.")); +} + +void CommStaticCheck::CheckDataType(const phi::DenseTensor& out_tensor, + const phi::DenseTensor& in_tensor) { + PADDLE_ENFORCE_EQ( + out_tensor.dtype(), + in_tensor.dtype(), + phi::errors::InvalidArgument( + "Input and output tensors should have the same data type.")); +} + +void CommStaticCheck::CheckShape(const phi::DenseTensor& tensor) { + PADDLE_ENFORCE_GT( + tensor.numel(), + 0, + phi::errors::InvalidArgument("Size of tensor should be greater than 0.")); +} + +void CommStaticCheck::CheckShape(const phi::DenseTensor& out_tensor, + const phi::DenseTensor& in_tensor, + int out_size_factor, + int in_size_factor) { + CheckShape(out_tensor); + CheckShape(in_tensor); + int64_t out_size = out_tensor.numel(), in_size = in_tensor.numel(); + PADDLE_ENFORCE_EQ( + out_size * out_size_factor, + in_size * in_size_factor, + phi::errors::InvalidArgument( + "Input and output tensors should have matching sizes.")); +} + +void CommStaticCheck::CheckShape(const phi::DenseTensor& out_tensor, + const phi::DenseTensor& in_tensor, + int dst_rank, + int cur_rank, + int world_size, + int out_size_factor, + int in_size_factor) { + CheckRank(dst_rank, world_size); + CheckRank(cur_rank, world_size); + + CheckPlace(out_tensor, in_tensor); + CheckDataType(out_tensor, in_tensor); + + if (dst_rank == cur_rank) { + CheckShape(out_tensor, in_tensor, out_size_factor, in_size_factor); + } else { + CheckShape(out_tensor); + CheckShape(in_tensor); + } +} + +void CommStaticCheck::SingleTensor(const phi::DenseTensor& tensor, + int rank, + int world_size) { + CheckPlace(tensor); + CheckRank(rank, world_size); +} + +void CommStaticCheck::SameShape(const phi::DenseTensor& out_tensor, + const phi::DenseTensor& in_tensor, + int dst_rank, + int cur_rank, + int world_size) { + CheckShape(out_tensor, + in_tensor, + dst_rank, + cur_rank, + world_size, + /*out_size_factor*/ 1, + /*in_size_factor*/ 1); +} + +void CommStaticCheck::ScatterLikeShape(const phi::DenseTensor& out_tensor, + const phi::DenseTensor& in_tensor, + int dst_rank, + int cur_rank, + int world_size) { + CheckShape(out_tensor, + in_tensor, + dst_rank, + cur_rank, + world_size, + /*out_size_factor*/ world_size, + /*in_size_factor*/ 1); +} + +void CommStaticCheck::GatherLikeShape(const phi::DenseTensor& out_tensor, + const phi::DenseTensor& in_tensor, + int dst_rank, + int cur_rank, + int world_size) { + CheckShape(out_tensor, + in_tensor, + dst_rank, + cur_rank, + world_size, + 
/*out_size_factor*/ 1, + /*in_size_factor*/ world_size); +} + +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/collective/static_check.h b/paddle/fluid/distributed/collective/static_check.h new file mode 100644 index 00000000000000..5dcb17e505438c --- /dev/null +++ b/paddle/fluid/distributed/collective/static_check.h @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +// forward declaration to reduce deps +namespace phi { +class DenseTensor; +} + +namespace paddle { +namespace distributed { + +struct CommStaticCheck { + static void CheckRank(int rank, int world_size); + + static void CheckPlace(const phi::DenseTensor& tensor); + + static void CheckPlace(const phi::DenseTensor& out_tensor, + const phi::DenseTensor& in_tensor); + + static void CheckDataType(const phi::DenseTensor& out_tensor, + const phi::DenseTensor& in_tensor); + + static void CheckShape(const phi::DenseTensor& tensor); + + static void CheckShape(const phi::DenseTensor& out_tensor, + const phi::DenseTensor& in_tensor, + int out_size_factor, + int in_size_factor); + + static void CheckShape(const phi::DenseTensor& out_tensor, + const phi::DenseTensor& in_tensor, + int dst_rank, + int cur_rank, + int world_size, + int out_size_factor, + int in_size_factor); + + // for p2p + static void SingleTensor(const phi::DenseTensor& tensor, + int rank, + int world_size); + + // for collective + static void SameShape(const phi::DenseTensor& out_tensor, + const phi::DenseTensor& in_tensor, + int dst_rank, + int cur_rank, + int world_size); + + static void ScatterLikeShape(const phi::DenseTensor& out_tensor, + const phi::DenseTensor& in_tensor, + int dst_rank, + int cur_rank, + int world_size); + + static void GatherLikeShape(const phi::DenseTensor& out_tensor, + const phi::DenseTensor& in_tensor, + int dst_rank, + int cur_rank, + int world_size); +}; + +} // namespace distributed +} // namespace paddle From 46371c534ce89e6e94357e9fb5eb182ed4598a3b Mon Sep 17 00:00:00 2001 From: Weilong Wu Date: Sun, 4 Dec 2022 19:51:02 +0800 Subject: [PATCH 129/154] [Eager] fix set_value logic when input's dtype is different (#48519) * [Eager] fix set_value logic when input's dtype is different * value_tensor * fix set_value logic when input's dtype is different --- paddle/fluid/pybind/eager_method.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 6f0bd5fb16d14e..8c7b6296eb46e9 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -1227,7 +1227,6 @@ static PyObject* tensor_method__setitem_eager_tensor(TensorObject* self, Py_TYPE(value_obj))); } } - { // Release gil and do tracing py::gil_scoped_release release; @@ -1242,6 +1241,9 @@ static PyObject* tensor_method__setitem_eager_tensor(TensorObject* self, self->tensor.name(), self->tensor, amp_dtype, "set_value"); 
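+      // Keep value_tensor's dtype aligned with the destination tensor: the
+      // cast added below runs whenever AMP auto-casting (or the caller's
+      // input) produced a different dtype, so set_value always operates on
+      // matching types.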
value_tensor = egr::EagerAmpAutoCast( value_tensor.name(), value_tensor, amp_dtype, "set_value"); + if (self->tensor.dtype() != value_tensor.dtype()) { + value_tensor = cast_ad_func(value_tensor, self->tensor.dtype()); + } } self->tensor = set_value__dygraph_function( self->tensor, value_tensor, {}, {}, {}, attrs); From 7c577754875a0d2d865decc062ba924ecf212bf9 Mon Sep 17 00:00:00 2001 From: Weilong Wu Date: Sun, 4 Dec 2022 22:38:48 +0800 Subject: [PATCH 130/154] clear test_dropout_op (#48657) --- .../fluid/tests/unittests/test_dropout_op.py | 77 ++----------------- 1 file changed, 7 insertions(+), 70 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py index 8a47deb34c0d38..75b92687034719 100644 --- a/python/paddle/fluid/tests/unittests/test_dropout_op.py +++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py @@ -23,7 +23,6 @@ import paddle.static as static from paddle import _C_ops from paddle.fluid import Program, program_guard -from paddle.fluid.framework import _enable_legacy_dygraph, _test_eager_guard class TestDropoutOp(OpTest): @@ -1046,13 +1045,14 @@ def cal_grad_downscale_in_infer(self, mask): return mask.astype("float32") def test_backward_downscale_in_infer(self): - _enable_legacy_dygraph() for place in self.places: with fluid.dygraph.guard(place): input = paddle.uniform([40, 40], dtype="float32") input.stop_gradient = False - out, mask = core.ops.dropout(input, 'dropout_prob', 0.5) + out, mask = _C_ops.dropout( + input, None, 0.5, False, "downgrade_in_infer", 0, False + ) out.backward() np.testing.assert_array_equal( @@ -1060,35 +1060,15 @@ def test_backward_downscale_in_infer(self): self.cal_grad_downscale_in_infer(mask.numpy()), ) - def test_backward_downscale_in_infer_eager(self): - for place in self.places: - with fluid.dygraph.guard(place): - with _test_eager_guard(): - input = paddle.uniform([40, 40], dtype="float32") - input.stop_gradient = False - out, mask = _C_ops.dropout( - input, None, 0.5, False, "downgrade_in_infer", 0, False - ) - out.backward() - np.testing.assert_array_equal( - input.gradient(), - self.cal_grad_downscale_in_infer(mask.numpy()), - ) - def test_backward_upscale_train(self): - _enable_legacy_dygraph() for place in self.places: with fluid.dygraph.guard(place): prob = 0.5 input = paddle.uniform([40, 40], dtype="float32") input.stop_gradient = False - out, mask = core.ops.dropout( - input, - 'dropout_prob', - prob, - "dropout_implementation", - "upscale_in_train", + out, mask = _C_ops.dropout( + input, None, 0.5, False, "upscale_in_train", 0, False ) out.backward() @@ -1098,38 +1078,15 @@ def test_backward_upscale_train(self): rtol=1e-05, ) - def test_backward_upscale_train_eager(self): - for place in self.places: - with fluid.dygraph.guard(place): - with _test_eager_guard(): - prob = 0.5 - input = paddle.uniform([40, 40], dtype="float32") - input.stop_gradient = False - out, mask = _C_ops.dropout( - input, None, 0.5, False, "upscale_in_train", 0, False - ) - out.backward() - - np.testing.assert_allclose( - input.gradient(), - self.cal_grad_upscale_train(mask.numpy(), prob), - rtol=1e-05, - ) - def test_backward_upscale_train_2(self): - _enable_legacy_dygraph() for place in self.places: with fluid.dygraph.guard(place): prob = 0.3 input = paddle.uniform([40, 40], dtype="float32") input.stop_gradient = False - out, mask = core.ops.dropout( - input, - 'dropout_prob', - prob, - "dropout_implementation", - "upscale_in_train", + out, mask = _C_ops.dropout( + input, 
None, 0.3, False, "upscale_in_train", 0, False ) out.backward() @@ -1139,26 +1096,6 @@ def test_backward_upscale_train_2(self): rtol=1e-05, ) - def test_backward_upscale_train_2_eager(self): - for place in self.places: - with fluid.dygraph.guard(place): - with _test_eager_guard(): - - prob = 0.3 - input = paddle.uniform([40, 40], dtype="float32") - input.stop_gradient = False - out, mask = _C_ops.dropout( - input, None, 0.3, False, "upscale_in_train", 0, False - ) - - out.backward() - - np.testing.assert_allclose( - input.gradient(), - self.cal_grad_upscale_train(mask.numpy(), prob), - rtol=1e-05, - ) - class TestDropOutWithProbTensor(unittest.TestCase): def setUp(self): From a842c1d0e0194146098314419c315d1b3e956e4a Mon Sep 17 00:00:00 2001 From: xiaoxiaohehe001 <49090790+xiaoxiaohehe001@users.noreply.github.com> Date: Mon, 5 Dec 2022 10:21:18 +0800 Subject: [PATCH 131/154] [Paddle Inference] Support fill_any_like bool input. (#48671) * fill_any_like_bool * fill_any_like_bool --- paddle/fluid/inference/tensorrt/op_teller.cc | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index d88de415e82cdd..d8801bd8f527a3 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -1226,17 +1226,26 @@ struct SimpleOpTypeSetTeller : public Teller { return false; } int dtype = PADDLE_GET_CONST(int, desc.GetAttr("dtype")); + auto* block = desc.Block(); + auto* x_var_desc = block->FindVar(desc.Input("X")[0]); + auto input_type = x_var_desc->GetDataType(); +#if IS_TRT_VERSION_GE(8400) + if (dtype == 0 || + (dtype == -1 && input_type == framework::proto::VarType::BOOL)) { + VLOG(3) << "the fill_any_like supports input of BOOL by trt8.4 above"; + return true; + } +#endif if (dtype != -1 && dtype != 2 && dtype != 5) { - VLOG(3) << "the fill_any_like only supports int32 and float32"; + VLOG(3) << "the fill_any_like only supports int32 and float32 by " + "trt8.4 below"; return false; } if (dtype == -1) { - auto* block = desc.Block(); - auto* x_var_desc = block->FindVar(desc.Input("X")[0]); - auto input_type = x_var_desc->GetDataType(); if (input_type != framework::proto::VarType::INT32 && input_type != framework::proto::VarType::FP32) { - VLOG(3) << "the fill_any_like only supports int32 and float32"; + VLOG(3) << "the fill_any_like only supports int32 and float32 by " + "trt8.4 below"; return false; } } From 91b65e58225032584599ec96f6f5de49314b7a43 Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Mon, 5 Dec 2022 10:35:38 +0800 Subject: [PATCH 132/154] [Clean fluid] Clean fluid elementwise_arithmetic (part2) (#48461) * clean elem_arithmetic part2 unittest * fix test_model_cast_to_bf16 * restore test_model_cast_to_bf16 --- .../slim/tests/test_quantization_pass.py | 14 ++-- .../tests/test_user_defined_quantization.py | 8 +-- .../tests/test_image_classification_fp16.py | 2 +- .../tests/test_multi_precision_fp16_train.py | 2 +- .../contrib/tests/test_quantize_transpiler.py | 2 +- .../contrib/tests/test_weight_decay_extend.py | 4 +- .../tests/book/test_image_classification.py | 2 +- .../fleet/hybrid_parallel_pp_embedding.py | 4 +- .../fleet/hybrid_parallel_shared_weight.py | 4 +- .../fleet/parallel_dygraph_se_resnext.py | 2 +- .../test_mkldnn_elt_act_fuse_pass.py | 72 +++++++++---------- .../test_mkldnn_inplace_fuse_pass.py | 4 +- .../ir/inference/test_trt_subgraph_pass.py | 4 +- ...r_embedding_eltwise_layernorm_fuse_pass.py | 11 +-- 
.../unittests/ir/test_ir_fusion_group_pass.py | 8 +-- .../ir/test_ir_skip_layernorm_pass.py | 2 +- .../mkldnn/check_flags_mkldnn_ops_on_off.py | 3 +- 17 files changed, 69 insertions(+), 79 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py index f64a047ea49264..5aabeee1197993 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py @@ -72,7 +72,7 @@ def conv_bn_layer( for _ in range(num): conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True) short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None) - hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu') + hidden = paddle.nn.functional.relu(paddle.add(x=conv, y=short)) matmul_weight = paddle.create_parameter( shape=[1, 16, 32, 32], dtype='float32' ) @@ -723,7 +723,7 @@ def conv_bn_layer( for _ in range(num): conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True) short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None) - hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu') + hidden = paddle.nn.functional.relu(paddle.add(x=conv, y=short)) hidden = fluid.layers.matmul(hidden, data2, True, True) if isinstance(quant_skip_pattern, str): with fluid.name_scope(quant_skip_pattern): @@ -733,9 +733,7 @@ def conv_bn_layer( pool2 = fluid.layers.pool2d( input=hidden, pool_size=2, pool_type='max', pool_stride=2 ) - pool_add = fluid.layers.elementwise_add( - x=pool1, y=pool2, act='relu' - ) + pool_add = paddle.nn.functional.relu(paddle.add(x=pool1, y=pool2)) elif isinstance(quant_skip_pattern, list): assert ( len(quant_skip_pattern) > 1 @@ -748,9 +746,7 @@ def conv_bn_layer( input=hidden, pool_size=2, pool_type='max', pool_stride=2 ) with fluid.name_scope(quant_skip_pattern[1]): - pool_add = fluid.layers.elementwise_add( - x=pool1, y=pool2, act='relu' - ) + pool_add = paddle.nn.functional.relu(paddle.add(x=pool1, y=pool2)) else: pool1 = fluid.layers.pool2d( input=hidden, pool_size=2, pool_type='avg', pool_stride=2 @@ -758,7 +754,7 @@ def conv_bn_layer( pool2 = fluid.layers.pool2d( input=hidden, pool_size=2, pool_type='max', pool_stride=2 ) - pool_add = fluid.layers.elementwise_add(x=pool1, y=pool2, act='relu') + pool_add = paddle.nn.functional.relu(paddle.add(x=pool1, y=pool2)) fc = fluid.layers.fc(input=pool_add, size=10) loss = fluid.layers.cross_entropy(input=fc, label=label) loss = paddle.mean(loss) diff --git a/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py b/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py index cc8136e3b7b4cf..25656278137a7c 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py +++ b/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py @@ -73,12 +73,8 @@ def pact(x, name=None): learning_rate=1, ) u_param = helper.create_parameter(attr=u_param_attr, shape=[1], dtype=dtype) - x = fluid.layers.elementwise_sub( - x, fluid.layers.relu(fluid.layers.elementwise_sub(x, u_param)) - ) - x = fluid.layers.elementwise_add( - x, fluid.layers.relu(fluid.layers.elementwise_sub(-u_param, x)) - ) + x = paddle.subtract(x, fluid.layers.relu(paddle.subtract(x, u_param))) + x = paddle.add(x, fluid.layers.relu(paddle.subtract(-u_param, x))) return x diff --git a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py index 
362dde4d4816f1..908622d76a1540 100644 --- a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py +++ b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py @@ -53,7 +53,7 @@ def basicblock(input, ch_in, ch_out, stride): tmp = conv_bn_layer(input, ch_out, 3, stride, 1) tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True) short = shortcut(input, ch_in, ch_out, stride) - return fluid.layers.elementwise_add(x=tmp, y=short, act='relu') + return paddle.nn.functional.relu(paddle.add(x=tmp, y=short)) def layer_warp(block_func, input, ch_in, ch_out, count, stride): tmp = block_func(input, ch_in, ch_out, stride) diff --git a/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py b/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py index 4265594f71f56f..8f4bf36e5b2b50 100644 --- a/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py +++ b/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py @@ -71,7 +71,7 @@ def basicblock(input, ch_in, ch_out, stride): tmp = conv_bn_layer(input, ch_out, 3, stride, 1) tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True) short = shortcut(input, ch_in, ch_out, stride) - return fluid.layers.elementwise_add(x=tmp, y=short, act='relu') + return paddle.nn.functional.relu(paddle.add(x=tmp, y=short)) def layer_warp(block_func, input, ch_in, ch_out, count, stride): tmp = block_func(input, ch_in, ch_out, stride) diff --git a/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py b/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py index 082dbe5bdba1d1..cdbd65fad68a62 100644 --- a/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py +++ b/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py @@ -56,7 +56,7 @@ def conv_bn_layer( for _ in range(num): conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True) short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None) - hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu') + hidden = paddle.nn.functional.relu(paddle.add(x=conv, y=short)) fc = fluid.layers.fc(input=hidden, size=10) loss = fluid.layers.cross_entropy(input=fc, label=label) loss = paddle.mean(loss) diff --git a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py index 70c63c1d54af8c..7b40d513f2e5c3 100644 --- a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py +++ b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py @@ -171,9 +171,7 @@ def check_weight_decay2(self, place, model): ] for params in param_list: - updated_p = fluid.layers.elementwise_sub( - x=params[0], y=params[1] - ) + updated_p = paddle.subtract(x=params[0], y=params[1]) fluid.layers.assign(input=updated_p, output=params[0]) optimizer.apply_optimize(avg_cost, startup_prog, params_grads) diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py index beb562bee57a26..3a401df20370de 100644 --- a/python/paddle/fluid/tests/book/test_image_classification.py +++ b/python/paddle/fluid/tests/book/test_image_classification.py @@ -52,7 +52,7 @@ def basicblock(input, ch_in, ch_out, stride): tmp = conv_bn_layer(input, ch_out, 3, stride, 1) tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True) short = shortcut(input, ch_in, ch_out, stride) - return fluid.layers.elementwise_add(x=tmp, y=short, act='relu') + return paddle.nn.functional.relu(paddle.add(x=tmp, 
y=short)) def layer_warp(block_func, input, ch_in, ch_out, count, stride): tmp = block_func(input, ch_in, ch_out, stride) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py index 104aa658ec3319..0d1e7084ab94d2 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py @@ -55,7 +55,7 @@ def __init__(self): def forward(self, x1, x2, y1): x_emb = self.word_embeddings(x1) fc = fluid.layers.matmul(x_emb, self.softmax_weight) - fc = fluid.layers.elementwise_add(fc, self.softmax_bias) + fc = paddle.add(fc, self.softmax_bias) projection = paddle.reshape(fc, shape=[-1, vocab_size]) loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=y1, soft_label=False @@ -95,7 +95,7 @@ def __init__(self): def forward(self, args): fc, x2 = args - fc = fluid.layers.elementwise_add(fc, self.softmax_bias) + fc = paddle.add(fc, self.softmax_bias) projection = paddle.reshape(fc, shape=[-1, vocab_size]) return projection, x2 diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py index 58c0fe7465c918..456078921295bf 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py @@ -62,7 +62,7 @@ def __init__(self): def forward(self, x1, x2, y1): x_emb = self.word_embeddings(x1) fc = fluid.layers.matmul(x_emb, self.softmax_weight) - fc = fluid.layers.elementwise_add(fc, self.softmax_bias) + fc = paddle.add(fc, self.softmax_bias) projection = paddle.reshape(fc, shape=[-1, vocab_size]) projection = paddle.matmul(projection, self.word_embeddings.weight) @@ -109,7 +109,7 @@ def __init__(self): def forward(self, args): fc, x2 = args - fc = fluid.layers.elementwise_add(fc, self.softmax_bias) + fc = paddle.add(fc, self.softmax_bias) projection = paddle.reshape(fc, shape=[-1, vocab_size]) return projection, x2 diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py index 164f1410ed756b..eef3f6bdd743d3 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py @@ -206,7 +206,7 @@ def forward(self, inputs): else: short = self.short(inputs) - y = fluid.layers.elementwise_add(x=short, y=scale, act='relu') + y = paddle.nn.functional.relu(paddle.add(x=short, y=scale)) return y diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py index ac635436f6200a..2026a54116c23d 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py @@ -53,7 +53,7 @@ def setUp(self): self.enable_mkldnn = True def set_params(self): - self.operand = fluid.layers.elementwise_add + self.operand = paddle.add self.act = None def test_check_output(self): @@ -68,7 +68,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Relu( 
ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_add + self.operand = paddle.add self.act = fluid.layers.relu @@ -76,7 +76,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Tanh( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_add + self.operand = paddle.add self.act = paddle.tanh @@ -84,7 +84,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_LeakyRelu( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_add + self.operand = paddle.add self.act_alpha = 0.2 self.act = paddle.nn.functional.leaky_relu @@ -93,7 +93,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Swish( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_add + self.operand = paddle.add self.act = paddle.nn.functional.swish @@ -101,7 +101,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_HardSwish( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_add + self.operand = paddle.add self.act = fluid.layers.hard_swish @@ -109,7 +109,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_SQRT( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_add + self.operand = paddle.add self.act = paddle.sqrt @@ -117,7 +117,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_ABS( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_add + self.operand = paddle.add self.act = paddle.abs @@ -125,7 +125,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Clip( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_add + self.operand = paddle.add self.act = fluid.layers.clip self.act_alpha = 0.0 self.act_beta = 10.0 @@ -135,7 +135,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Gelu( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_add + self.operand = paddle.add self.act = paddle.nn.functional.gelu @@ -143,7 +143,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Gelu_Tanh( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_add + self.operand = paddle.add self.act = paddle.nn.functional.gelu self.act_alpha = True @@ -152,7 +152,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Relu6( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_add + self.operand = paddle.add self.act = paddle.nn.functional.relu6 @@ -160,7 +160,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Sigmoid( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_add + self.operand = paddle.add self.act = paddle.nn.functional.sigmoid @@ -168,7 +168,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Relu( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_sub + self.operand = paddle.subtract self.act = fluid.layers.relu @@ -176,7 +176,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Tanh( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_sub + self.operand = paddle.subtract self.act = paddle.tanh @@ -184,7 +184,7 @@ class 
ElementwiseActivationMkldnnFusePassTest_Sub_LeakyRelu( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_sub + self.operand = paddle.subtract self.act_alpha = 0.2 self.act = paddle.nn.functional.leaky_relu @@ -193,7 +193,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Swish( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_sub + self.operand = paddle.subtract self.act = paddle.nn.functional.swish @@ -201,7 +201,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_HardSwish( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_sub + self.operand = paddle.subtract self.act = fluid.layers.hard_swish @@ -209,7 +209,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_ABS( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_sub + self.operand = paddle.subtract self.act = paddle.abs @@ -217,7 +217,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Clip( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_sub + self.operand = paddle.subtract self.act = fluid.layers.clip self.act_alpha = 0.0 self.act_beta = 10.0 @@ -227,7 +227,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Gelu( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_sub + self.operand = paddle.subtract self.act = paddle.nn.functional.gelu @@ -235,7 +235,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Gelu_Tanh( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_sub + self.operand = paddle.subtract self.act = paddle.nn.functional.gelu self.act_alpha = True @@ -244,7 +244,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Relu6( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_sub + self.operand = paddle.subtract self.act = paddle.nn.functional.relu6 @@ -252,7 +252,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Sigmoid( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_sub + self.operand = paddle.subtract self.act = paddle.nn.functional.sigmoid @@ -260,7 +260,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Relu( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_mul + self.operand = paddle.multiply self.act = fluid.layers.relu @@ -268,7 +268,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Tanh( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_mul + self.operand = paddle.multiply self.act = paddle.tanh @@ -276,7 +276,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_LeakyRelu( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_mul + self.operand = paddle.multiply self.act_alpha = 0.2 self.act = paddle.nn.functional.leaky_relu @@ -285,7 +285,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Swish( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_mul + self.operand = paddle.multiply self.act = paddle.nn.functional.swish @@ -293,7 +293,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_HardSwish( ElementwiseActivationMkldnnFusePassTest ): def 
set_params(self): - self.operand = fluid.layers.elementwise_mul + self.operand = paddle.multiply self.act = fluid.layers.hard_swish @@ -301,7 +301,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_SQRT( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_mul + self.operand = paddle.multiply self.act = paddle.sqrt @@ -309,7 +309,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_ABS( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_mul + self.operand = paddle.multiply self.act = paddle.abs @@ -317,7 +317,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Clip( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_mul + self.operand = paddle.multiply self.act = fluid.layers.clip self.act_alpha = 0.0 self.act_beta = 10.0 @@ -327,7 +327,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Gelu( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_mul + self.operand = paddle.multiply self.act = paddle.nn.functional.gelu @@ -335,7 +335,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Gelu_Tanh( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_mul + self.operand = paddle.multiply self.act = paddle.nn.functional.gelu self.act_alpha = True @@ -344,7 +344,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Relu6( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_mul + self.operand = paddle.multiply self.act = paddle.nn.functional.relu6 @@ -352,7 +352,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Sigmoid( ElementwiseActivationMkldnnFusePassTest ): def set_params(self): - self.operand = fluid.layers.elementwise_mul + self.operand = paddle.multiply self.act = paddle.nn.functional.sigmoid diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py index 386dcf7b4075b7..a2ac6d42e54e78 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py @@ -34,9 +34,7 @@ def setUp(self): ) softmax_out = paddle.nn.functional.softmax(conv_out_1) relu_out = fluid.layers.relu(conv_out_1) - eltwise_out = fluid.layers.elementwise_add( - softmax_out, relu_out, axis=-1 - ) + eltwise_out = paddle.add(softmax_out, relu_out) self.pass_name = 'mkldnn_inplace_pass' self.feeds = { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py index 235f2446cb1490..a0f034462f3ba5 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py @@ -378,7 +378,7 @@ def setUp(self): self.fetch_list = [out] def append_eltwise(self, data1, data2): - return fluid.layers.elementwise_add(x=data1, y=data2) + return paddle.add(x=data1, y=data2) def test_check_output(self): if core.is_compiled_with_cuda(): @@ -439,7 +439,7 @@ def setUp(self): self.fetch_list = [out] def append_eltwise(self, data1, data2): - return fluid.layers.elementwise_add(x=data1, y=data2) + return paddle.add(x=data1, y=data2) def test_check_output(self): if 
os.path.exists(self.path + "_opt_cache"): diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py index 2eb3cf9938aefd..2f3df455248912 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py @@ -17,6 +17,7 @@ import numpy as np from pass_test import PassTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core @@ -51,8 +52,8 @@ def setUp(self): sent_emb = fluid.layers.embedding( input=sent_id, size=(128, 768), dtype='float32' ) - add1 = fluid.layers.elementwise_add(word_emb, pos_emb) - add2 = fluid.layers.elementwise_add(add1, sent_emb) + add1 = paddle.add(word_emb, pos_emb) + add2 = paddle.add(add1, sent_emb) hidden1 = fluid.layers.layer_norm(input=add2, begin_norm_axis=2) id1 = fluid.layers.data( @@ -91,9 +92,9 @@ def setUp(self): emb4 = fluid.layers.embedding( input=id4, size=(128, 768), dtype='float32' ) - add_1 = fluid.layers.elementwise_add(emb1, emb2) - add_2 = fluid.layers.elementwise_add(add_1, emb3) - add_3 = fluid.layers.elementwise_add(add_2, emb4) + add_1 = paddle.add(emb1, emb2) + add_2 = paddle.add(add_1, emb3) + add_3 = paddle.add(add_2, emb4) hidden_1 = fluid.layers.layer_norm(input=add_3, begin_norm_axis=2) self.feeds = { diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_fusion_group_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_fusion_group_pass.py index 1538bac16ff916..47b65f5626ff6a 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_fusion_group_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_fusion_group_pass.py @@ -142,7 +142,7 @@ def build_program(self, dtype): zero = layers.fill_constant(shape=[128], dtype="float16", value=0) # TODO(xreki): fix precision problem when using softmax of float16. 
# tmp_2 = layers.softmax(tmp_1) - tmp_2 = layers.elementwise_add(tmp_1, zero) + tmp_2 = paddle.add(tmp_1, zero) tmp_3 = layers.mul(tmp_0, self.feed_vars[2]) # subgraph with 4 op nodes tmp_3 = layers.cast(tmp_2, dtype="float16") @@ -184,7 +184,7 @@ def build_program(self, dtype): with fluid.program_guard(self.main_program, self.startup_program): self.feed_vars = self._prepare_feed_vars([2, 2], dtype, 2) - tmp_0 = layers.elementwise_add(self.feed_vars[0], self.feed_vars[1]) + tmp_0 = paddle.add(self.feed_vars[0], self.feed_vars[1]) tmp_1 = layers.cast(tmp_0, dtype="float64") tmp_2 = layers.cast(tmp_1, dtype="float32") @@ -205,12 +205,12 @@ def build_program(self, dtype): with fluid.program_guard(self.main_program, self.startup_program): self.feed_vars = self._prepare_feed_vars([2, 2], dtype, 2) - tmp_0 = layers.elementwise_add(self.feed_vars[0], self.feed_vars[1]) + tmp_0 = paddle.add(self.feed_vars[0], self.feed_vars[1]) tmp_1 = layers.fill_constant(shape=[2, 2], dtype=dtype, value=2.0) tmp_2 = paddle.scale( tmp_1, scale=3.0, bias=1.0, bias_after_scale=True ) - tmp_3 = layers.elementwise_mul(tmp_2, tmp_0) + tmp_3 = paddle.multiply(tmp_2, tmp_0) self.append_gradients(tmp_3) diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_skip_layernorm_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_skip_layernorm_pass.py index ea2eadd36a3e0b..829dbcadcd86bb 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_skip_layernorm_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_skip_layernorm_pass.py @@ -31,7 +31,7 @@ def setUp(self): y = fluid.data( name="y", shape=[128, 768], dtype="float32", lod_level=0 ) - elementwise_out = fluid.layers.elementwise_add(x=x, y=y) + elementwise_out = paddle.add(x=x, y=y) out = fluid.layers.layer_norm(input=elementwise_out) self.fetch_list = [out] diff --git a/python/paddle/fluid/tests/unittests/mkldnn/check_flags_mkldnn_ops_on_off.py b/python/paddle/fluid/tests/unittests/mkldnn/check_flags_mkldnn_ops_on_off.py index d11ca11740ab84..aa9811a94bc3ec 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/check_flags_mkldnn_ops_on_off.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/check_flags_mkldnn_ops_on_off.py @@ -16,6 +16,7 @@ import numpy as np +import paddle import paddle.fluid as fluid from paddle.fluid.framework import _enable_legacy_dygraph, _global_flags from paddle.fluid.layer_helper import LayerHelper @@ -48,7 +49,7 @@ def check(): with fluid.dygraph.guard(fluid.core.CPUPlace()): a = fluid.dygraph.to_variable(a_np) b = fluid.dygraph.to_variable(b_np) - y = fluid.layers.elementwise_add(x=a, y=b) + y = paddle.add(x=a, y=b) y = fluid.layers.matmul(x=y, y=b, transpose_y=True) res1 = func(y) From 7507956bcb7776766f7edd5b0c575d06c255e965 Mon Sep 17 00:00:00 2001 From: xiaoxiaohehe001 <49090790+xiaoxiaohehe001@users.noreply.github.com> Date: Mon, 5 Dec 2022 10:46:54 +0800 Subject: [PATCH 133/154] release_ (#48383) --- paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc | 2 ++ paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc index cefcca0f83a9e6..a48cfde23853db 100644 --- a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc @@ -931,6 +931,7 @@ int MultiHeadMatmulV2FusePass::BuildFusionV2(Graph* graph, scope->EraseVars({mul1_w->Name(), mul2_w->Name()}); 
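+    // paddle::memory::Release(platform::CPUPlace()) below asks the allocator
+    // to trim its cached blocks, so the host memory freed by erasing the
+    // fused weight/bias vars is actually returned.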
scope->EraseVars({eltadd1_b->Name(), eltadd2_b->Name()}); + paddle::memory::Release(platform::CPUPlace()); auto reshape_desc = reshape2->Op(); int head_number = @@ -1398,6 +1399,7 @@ int MultiHeadMatmulV3FusePass::BuildFusionV3(Graph* graph, sizeof(float) * wq_tensor->numel()); scope->EraseVars({mul1_w->Name(), mul2_w->Name()}); + paddle::memory::Release(platform::CPUPlace()); phi::DenseTensor tmp_combined_bias_tensor; tmp_combined_bias_tensor.Resize(combined_bias_dims); diff --git a/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc index 1d17cba4459059..5c96c351f5111f 100644 --- a/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc @@ -888,6 +888,7 @@ int TrtMultiHeadMatmulV2FusePass::BuildFusionV2(Graph* graph, sizeof(float) * wq_tensor->numel()); scope->EraseVars({mul1_w->Name(), mul2_w->Name()}); + paddle::memory::Release(platform::CPUPlace()); phi::DenseTensor tmp_combined_bias_tensor; tmp_combined_bias_tensor.Resize(combined_bias_dims); @@ -910,6 +911,7 @@ int TrtMultiHeadMatmulV2FusePass::BuildFusionV2(Graph* graph, sizeof(float) * bq_tensor->numel()); scope->EraseVars({eltadd1_b->Name(), eltadd2_b->Name()}); + paddle::memory::Release(platform::CPUPlace()); auto reshape_desc = reshape2->Op(); int head_number = @@ -1408,6 +1410,7 @@ int TrtMultiHeadMatmulV3FusePass::BuildFusionV3(Graph* graph, sizeof(float) * wq_tensor->numel()); scope->EraseVars({mul1_w->Name(), mul2_w->Name()}); + paddle::memory::Release(platform::CPUPlace()); phi::DenseTensor tmp_combined_bias_tensor; tmp_combined_bias_tensor.Resize(combined_bias_dims); @@ -1430,6 +1433,7 @@ int TrtMultiHeadMatmulV3FusePass::BuildFusionV3(Graph* graph, sizeof(float) * bq_tensor->numel()); scope->EraseVars({eltadd1_b->Name(), eltadd2_b->Name()}); + paddle::memory::Release(platform::CPUPlace()); auto reshape_desc = reshape2->Op(); int head_number = From aee2db012751f27f7447f0fdc6a13f19d092fa8b Mon Sep 17 00:00:00 2001 From: xiaoxiaohehe001 <49090790+xiaoxiaohehe001@users.noreply.github.com> Date: Mon, 5 Dec 2022 10:47:38 +0800 Subject: [PATCH 134/154] [Paddle Inference] Support range trt converter and add scalar interface. 
(#48697) * add_range * add_range --- .../fluid/inference/api/analysis_predictor.cc | 1 + .../inference/tensorrt/convert/CMakeLists.txt | 1 + .../inference/tensorrt/convert/range_op.cc | 65 +++++ paddle/fluid/inference/tensorrt/engine.cc | 16 +- paddle/fluid/inference/tensorrt/engine.h | 5 +- paddle/fluid/inference/tensorrt/op_teller.cc | 8 + .../ir/inference/test_trt_convert_range.py | 230 ++++++++++++++++++ 7 files changed, 321 insertions(+), 5 deletions(-) create mode 100644 paddle/fluid/inference/tensorrt/convert/range_op.cc create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_range.py diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 293de6bcd31a62..67e6478bffa706 100755 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2329,6 +2329,7 @@ USE_TRT_CONVERTER(remove_padding) USE_TRT_CONVERTER(equal); USE_TRT_CONVERTER(top_k) USE_TRT_CONVERTER(top_k_v2) +USE_TRT_CONVERTER(range) USE_TRT_CONVERTER(squeeze2) USE_TRT_CONVERTER(unsqueeze2) USE_TRT_CONVERTER(sum) diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index b796cf1c2a2308..cec617c2f56a55 100755 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -71,6 +71,7 @@ list( preln_residual_bias.cc c_allreduce_op.cc top_k_op.cc + range_op.cc squeeze2_op.cc unsqueeze2_op.cc rnn_op.cc diff --git a/paddle/fluid/inference/tensorrt/convert/range_op.cc b/paddle/fluid/inference/tensorrt/convert/range_op.cc new file mode 100644 index 00000000000000..7288f4877b8ad1 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/range_op.cc @@ -0,0 +1,65 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +class RangeOpConverter : public OpConverter { + public: + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, + bool test_mode) override { + VLOG(3) << "convert a range op to tensorrt layer"; + framework::OpDesc op_desc(op, nullptr); + nvinfer1::ILayer* layer = nullptr; + nvinfer1::ITensor* quotient_tensor; + + // Declare inputs + auto* start = engine_->GetITensor(op_desc.Input("Start")[0]); + auto* end = engine_->GetITensor(op_desc.Input("End")[0]); + auto* step = engine_->GetITensor(op_desc.Input("Step")[0]); + auto output_name = op_desc.Output("Out")[0]; + + auto zero_tensor = Add1DConstantLayer(0, output_name + "_zero_tensor_"); + auto fquotient_tensor = FloorDiv(Sub(start, end), step); + if (start->getType() == nvinfer1::DataType::kFLOAT) { + auto* cast_int32_layer = + TRT_ENGINE_ADD_LAYER(engine_, Identity, *fquotient_tensor); + cast_int32_layer->setOutputType(0, nvinfer1::DataType::kINT32); + cast_int32_layer->getOutput(0)->setType(nvinfer1::DataType::kINT32); + quotient_tensor = cast_int32_layer->getOutput(0); + } else { + quotient_tensor = fquotient_tensor; + } + auto number_tensor = Max(Sub(zero_tensor, quotient_tensor), zero_tensor); + auto* start1 = engine_->GetITensor(op_desc.Input("Start")[0], true); + + layer = TRT_ENGINE_ADD_LAYER( + engine_, Fill, nvinfer1::Dims{}, nvinfer1::FillOperation::kLINSPACE); + layer->setInput(0, *number_tensor); + layer->setInput(1, *start1); + layer->setInput(2, *step); + + RreplenishLayerAndOutput(layer, "range", {output_name}, test_mode); + } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(range, RangeOpConverter); diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 0b3c099934c571..255ef5d6d61945 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -451,7 +451,11 @@ void TensorRTEngine::SetITensor(const std::string &name, itensor_map_[name] = tensor; } -nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) { +nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name, + bool scalar) { + if (scalar) { + return ConvertWeight2ITensor(name, true); + } if (itensor_map_.count(name)) { return itensor_map_[name]; } else { @@ -463,7 +467,7 @@ nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) { // For cases when input is not middle-tensor , but persistable tensor // you should call this. 
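+// When `scalar` is true the persistable weight is emitted as a 0-D (scalar)
+// constant and the resulting ITensor is not cached in itensor_map_;
+// GetITensor(name, /*scalar=*/true) routes through this path (used by the
+// range converter).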
nvinfer1::ITensor *TensorRTEngine::ConvertWeight2ITensor( - const std::string &name) { + const std::string &name, bool scalar) { auto *var_v = scope_->FindVar(name); PADDLE_ENFORCE_NOT_NULL( var_v, @@ -489,9 +493,15 @@ nvinfer1::ITensor *TensorRTEngine::ConvertWeight2ITensor( trt_in_shape.d[i] = trt_in_shape.d[i + 1]; } } + if (scalar) { + trt_in_shape.nbDims = 0; + trt_in_shape.d[0] = var_dims[0]; + } nvinfer1::ILayer *layer = TRT_ENGINE_ADD_LAYER(this, Constant, trt_in_shape, weight.get()); - this->SetITensor(name, layer->getOutput(0)); + if (!scalar) { + this->SetITensor(name, layer->getOutput(0)); + } return layer->getOutput(0); } diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index b0e300dca6047c..91876ab1544e1e 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -295,8 +295,9 @@ class TensorRTEngine { void DeleteITensor(const std::string& name, nvinfer1::ITensor* tensor); void SetITensor(const std::string& name, nvinfer1::ITensor* tensor); // Get an ITensor called name. - nvinfer1::ITensor* GetITensor(const std::string& name); - nvinfer1::ITensor* ConvertWeight2ITensor(const std::string& name); + nvinfer1::ITensor* GetITensor(const std::string& name, bool scalar = false); + nvinfer1::ITensor* ConvertWeight2ITensor(const std::string& name, + bool scalar = false); std::unordered_map* GetITensorMap(); nvinfer1::ICudaEngine* engine() { return infer_engine_.get(); } diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index d8801bd8f527a3..7344755790fb1b 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -337,6 +337,12 @@ struct SimpleOpTypeSetTeller : public Teller { } } + if (op_type == "range") { + if (!with_dynamic_shape) { + return false; + } + } + if (op_type == "sign") { #if IS_TRT_VERSION_GE(8200) if (!with_dynamic_shape) { @@ -2369,6 +2375,7 @@ struct SimpleOpTypeSetTeller : public Teller { "matmul", "matmul_v2", "bmm", + "range", "conv2d", "conv2d_fusion", "pool2d", @@ -2507,6 +2514,7 @@ struct SimpleOpTypeSetTeller : public Teller { "matmul", "matmul_v2", "bmm", + "range", "conv2d", "conv2d_fusion", "pool2d", diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_range.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_range.py new file mode 100644 index 00000000000000..42c00181f24e4a --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_range.py @@ -0,0 +1,230 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
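For reference, the element count computed by the converter above — number = max(0 - floor((start - end) / step), 0) — is the usual ceil-division identity, i.e. ceil((end - start) / step) clamped to zero. A minimal standalone Python sketch (independent of TensorRT and of the test harness that follows; the helper name range_size is invented here purely for illustration) that checks the same arithmetic against Python's built-in range:

def range_size(start, end, step):
    # Mirrors the converter: number = max(0 - floor((start - end) / step), 0),
    # which equals ceil((end - start) / step), clamped to be non-negative.
    return max(0 - (start - end) // step, 0)

for start, end, step in [(0, 128, 1), (7, 256, 3), (5, 5, 1), (10, 2, -2)]:
    assert range_size(start, end, step) == len(range(start, end, step))

In the converter this count is the tensor wired into the Fill (kLINSPACE) layer's shape input, with the scalar start and step tensors supplied as its remaining inputs.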
+ +import unittest +from functools import partial +from typing import List + +import numpy as np +from program_config import ProgramConfig, TensorConfig +from trt_layer_auto_scan_test import TrtLayerAutoScanTest + +import paddle.inference as paddle_infer + + +class TrtConvertRangeDynamicTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def sample_program_configs(self): + def generate_input(): + return np.array([1]).astype(np.int32) + + for in_dtype in [2]: + self.in_dtype = in_dtype + dics = [{}] + ops_config = [ + { + "op_type": "fill_constant", + "op_inputs": {}, + "op_outputs": {"Out": ["start_data"]}, + "op_attrs": { + "dtype": self.in_dtype, + "str_value": "7", + "shape": [1], + }, + }, + { + "op_type": "fill_constant", + "op_inputs": {}, + "op_outputs": {"Out": ["end_data"]}, + "op_attrs": { + "dtype": self.in_dtype, + "str_value": "256", + "shape": [1], + }, + }, + { + "op_type": "fill_constant", + "op_inputs": {}, + "op_outputs": {"Out": ["step_data"]}, + "op_attrs": { + "dtype": self.in_dtype, + "str_value": "1", + "shape": [1], + }, + }, + { + "op_type": "range", + "op_inputs": { + "Start": ["start_data"], + "End": ["end_data"], + "Step": ["step_data"], + }, + "op_outputs": {"Out": ["range_output_data1"]}, + "op_attrs": dics[0], + }, + { + "op_type": "cast", + "op_inputs": {"X": ["range_output_data1"]}, + "op_outputs": {"Out": ["range_output_data"]}, + "op_attrs": {"in_dtype": self.in_dtype, "out_dtype": 5}, + }, + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "step_data": TensorConfig(data_gen=partial(generate_input)), + }, + outputs=["range_output_data"], + ) + + yield program_config + + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + self.dynamic_shape.min_input_shape = { + "start_data": [1], + "end_data": [1], + "step_data": [1], + } + self.dynamic_shape.max_input_shape = { + "start_data": [1], + "end_data": [1], + "step_data": [1], + } + self.dynamic_shape.opt_input_shape = { + "start_data": [1], + "end_data": [1], + "step_data": [1], + } + + def clear_dynamic_shape(): + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + return 1, 2 + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-2 + + def test(self): + self.run_test() + + +class TrtConvertRangeStaticTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def sample_program_configs(self): + def generate_input(): + return np.array([0]).astype(np.int32) + + def generate_input1(): + return np.array([128]).astype(np.int32) + + def generate_input2(): + return np.array([1]).astype(np.int32) + + for in_dtype in [2, 5]: + self.in_dtype = in_dtype + dics = [{}] + ops_config = [ + { + "op_type": "range", + "op_inputs": { + "Start": ["start_data"], + "End": ["end_data"], + "Step": ["step_data"], + }, + "op_outputs": {"Out": 
["range_output_data1"]}, + "op_attrs": dics[0], + }, + { + "op_type": "cast", + "op_inputs": {"X": ["range_output_data1"]}, + "op_outputs": {"Out": ["range_output_data"]}, + "op_attrs": {"in_dtype": self.in_dtype, "out_dtype": 5}, + }, + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "start_data": TensorConfig( + data_gen=partial(generate_input) + ), + "end_data": TensorConfig(data_gen=partial(generate_input1)), + "step_data": TensorConfig( + data_gen=partial(generate_input2) + ), + }, + outputs=["range_output_data"], + ) + + yield program_config + + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def clear_dynamic_shape(): + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + return 0, 6 + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + # for static_shape + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-2 + + def test(self): + self.run_test() + + +if __name__ == "__main__": + unittest.main() From 2a3ddce0ed0881d422b2132e88262b62c83b2df1 Mon Sep 17 00:00:00 2001 From: Weilong Wu Date: Mon, 5 Dec 2022 11:14:18 +0800 Subject: [PATCH 135/154] rm _enable_legacy_dygraph (#48677) * rm _enable_legacy * recover original code --- .../tests/unittests/test_linalg_lstsq_op.py | 21 ----- .../tests/unittests/test_pairwise_distance.py | 86 +------------------ .../fluid/tests/unittests/test_slice_op.py | 49 +++++------ .../test_tensor_fill_diagonal_tensor.py | 5 -- .../test_uniform_random_inplace_op.py | 9 -- 5 files changed, 24 insertions(+), 146 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py b/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py index b82fb8ed09a0a5..bae9094a7f74e3 100644 --- a/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py +++ b/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py @@ -92,27 +92,6 @@ def test_eager_dygraph(self): self._result_sg_values = results[3].numpy() self.assert_np_close() - def test_legacy_dygraph(self): - paddle.disable_static() - paddle.fluid.framework._enable_legacy_dygraph() - for dev in self.devices: - paddle.set_device(dev) - place = paddle.CPUPlace() if dev == "cpu" else paddle.CUDAPlace(0) - x = paddle.to_tensor( - self._input_data_1, place=place, dtype=self.dtype - ) - y = paddle.to_tensor( - self._input_data_2, place=place, dtype=self.dtype - ) - results = paddle.linalg.lstsq( - x, y, rcond=self.rcond, driver=self.driver - ) - self._result_solution = results[0].numpy() - self._result_residuals = results[1].numpy() - self._result_rank = results[2].numpy() - self._result_sg_values = results[3].numpy() - self.assert_np_close() - def test_static(self): paddle.enable_static() for dev in self.devices: diff --git a/python/paddle/fluid/tests/unittests/test_pairwise_distance.py b/python/paddle/fluid/tests/unittests/test_pairwise_distance.py index 8e7463abd9464e..a764612cd9d138 100644 --- a/python/paddle/fluid/tests/unittests/test_pairwise_distance.py +++ b/python/paddle/fluid/tests/unittests/test_pairwise_distance.py @@ -95,25 +95,6 
@@ def test_dygraph( return dygraph_ret -def test_legacy_dygraph( - place, x_np, y_np, p=2.0, epsilon=1e-6, keepdim=False, functional=False -): - paddle.fluid.framework._enable_legacy_dygraph() - x = paddle.to_tensor(x_np) - y = paddle.to_tensor(y_np) - if functional: - legacy_distance = call_pairwise_distance_functional( - x=x, y=y, p=p, epsilon=epsilon, keepdim=keepdim - ) - else: - legacy_distance = call_pairwise_distance_layer( - x=x, y=y, p=p, epsilon=epsilon, keepdim=keepdim - ) - legacy_ret = legacy_distance.numpy() - paddle.fluid.framework._disable_legacy_dygraph() - return legacy_ret - - class TestPairwiseDistance(unittest.TestCase): def test_pairwise_distance(self): epsilon = 1e-6 @@ -148,14 +129,6 @@ def test_pairwise_distance(self): epsilon=epsilon, keepdim=keepdim, ) - legacy_ret = test_legacy_dygraph( - place, - x_np, - y_np, - p, - epsilon=epsilon, - keepdim=keepdim, - ) excepted_value = np_pairwise_distance( x_np, y_np, p, epsilon=epsilon, keepdim=keepdim ) @@ -166,9 +139,6 @@ def test_pairwise_distance(self): self.assertEqual( dygraph_ret.shape, excepted_value.shape ) - self.assertEqual( - legacy_ret.shape, excepted_value.shape - ) np.testing.assert_allclose( static_ret, excepted_value, rtol=1e-05 @@ -176,10 +146,6 @@ def test_pairwise_distance(self): np.testing.assert_allclose( dygraph_ret, excepted_value, rtol=1e-05 ) - np.testing.assert_allclose( - legacy_ret, excepted_value, rtol=1e-05 - ) - static_functional_ret = test_static( place, x_np, @@ -196,14 +162,6 @@ def test_pairwise_distance(self): epsilon=epsilon, keepdim=keepdim, ) - legacy_functional_ret = test_legacy_dygraph( - place, - x_np, - y_np, - p, - epsilon=epsilon, - keepdim=keepdim, - ) self.assertEqual( static_functional_ret.shape, @@ -213,10 +171,6 @@ def test_pairwise_distance(self): dygraph_functional_ret.shape, excepted_value.shape, ) - self.assertEqual( - legacy_functional_ret.shape, - excepted_value.shape, - ) np.testing.assert_allclose( static_functional_ret, @@ -228,11 +182,6 @@ def test_pairwise_distance(self): excepted_value, rtol=1e-05, ) - np.testing.assert_allclose( - legacy_functional_ret, - excepted_value, - rtol=1e-05, - ) def test_pairwise_distance_broadcast_1(self): shape_x = [100, 100] @@ -248,20 +197,15 @@ def test_pairwise_distance_broadcast_1(self): dygraph_ret = test_dygraph( place=place, x_np=x_np, y_np=y_np, epsilon=epsilon, keepdim=keepdim ) - legacy_ret = test_legacy_dygraph( - place=place, x_np=x_np, y_np=y_np, epsilon=epsilon, keepdim=keepdim - ) excepted_value = np_pairwise_distance( x_np, y_np, epsilon=epsilon, keepdim=keepdim ) self.assertEqual(static_ret.shape, excepted_value.shape) self.assertEqual(dygraph_ret.shape, excepted_value.shape) - self.assertEqual(legacy_ret.shape, excepted_value.shape) np.testing.assert_allclose(static_ret, excepted_value, rtol=1e-05) np.testing.assert_allclose(dygraph_ret, excepted_value, rtol=1e-05) - np.testing.assert_allclose(legacy_ret, excepted_value, rtol=1e-05) static_functional_ret = test_static( place=place, @@ -279,18 +223,9 @@ def test_pairwise_distance_broadcast_1(self): keepdim=keepdim, functional=True, ) - legacy_functional_ret = test_legacy_dygraph( - place=place, - x_np=x_np, - y_np=y_np, - epsilon=epsilon, - keepdim=keepdim, - functional=True, - ) self.assertEqual(static_functional_ret.shape, excepted_value.shape) self.assertEqual(dygraph_functional_ret.shape, excepted_value.shape) - self.assertEqual(legacy_functional_ret.shape, excepted_value.shape) np.testing.assert_allclose( static_functional_ret, excepted_value, rtol=1e-05 @@ 
-298,9 +233,6 @@ def test_pairwise_distance_broadcast_1(self): np.testing.assert_allclose( dygraph_functional_ret, excepted_value, rtol=1e-05 ) - np.testing.assert_allclose( - legacy_functional_ret, excepted_value, rtol=1e-05 - ) def test_pairwise_distance_broadcast_2(self): shape_x = [100, 100] @@ -316,20 +248,16 @@ def test_pairwise_distance_broadcast_2(self): dygraph_ret = test_dygraph( place=place, x_np=x_np, y_np=y_np, epsilon=epsilon, keepdim=keepdim ) - legacy_ret = test_legacy_dygraph( - place=place, x_np=x_np, y_np=y_np, epsilon=epsilon, keepdim=keepdim - ) + excepted_value = np_pairwise_distance( x_np, y_np, epsilon=epsilon, keepdim=keepdim ) self.assertEqual(static_ret.shape, excepted_value.shape) self.assertEqual(dygraph_ret.shape, excepted_value.shape) - self.assertEqual(legacy_ret.shape, excepted_value.shape) np.testing.assert_allclose(static_ret, excepted_value, rtol=1e-05) np.testing.assert_allclose(dygraph_ret, excepted_value, rtol=1e-05) - np.testing.assert_allclose(legacy_ret, excepted_value, rtol=1e-05) static_functional_ret = test_static( place=place, @@ -347,18 +275,9 @@ def test_pairwise_distance_broadcast_2(self): keepdim=keepdim, functional=True, ) - legacy_functional_ret = test_legacy_dygraph( - place=place, - x_np=x_np, - y_np=y_np, - epsilon=epsilon, - keepdim=keepdim, - functional=True, - ) self.assertEqual(static_functional_ret.shape, excepted_value.shape) self.assertEqual(dygraph_functional_ret.shape, excepted_value.shape) - self.assertEqual(legacy_functional_ret.shape, excepted_value.shape) np.testing.assert_allclose( static_functional_ret, excepted_value, rtol=1e-05 @@ -366,9 +285,6 @@ def test_pairwise_distance_broadcast_2(self): np.testing.assert_allclose( dygraph_functional_ret, excepted_value, rtol=1e-05 ) - np.testing.assert_allclose( - legacy_functional_ret, excepted_value, rtol=1e-05 - ) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py index 371db6edd75cb8..4538ef65c11da1 100644 --- a/python/paddle/fluid/tests/unittests/test_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_slice_op.py @@ -23,7 +23,6 @@ import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.layers as layers -from paddle.fluid.framework import _enable_legacy_dygraph, _test_eager_guard paddle.enable_static() @@ -640,29 +639,28 @@ def test_bool_tensor(self): class TestSliceApiEager(unittest.TestCase): def test_slice_api(self): with paddle.fluid.dygraph.guard(): - with _test_eager_guard(): - a = paddle.rand(shape=[4, 5, 6], dtype='float32') - a.stop_gradient = False - axes = [0, 1, 2] - starts = [-3, 0, 2] - ends = [3, 2, 4] - a_1 = paddle.slice(a, axes=axes, starts=starts, ends=ends) - - a_2 = paddle.slice( - a, - axes=axes, - starts=paddle.to_tensor(starts), - ends=paddle.to_tensor(ends), - ) - np.testing.assert_array_equal(a_1.numpy(), a_2.numpy()) - a_1.backward() - grad_truth = paddle.zeros_like(a) - grad_truth[-3:3, 0:2, 2:4] = 1 - np.testing.assert_array_equal(grad_truth, a.gradient()) - - np.testing.assert_allclose( - a_1.numpy(), a[-3:3, 0:2, 2:4], rtol=1e-05 - ) + a = paddle.rand(shape=[4, 5, 6], dtype='float32') + a.stop_gradient = False + axes = [0, 1, 2] + starts = [-3, 0, 2] + ends = [3, 2, 4] + a_1 = paddle.slice(a, axes=axes, starts=starts, ends=ends) + + a_2 = paddle.slice( + a, + axes=axes, + starts=paddle.to_tensor(starts), + ends=paddle.to_tensor(ends), + ) + np.testing.assert_array_equal(a_1.numpy(), a_2.numpy()) + a_1.backward() + 
grad_truth = paddle.zeros_like(a) + grad_truth[-3:3, 0:2, 2:4] = 1 + np.testing.assert_array_equal(grad_truth, a.gradient()) + + np.testing.assert_allclose( + a_1.numpy(), a[-3:3, 0:2, 2:4], rtol=1e-05 + ) class TestSliceApiWithLoDTensorArray(unittest.TestCase): @@ -861,10 +859,9 @@ def test_axis_less_than_zero(self): ) class TestImperativeCUDAPinnedInput(unittest.TestCase): def test_input_cuda_pinned_var(self): - _enable_legacy_dygraph() with fluid.dygraph.guard(): data = np.random.random((2, 80, 16128)).astype('float32') - var = core.VarBase( + var = core.eager.Tensor( value=data, name='', persistable=False, diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py index f5902fadf40529..0f375cc0aec62d 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_tensor.py @@ -19,7 +19,6 @@ import paddle import paddle.fluid as fluid import paddle.nn.functional as F -from paddle.fluid.framework import _enable_legacy_dygraph class TensorFillDiagTensor_Test(unittest.TestCase): @@ -216,9 +215,5 @@ def test_largedim(self): fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False}) -class TensorFillDiagTensor_Test_legacy(TensorFillDiagTensor_Test): - _enable_legacy_dygraph() - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py index 446df7cd9f54e0..f420209dda5a85 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_inplace_op.py @@ -18,10 +18,6 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.framework import ( - _disable_legacy_dygraph, - _enable_legacy_dygraph, -) class TestUniformRandomInplaceOpDtype(unittest.TestCase): @@ -188,11 +184,6 @@ def test_grad(): def test_uniform_random_inplace_grad(self): self.run_() - def test_uniform_random_inplace_grad_old_dygraph(self): - _enable_legacy_dygraph() - self.run_() - _disable_legacy_dygraph() - if __name__ == '__main__': unittest.main() From 6cdaa371be301b9b7a7c34b8f8c45319b0ce70a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9F=A0=E6=AA=AC=E5=91=B3=7E?= <93066842+Lemon-er@users.noreply.github.com> Date: Mon, 5 Dec 2022 11:20:08 +0800 Subject: [PATCH 136/154] DenseTensor (#48419) --- .../sequence_ops/sequence_conv_op.cc | 29 ++++++------ .../operators/sequence_ops/sequence_conv_op.h | 34 +++++++------- .../sequence_ops/sequence_conv_op_xpu.cc | 32 ++++++------- .../sequence_ops/sequence_enumerate_op.cc | 8 ++-- .../sequence_ops/sequence_enumerate_op.cu | 5 +- .../sequence_ops/sequence_enumerate_op.h | 7 ++- .../sequence_ops/sequence_erase_op.cc | 37 ++++++++------- .../sequence_ops/sequence_erase_op.cu | 5 +- .../sequence_ops/sequence_expand_as_op.cc | 23 ++++----- .../sequence_ops/sequence_expand_as_op.cu | 10 ++-- .../sequence_ops/sequence_expand_op.h | 34 +++++++------- .../sequence_ops/sequence_mask_op.cc | 2 +- .../operators/sequence_ops/sequence_mask_op.h | 14 +++--- .../sequence_ops/sequence_mask_op_npu.cc | 12 ++--- .../operators/sequence_ops/sequence_pad_op.cc | 34 +++++++------- .../operators/sequence_ops/sequence_pad_op.h | 26 +++++----- .../sequence_ops/sequence_pool_op.cc | 17 ++++--- .../operators/sequence_ops/sequence_pool_op.h | 22 ++++----- .../sequence_ops/sequence_softmax_op.h | 47 
+++++++++---------- 19 files changed, 195 insertions(+), 203 deletions(-) diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc b/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc index 7056c52cd8ba8c..57669dbcd6a40c 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc @@ -145,30 +145,31 @@ class SequenceConvOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput( "X", - "(LoDTensor) the input(X) is a LodTensor, which supports " + "(phi::DenseTensor) the input(X) is a LodTensor, which supports " "variable-time length input sequence. The underlying tensor in " - "this LoDTensor is a matrix with shape (T, N), where T is the " + "this phi::DenseTensor is a matrix with shape (T, N), where T is the " "total time steps in this mini-batch and N is the input_hidden_size."); - AddInput("PaddingData", - "(Tensor, optional) the input(PaddingData) is an optional " - "parameter, and it is learnable. " - "This is a tensor with shape (P, N), where P is the " - "top_pad + bottom_pad, N is the input_hidden_size. In order to " - "ensure the equal length of sequence before and after " - "convolution, it is necessary to fill the top and bottom of each " - "sequence according to context_length, context_stride and " - "context_start") + AddInput( + "PaddingData", + "(phi::DenseTensor, optional) the input(PaddingData) is an optional " + "parameter, and it is learnable. " + "This is a tensor with shape (P, N), where P is the " + "top_pad + bottom_pad, N is the input_hidden_size. In order to " + "ensure the equal length of sequence before and after " + "convolution, it is necessary to fill the top and bottom of each " + "sequence according to context_length, context_stride and " + "context_start") .AsDispensable(); AddInput( "Filter", - "(Tensor) the input(Filter) is an learnable parameter." + "(phi::DenseTensor) the input(Filter) is an learnable parameter." "This is a tensor with shape (K, M), where K is the " "context_length * input_hidden_size, M is the output feature size."); AddOutput( "Out", - "(LoDTensor) the output(Out) is a LodTensor, which support " + "(phi::DenseTensor) the output(Out) is a LodTensor, which support " "variable-time length output sequence. The underlying tensor in " - "this LoDTensor is a matrix with shape (T, M), where, T is the " + "this phi::DenseTensor is a matrix with shape (T, M), where, T is the " "total time steps in this mini-batch, M is the output feature size."); AddAttr("paddingTrainable", diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h index 5dec776c32072c..cf34cde478c35c 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h @@ -22,15 +22,12 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; - template class SequenceConvKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* out = context.Output("Out"); + auto* in = context.Input("X"); + auto* out = context.Output("Out"); auto filter = *context.Input("Filter"); out->mutable_data(context.GetPlace()); @@ -40,11 +37,11 @@ class SequenceConvKernel : public framework::OpKernel { int context_stride = context.Attr("contextStride"); bool padding_trainable = context.Attr("paddingTrainable"); - PADDLE_ENFORCE_EQ( - in->lod().empty(), - false, - platform::errors::InvalidArgument("Input(X) Tensor of SequenceConvOp " - "does not contain LoD information.")); + PADDLE_ENFORCE_EQ(in->lod().empty(), + false, + platform::errors::InvalidArgument( + "Input(X) phi::DenseTensor of SequenceConvOp " + "does not contain LoD information.")); PADDLE_ENFORCE_EQ( in->lod().size(), 1UL, @@ -64,7 +61,7 @@ class SequenceConvKernel : public framework::OpKernel { framework::DDim col_shape = {in->dims()[0], context_length * sequence_width}; - Tensor col; + phi::DenseTensor col; col.mutable_data(col_shape, context.GetPlace()); // Because if padding_trainable is false, padding data should be zeros. phi::funcs::SetConstant set_zero; @@ -92,13 +89,14 @@ template class SequenceConvGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in_g = context.Output(framework::GradVarName("X")); - auto* out_g = context.Input(framework::GradVarName("Out")); + auto* in_g = context.Output(framework::GradVarName("X")); + auto* out_g = + context.Input(framework::GradVarName("Out")); auto* filter_g = context.Output(framework::GradVarName("Filter")); auto* padding_data_g = context.Output(framework::GradVarName("PaddingData")); - auto* in = context.Input("X"); + auto* in = context.Input("X"); auto* filter = context.Input("Filter"); int context_start = context.Attr("contextStart"); @@ -125,7 +123,7 @@ class SequenceConvGradKernel : public framework::OpKernel { // use col_shape in the im2col calculation framework::DDim col_shape = {in->dims()[0], sequence_width * context_length}; - Tensor col; + phi::DenseTensor col; if (in_g || filter_g || (padding_trainable && padding_data_g)) { col.mutable_data(col_shape, context.GetPlace()); @@ -159,7 +157,7 @@ class SequenceConvGradKernel : public framework::OpKernel { padding_data_g->mutable_data(context.GetPlace()); set_zero(dev_ctx, padding_data_g, static_cast(0)); - LoDTensor* input = const_cast(in); + phi::DenseTensor* input = const_cast(in); seq_project_grad_functor(dev_ctx, *input, padding_trainable, @@ -178,8 +176,8 @@ class SequenceConvGradKernel : public framework::OpKernel { filter_g->mutable_data(context.GetPlace()); set_zero(dev_ctx, filter_g, static_cast(0)); - Tensor filter_grad = *filter_g; - LoDTensor out_grad = *out_g; + phi::DenseTensor filter_grad = *filter_g; + phi::DenseTensor out_grad = *out_g; const phi::DenseTensor* padding_data = nullptr; if (padding_trainable) { diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc b/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc index f0083ec4042e65..f7b0b5c3b581a2 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc @@ -19,14 +19,13 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class SequenceConvXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* out = context.Output("Out"); + auto* in = context.Input("X"); + auto* out = context.Output("Out"); auto filter = *context.Input("Filter"); out->mutable_data(context.GetPlace()); @@ -36,11 +35,11 @@ class SequenceConvXPUKernel : public framework::OpKernel { int context_stride = context.Attr("contextStride"); bool padding_trainable = context.Attr("paddingTrainable"); - PADDLE_ENFORCE_EQ( - in->lod().empty(), - false, - platform::errors::InvalidArgument("Input(X) Tensor of SequenceConvOp " - "does not contain LoD information.")); + PADDLE_ENFORCE_EQ(in->lod().empty(), + false, + platform::errors::InvalidArgument( + "Input(X) phi::DenseTensor of SequenceConvOp " + "does not contain LoD information.")); PADDLE_ENFORCE_EQ( in->lod().size(), 1UL, @@ -159,11 +158,12 @@ template class SequenceConvGradXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in_g = context.Output(framework::GradVarName("X")); - auto* out_g = context.Input(framework::GradVarName("Out")); + auto* in_g = context.Output(framework::GradVarName("X")); + auto* out_g = + context.Input(framework::GradVarName("Out")); auto* filter_g = context.Output(framework::GradVarName("Filter")); - auto* in = context.Input("X"); + auto* in = context.Input("X"); auto* filter = context.Input("Filter"); int context_start = context.Attr("contextStart"); @@ -171,11 +171,11 @@ class SequenceConvGradXPUKernel : public framework::OpKernel { int context_stride = context.Attr("contextStride"); bool padding_trainable = context.Attr("paddingTrainable"); - PADDLE_ENFORCE_EQ( - in->lod().empty(), - false, - platform::errors::InvalidArgument("Input(X) Tensor of SequenceConvOp " - "does not contain LoD information.")); + PADDLE_ENFORCE_EQ(in->lod().empty(), + false, + platform::errors::InvalidArgument( + "Input(X) phi::DenseTensor of SequenceConvOp " + "does not contain LoD information.")); PADDLE_ENFORCE_EQ( in->lod().size(), 1UL, diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc index 337ea46b260e9f..979296eb044cc7 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc @@ -36,11 +36,11 @@ class SequenceEnumerateOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "(2-D LoDTensor with the 2nd dimension equal to 1) " - "Input LoDTensor of SequenceEnumerate operator."); + "(2-D phi::DenseTensor with the 2nd dimension equal to 1) " + "Input phi::DenseTensor of SequenceEnumerate operator."); AddOutput("Out", - "(2-D LoDTensor with the 2nd dimension equal to win_size) " - "Output LoDTensor of SequenceEnumerate operator."); + "(2-D phi::DenseTensor with the 2nd dimension equal to win_size) " + "Output phi::DenseTensor of SequenceEnumerate operator."); AddAttr("win_size", "(int) The enumerate sequence window size.") .AddCustomChecker([](const int& win_size) { PADDLE_ENFORCE_GE(win_size, diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu index 0f53f292ef8ae4..ee69333f924fee 100644 --- 
a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu @@ -21,7 +21,6 @@ namespace paddle { namespace operators { using phi::PADDLE_CUDA_NUM_THREADS; -using LoDTensor = phi::DenseTensor; template __global__ void CalcOutPut(const T* in_data, @@ -52,8 +51,8 @@ template class SequenceEnumerateOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* out = context.Output("Out"); + auto* in = context.Input("X"); + auto* out = context.Output("Out"); int win_size = context.Attr("win_size"); int pad_value = context.Attr("pad_value"); diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h index 90cb9300626215..048f28d85917b9 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h @@ -18,14 +18,13 @@ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; template class SequenceEnumerateKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* out = context.Output("Out"); + auto* in = context.Input("X"); + auto* out = context.Output("Out"); int win_size = context.Attr("win_size"); auto pad_value = static_cast(context.Attr("pad_value")); @@ -33,7 +32,7 @@ class SequenceEnumerateKernel : public framework::OpKernel { in->lod().empty(), false, platform::errors::InvalidArgument( - "Input(X) Tensor of SequenceEnumerateOp does not contain " + "Input(X) phi::DenseTensor of SequenceEnumerateOp does not contain " "LoD information.")); auto in_dims = in->dims(); diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc index 2943b8895978f6..fe50d8502c0eb2 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc @@ -27,20 +27,21 @@ class SequenceEraseOp : public framework::OperatorWithKernel { OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SequenceErase"); OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "SequenceErase"); auto x_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE(x_dims.size() == 2 && x_dims[1] == 1, - platform::errors::InvalidArgument( - "Input(X) of SequenceEraseOp should be a 2-D LoDTensor " - "with the 2nd dimension equal to 1," - "but received size %d with the 2nd dimension %d.", - x_dims.size(), - x_dims[1])); + PADDLE_ENFORCE( + x_dims.size() == 2 && x_dims[1] == 1, + platform::errors::InvalidArgument( + "Input(X) of SequenceEraseOp should be a 2-D phi::DenseTensor " + "with the 2nd dimension equal to 1," + "but received size %d with the 2nd dimension %d.", + x_dims.size(), + x_dims[1])); ctx->SetOutputDim("Out", x_dims); - // The output LoDTensor's lod_level should be input X's lod_level. + // The output phi::DenseTensor's lod_level should be input X's lod_level. // For compile-time, we call SetLoDLevel to set output's lod_level. - // For runtime, output LoDTensor's lod is determined by input X's lod and - // the level specified by input RandTable. - // We cannot get X's detail lod and RankTable's level in this function, so - // leave this work to the detail kernel implementation. 
+ // For runtime, output phi::DenseTensor's lod is determined by input X's lod + // and the level specified by input RandTable. We cannot get X's detail lod + // and RankTable's level in this function, so leave this work to the detail + // kernel implementation. if (!ctx->IsRuntime()) { ctx->SetLoDLevel("Out", ctx->GetLoDLevel("X")); } @@ -51,11 +52,11 @@ class SequenceEraseOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "(2-D LoDTensor with the 2nd dim. equal to 1) " - "Input LoDTensor of SequenceEraseOp."); + "(2-D phi::DenseTensor with the 2nd dim. equal to 1) " + "Input phi::DenseTensor of SequenceEraseOp."); AddOutput("Out", - "(2-D LoDTensor with the 2nd dim. equal to 1) " - "Output LoDTensor of SequenceEraseOp."); + "(2-D phi::DenseTensor with the 2nd dim. equal to 1) " + "Output phi::DenseTensor of SequenceEraseOp."); AddAttr>("tokens", "(vector) Tokens need to be erased from " "input sequences."); @@ -64,7 +65,7 @@ Sequence Erase Operator. Sequence erase operator erases tokens specified by Attr(tokens) from the input sequences Input(X), and outputs the remaining data and modifies the LoD -information at the same time. For example, given a 2-D LoDTensor +information at the same time. For example, given a 2-D phi::DenseTensor X = [[2, 2, 6, 1, 3, 9, 6, 1, 0, 1]]^T @@ -77,7 +78,7 @@ operation, the three sequences become X1' = [[6]]^T, X2' = [[1, 9]]^T and X3' = [[6, 1, 0, 1]]^T. -Hence the LoDTensor Output(Out) should be +Hence the phi::DenseTensor Output(Out) should be Out = [[6, 1, 9, 6, 1, 0, 1]]^T, diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu index d8b0afbc85dc59..b573df956df1c1 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu @@ -21,7 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { using phi::PADDLE_CUDA_NUM_THREADS; -using LoDTensor = phi::DenseTensor; template __global__ void LabelErasedIdx(const T* in_dat, @@ -67,8 +66,8 @@ template class SequenceEraseOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* in = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto lod = in->lod(); PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc index aa27516a3356e2..b1223618eea0d5 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc @@ -20,8 +20,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; - class SequenceExpandAsOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -49,8 +47,8 @@ class SequenceExpandAsOp : public framework::OperatorWithKernel { framework::Variable* y_var = PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("Y")[0]); - auto& x_dim = x_var->Get().dims(); - auto& y_lod = y_var->Get().lod(); + auto& x_dim = x_var->Get().dims(); + auto& y_lod = y_var->Get().lod(); PADDLE_ENFORCE_EQ(y_lod.size(), 1, @@ -96,13 +94,16 @@ class SequenceExpandAsOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "(LoDTensor, default LoDTensor) A 2-D LoDTensor whose lod " + "(phi::DenseTensor, default phi::DenseTensor) A 2-D " + "phi::DenseTensor whose lod " "level is at most 1."); AddInput("Y", - "(LoDTensor, default LoDTensor) Referred LoDTensor whose " + "(phi::DenseTensor, default phi::DenseTensor) Referred " + "phi::DenseTensor whose " "lod (specified level) is referred by Input(X)."); AddOutput("Out", - "(LodTensor, default LoDTensor) Output LoDTensor which is " + "(phi::DenseTensor, default phi::DenseTensor) Output " + "phi::DenseTensor which is " "generated from Input(X) by referring lod of Input(Y)."); AddComment(R"DOC( Sequence Expand As Operator. @@ -116,26 +117,26 @@ Following are cases to better explain how this works: Case 1: -Given a 1-level LoDTensor input(X) +Given a 1-level phi::DenseTensor input(X) X.data = [[a], [b], [c], [d]] X.dims = [4, 1] and input(Y) Y.lod = [[0, 3, 6, 7, 8]] ref_level: 0 -then we get 1-level LoDTensor +then we get 1-level phi::DenseTensor Out.lod = [[0, 3, 6, 7, 8]] Out.data = [[a], [a], [a], [b], [b], [b], [c], [d]] Out.dims = [8, 1] Case 2: -Given a common Tensor input(X) +Given a common phi::DenseTensor input(X) X.data = [[a, b], [c, d], [e, f]] X.dims = [3, 2] and input(Y) Y.lod = [[0, 2, 3, 6]] ref_level: 0 -then we get a common LoDTensor +then we get a common phi::DenseTensor Out.lod = [[0, 2, 3, 6]] Out.data = [[a, b], [a, b] [c, d], [e, f], [e, f], [e, f]] Out.dims = [6, 2] diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu index f565e0d438a0e6..d5fecace6d7678 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu @@ -20,8 +20,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; - template static __global__ void sequence_expand_as_kernel(const T *in_data, const size_t *expand_offset, @@ -69,9 +67,9 @@ template struct SequenceExpandAsFunctor { void operator()( const phi::GPUContext &context, - const LoDTensor &x, + const phi::DenseTensor &x, const framework::Vector &ref_lod, /*expand referenced lod*/ - LoDTensor *out) { + phi::DenseTensor *out) { int height = x.dims()[0]; int width = phi::product(x.dims()) / height; @@ -99,9 +97,9 @@ struct SequenceExpandAsFunctor { template struct SequenceExpandAsGradFunctor { void operator()(const phi::GPUContext &context, - const LoDTensor &dout, + const phi::DenseTensor &dout, const framework::Vector &ref_lod, /*expand based lod*/ - LoDTensor *dx) { + phi::DenseTensor *dx) { int height = dx->dims()[0]; int width = phi::product(dx->dims()) / height; diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h index af43aec7931e7e..1366fe87ab3081 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h @@ -22,7 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; template @@ -32,30 +31,30 @@ template struct SequenceExpandFunctor { void operator()( const DeviceContext& ctx, - const LoDTensor& x, + const phi::DenseTensor& x, const framework::Vector& x_lod, /*expand source lod*/ const framework::Vector& ref_lod, /*expand referenced lod*/ - LoDTensor* out); + phi::DenseTensor* out); }; template struct SequenceExpandGradFunctor { void operator()( const DeviceContext& ctx, - const LoDTensor& dout, + const phi::DenseTensor& dout, const framework::Vector& x_lod, /*expand source lod*/ const framework::Vector& ref_lod, /*expand referenced lod*/ - LoDTensor* dx); + phi::DenseTensor* dx); }; template struct SequenceExpandFunctor { void operator()( const phi::CPUContext& context, - const LoDTensor& x, + const phi::DenseTensor& x, const framework::Vector& x_lod, /*expand source lod*/ const framework::Vector& ref_lod, /*expand referenced lod*/ - LoDTensor* out) { + phi::DenseTensor* out) { int out_offset = 0; int x_item_length = x.numel() / x.dims()[0]; auto out_data = out->data(); @@ -88,9 +87,9 @@ template class SequenceExpandKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* y = context.Input("Y"); - auto* out = context.Output("Out"); + auto* x = context.Input("X"); + auto* y = context.Input("Y"); + auto* out = context.Output("Out"); int ref_level = context.Attr("ref_level"); auto& x_lod = x->lod(); @@ -100,7 +99,7 @@ class SequenceExpandKernel : public framework::OpKernel { y_lod.empty(), false, platform::errors::InvalidArgument( - "Input(Y) Tensor of SequenceExpandOp does not contain " + "Input(Y) phi::DenseTensor of SequenceExpandOp does not contain " "LoD information.")); if (ref_level == -1) ref_level = y_lod.size() - 1; @@ -164,10 +163,10 @@ template struct SequenceExpandGradFunctor { void operator()( const phi::CPUContext& context, - const LoDTensor& dout, + const phi::DenseTensor& dout, const framework::Vector& x_lod, /*expand source lod*/ const framework::Vector& ref_lod, /*expand referenced lod*/ - LoDTensor* dx) { + phi::DenseTensor* dx) { int dout_offset = 0; for (size_t i = 1; i < ref_lod.size(); ++i) { int repeat_num = ref_lod[i] - 
ref_lod[i - 1]; @@ -193,10 +192,11 @@ template class SequenceExpandGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* g_out = context.Input(framework::GradVarName("Out")); - auto* x = context.Input("X"); - auto* y = context.Input("Y"); - auto* g_x = context.Output(framework::GradVarName("X")); + auto* g_out = + context.Input(framework::GradVarName("Out")); + auto* x = context.Input("X"); + auto* y = context.Input("Y"); + auto* g_x = context.Output(framework::GradVarName("X")); int ref_level = context.Attr("ref_level"); g_x->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc index 6c14fa997fe5e6..c3807798610994 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc @@ -82,7 +82,7 @@ class SequenceMaskOpMaker : public framework::OpProtoAndCheckerMaker { SequenceMask Operator This operator outputs a Mask according to Input(X) and Attr(maxlen). -Supposing Input(X) is a Tensor with shape [d_1, d_2, ..., d_n], the +Supposing Input(X) is a phi::DenseTensor with shape [d_1, d_2, ..., d_n], the Output(Y) is a mask with shape [d_1, d_2, ..., d_n, maxlen], where: Y(i_1, i_2, ..., i_n, j) = (j < X(i_1, i_2, ..., i_n)) diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op.h b/paddle/fluid/operators/sequence_ops/sequence_mask_op.h index 87b52174aa8e1c..d541f712a5d670 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_mask_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op.h @@ -28,9 +28,6 @@ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; -using Tensor = phi::DenseTensor; - template struct SequenceMaskForRangeFunctor { HOSTDEVICE SequenceMaskForRangeFunctor(const Tx *x, Ty *y, int maxlen) @@ -50,8 +47,11 @@ struct SequenceMaskForRangeFunctor { template struct SequenceMaskFunctor { - SequenceMaskFunctor( - const DeviceContext &ctx, const Tx *x, Tensor *y, int limits, int maxlen) + SequenceMaskFunctor(const DeviceContext &ctx, + const Tx *x, + phi::DenseTensor *y, + int limits, + int maxlen) : ctx_(ctx), x_(x), y_(y), limits_(limits), maxlen_(maxlen) {} template @@ -64,15 +64,13 @@ struct SequenceMaskFunctor { private: const DeviceContext &ctx_; const Tx *x_; - Tensor *y_; + phi::DenseTensor *y_; int limits_; int maxlen_; }; template class SequenceMaskKernel : public framework::OpKernel { - using Tensor = phi::DenseTensor; - public: void Compute(const framework::ExecutionContext &ctx) const override { auto *x = ctx.Input("X"); diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc b/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc index 1290e79bc076d2..f3b18676abe564 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc @@ -18,8 +18,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class SequenceMaskNPUKernel : public framework::OpKernel { public: @@ -58,7 +56,7 @@ class SequenceMaskNPUKernel : public framework::OpKernel { auto y_dim = phi::vectorize(x->dims()); y_dim.push_back(maxlen); - Tensor cast_x; + phi::DenseTensor cast_x; cast_x.mutable_data(x->dims(), ctx.GetPlace()); const auto& cast1_runner = NpuOpRunner( "Cast", @@ -68,7 +66,7 @@ class SequenceMaskNPUKernel : public framework::OpKernel { ConvertToNpuDtype(framework::TransToProtoVarType(cast_x.dtype()))}}); cast1_runner.Run(dev_ctx.stream()); - Tensor tmp; + phi::DenseTensor tmp; tmp.mutable_data(phi::make_ddim({maxlen}), ctx.GetPlace()); NpuOpRunner range_runner; range_runner.SetType("Range"); @@ -78,7 +76,7 @@ class SequenceMaskNPUKernel : public framework::OpKernel { range_runner.AddOutput(tmp); range_runner.Run(dev_ctx.stream()); - Tensor expand_tmp; + phi::DenseTensor expand_tmp; expand_tmp.mutable_data(phi::make_ddim(y_dim), ctx.GetPlace()); const auto& expand_runner = NpuOpRunner("ExpandD", {tmp}, {expand_tmp}, {{"shape", y_dim}}); @@ -87,7 +85,7 @@ class SequenceMaskNPUKernel : public framework::OpKernel { auto x_dims = phi::vectorize(x->dims()); x_dims.push_back(1); cast_x.Resize(phi::make_ddim({x_dims})); - Tensor x_tmp; + phi::DenseTensor x_tmp; x_tmp.mutable_data(phi::make_ddim(y_dim), ctx.GetPlace()); const auto& tile_runner = NpuOpRunner("TileWithAxis", @@ -96,7 +94,7 @@ class SequenceMaskNPUKernel : public framework::OpKernel { {{"axis", x->dims().size()}, {"tiles", maxlen}}); tile_runner.Run(dev_ctx.stream()); - Tensor y_tmp; + phi::DenseTensor y_tmp; y_tmp.mutable_data(phi::make_ddim(y_dim), ctx.GetPlace()); const auto& less_runner = NpuOpRunner("Less", {expand_tmp, x_tmp}, {y_tmp}, {}); diff --git a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc b/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc index d427e339fb9c37..6957920131ceac 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc @@ -69,7 +69,7 @@ class SequencePadOp : public framework::OperatorWithKernel { // run time framework::Variable* x_var = PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("X")[0]); - const auto& x_lod = x_var->Get().lod(); + const auto& x_lod = x_var->Get().lod(); PADDLE_ENFORCE_EQ(x_lod.empty(), false, platform::errors::NotFound( @@ -145,20 +145,22 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "(LoDTensor, default LoDTensor) Input variable which " + "(phi::DenseTensor, default phi::DenseTensor) Input " + "variable which " "should contain lod information."); AddInput("PadValue", - "(LoDTensor), this Tensor holds values that will be fill into " + "(phi::DenseTensor), this phi::DenseTensor holds values that will " + "be fill into " "padded steps. It can be a scalar or a tensor whose shape equals " "to time steps in sequences. 
If it's a scalar, it will be " "automatically broadcasted to the shape of time step."); - AddOutput( - "Out", - "(LoDTensor) The output vairable, which contains padded sequences."); - AddOutput( - "Length", - "(LoDTensor) The output vairable, which contains the actual length of " - "sequences before padding."); + AddOutput("Out", + "(phi::DenseTensor) The output vairable, which contains padded " + "sequences."); + AddOutput("Length", + "(phi::DenseTensor) The output vairable, which contains the " + "actual length of " + "sequences before padding."); AddAttr( "padded_length", "The length of padded sequences. It can be set to -1 or " @@ -179,41 +181,41 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker { Case 1: - Given a 1-level LoDTensor input(X): + Given a 1-level phi::DenseTensor input(X): X.lod = [[0, 2, 5]] X.data = [a, b, c, d, e] and Input(PadValue): PadValue.data = [0] and attribite 'padded_length' = 4, - then we get LoDTensor: + then we get phi::DenseTensor: Out.data = [[a, b, 0, 0], [c, d, e, 0]] Length.data = [2, 3] Case 2: - Given a 1-level LoDTensor input(X): + Given a 1-level phi::DenseTensor input(X): X.lod = [[0, 2, 5]] X.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]] and Input(PadValue): PadValue.data = [0] and attribite 'padded_length' = -1, which mean using the length of longest input sequence(3 in this case), - then we get LoDTensor: + then we get phi::DenseTensor: Out.data = [[[a1, a2], [b1, b2], [0, 0]], [[c1, c2], [d1, d2], [e1, e2]]] Length.data = [2, 3] Case 3: - Given a 1-level LoDTensor input(X): + Given a 1-level phi::DenseTensor input(X): X.lod = [[0, 2, 5]] X.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]] and Input(PadValue): PadValue.data = [p1, p2] and attribite 'padded_length' = -1, which mean using the length of longest input sequence(3 in this case), - then we get LoDTensor: + then we get phi::DenseTensor: Out.data = [[[a1, a2], [b1, b2], [p1, p2]], [[c1, c2], [d1, d2], [e1, e2]]] Length.data = [2, 3] diff --git a/paddle/fluid/operators/sequence_ops/sequence_pad_op.h b/paddle/fluid/operators/sequence_ops/sequence_pad_op.h index 6f9026095756aa..0615e0c943e251 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pad_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_pad_op.h @@ -24,25 +24,24 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using LoDTensor = phi::DenseTensor; using LoD = framework::LoD; template class SequencePadOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); - auto* len_t = ctx.Output("Length"); + const auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); + auto* len_t = ctx.Output("Length"); out->mutable_data(ctx.GetPlace()); - PADDLE_ENFORCE_EQ( - x->lod().empty(), - false, - platform::errors::NotFound("Input(X) Tensor of SequencePadOp does not " - "contain LoD information.")); + PADDLE_ENFORCE_EQ(x->lod().empty(), + false, + platform::errors::NotFound( + "Input(X) phi::DenseTensor of SequencePadOp does not " + "contain LoD information.")); - const auto* pad_value = ctx.Input("PadValue"); + const auto* pad_value = ctx.Input("PadValue"); int padded_length = ctx.Attr("padded_length"); @@ -56,7 +55,7 @@ class SequencePadOpKernel : public framework::OpKernel { false, math::kBatchLengthWidth); - LoDTensor seq_len; + phi::DenseTensor seq_len; seq_len.Resize(len_t->dims()); int64_t* len_data = seq_len.mutable_data(platform::CPUPlace()); for (size_t i = 1; i < x->lod()[0].size(); ++i) { @@ -73,9 +72,10 @@ template class SequencePadGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* d_x = ctx.Output(framework::GradVarName("X")); + auto* d_x = ctx.Output(framework::GradVarName("X")); if (d_x) { - const auto* d_out = ctx.Input(framework::GradVarName("Out")); + const auto* d_out = + ctx.Input(framework::GradVarName("Out")); d_x->mutable_data(ctx.GetPlace()); int padded_length = ctx.Attr("padded_length"); diff --git a/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc b/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc index 9b8697b976633a..778b2f88549453 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc @@ -53,12 +53,15 @@ class SequencePoolOp : public framework::OperatorWithKernel { class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "(LoDTensor) The variable-length input of SequencePoolOp"); - AddOutput("Out", - "(Tensor) The output of SequencePoolOp does not contain LoD " - "information."); + AddInput("X", + "(phi::DenseTensor) The variable-length input of SequencePoolOp"); + AddOutput( + "Out", + "(phi::DenseTensor) The output of SequencePoolOp does not contain LoD " + "information."); AddOutput("MaxIndex", - "(Tensor) This tensor is used for the sequence max-pooling " + "(phi::DenseTensor) This tensor is used for the sequence " + "max-pooling " "to record the max indexes.") .AsIntermediate(); AddAttr("is_test", @@ -92,11 +95,11 @@ The following example explains how this works: For a mini-batch of 3 variable-length sentences, containing 2, 3, and 2 time-steps: -Assume X is a [7,M,N] LoDTensor, and X->lod()[0] = [0, 2, 5, 7], 7=2+3+2. +Assume X is a [7,M,N] phi::DenseTensor, and X->lod()[0] = [0, 2, 5, 7], 7=2+3+2. Besides, for the sake of simplicity, we assume M=1 and N=1, and the value of X = [[1, 3], [2, 4, 6], [5, 1]]. -Thus, Out is a [3,1,1] Tensor without LoD information. +Thus, Out is a [3,1,1] phi::DenseTensor without LoD information. 
And for different pooltype, the value of Out is as follows: - AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2 diff --git a/paddle/fluid/operators/sequence_ops/sequence_pool_op.h b/paddle/fluid/operators/sequence_ops/sequence_pool_op.h index 199187a1544c72..78acb4eef28a7a 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pool_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_pool_op.h @@ -23,15 +23,12 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; - template class SequencePoolKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* out = context.Output("Out"); + auto* in = context.Input("X"); + auto* out = context.Output("Out"); std::string pooltype = context.Attr("pooltype"); T pad_value = static_cast(context.Attr("pad_value")); @@ -39,11 +36,11 @@ class SequencePoolKernel : public framework::OpKernel { auto lod = in->lod(); auto lod_level = lod.size(); // InferShape by lod - PADDLE_ENFORCE_GT( - lod_level, - 0, - platform::errors::InvalidArgument("Input(X) Tensor of SequencePoolOp " - "does not contain LoD information.")); + PADDLE_ENFORCE_GT(lod_level, + 0, + platform::errors::InvalidArgument( + "Input(X) phi::DenseTensor of SequencePoolOp " + "does not contain LoD information.")); PADDLE_ENFORCE_LE(lod_level, 2UL, platform::errors::InvalidArgument( @@ -100,8 +97,9 @@ template class SequencePoolGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* out_g = context.Input(framework::GradVarName("Out")); - auto* in_g = context.Output(framework::GradVarName("X")); + auto* out_g = + context.Input(framework::GradVarName("Out")); + auto* in_g = context.Output(framework::GradVarName("X")); std::string pooltype = context.Attr("pooltype"); const phi::DenseTensor* index = nullptr; if (pooltype == "MAX") { diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h index aeae0a0e1fdd5e..03036a0babf3b6 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h @@ -19,33 +19,30 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; -using LoDTensor = phi::DenseTensor; - template struct SequenceSoftmaxFunctor { void operator()( const DeviceContext &ctx, - const LoDTensor &x, + const phi::DenseTensor &x, const framework::Vector &ref_lod, /*expand referenced lod*/ - LoDTensor *out); + phi::DenseTensor *out); }; template struct SequenceSoftmaxGradFunctor { void operator()(const DeviceContext &ctx, - const LoDTensor &dout, - const LoDTensor &out, + const phi::DenseTensor &dout, + const phi::DenseTensor &out, const framework::Vector &ref_lod, /*referenced lod*/ - LoDTensor *dx); + phi::DenseTensor *dx); }; template struct SequenceSoftmaxFunctor { void operator()(const phi::CPUContext &ctx, - const LoDTensor &x, + const phi::DenseTensor &x, const framework::Vector &ref_lod, /*referenced lod*/ - LoDTensor *out) { + phi::DenseTensor *out) { size_t height = ref_lod.size() - 1; const T *in_data = x.data(); T *out_data = out->mutable_data(ctx.GetPlace()); @@ -65,10 +62,10 @@ struct SequenceSoftmaxFunctor { template struct SequenceSoftmaxGradFunctor { void operator()(const phi::CPUContext &ctx, - const LoDTensor &dout, - const LoDTensor &out, + const phi::DenseTensor &dout, + const phi::DenseTensor &out, const framework::Vector &ref_lod, /*referenced lod*/ - LoDTensor *dx) { + phi::DenseTensor *dx) { size_t height = ref_lod.size() - 1; const T *softmax_grad_data = dout.data(); @@ -94,17 +91,17 @@ template class SequenceSoftmaxKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *x = ctx.Input("X"); - auto *out = ctx.Output("Out"); + auto *x = ctx.Input("X"); + auto *out = ctx.Output("Out"); auto lod = x->lod(); auto dims = x->dims(); - PADDLE_ENFORCE_EQ( - lod.empty(), - false, - platform::errors::InvalidArgument( - "Input(X) Tensor of SequenceSoftmax operator does not contain " - "LoD information.")); + PADDLE_ENFORCE_EQ(lod.empty(), + false, + platform::errors::InvalidArgument( + "Input(X) phi::DenseTensor of SequenceSoftmax " + "operator does not contain " + "LoD information.")); const size_t level = lod.size() - 1; PADDLE_ENFORCE_EQ( @@ -138,10 +135,10 @@ template class SequenceSoftmaxGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *out = ctx.Input("Out"); - auto *out_grad = ctx.Input(framework::GradVarName("Out")); - auto *x = ctx.Input("X"); - auto *x_grad = ctx.Output(framework::GradVarName("X")); + auto *out = ctx.Input("Out"); + auto *out_grad = ctx.Input(framework::GradVarName("Out")); + auto *x = ctx.Input("X"); + auto *x_grad = ctx.Output(framework::GradVarName("X")); if (!x_grad) { return; } From d8c9f19ac44ddf2299e975bdf5732e63bc6b39ec Mon Sep 17 00:00:00 2001 From: cyber-pioneer <116002591+cyber-pioneer@users.noreply.github.com> Date: Mon, 5 Dec 2022 11:40:41 +0800 Subject: [PATCH 137/154] move paddle.fluid.layers.tensor.create_tensor to paddle.tensor.creation.create_tensor (#48662) --- python/paddle/distribution/distribution.py | 2 +- python/paddle/fluid/layers/tensor.py | 43 ------------------ python/paddle/fluid/tests/test_if_else_op.py | 4 +- .../fleet/dist_mnist_gradient_merge.py | 2 +- .../collective/fleet/pipeline_mnist.py | 2 +- .../fleet/pipeline_mnist_multi_device.py | 2 +- .../fleet/pipeline_mnist_one_device.py | 2 +- .../tests/unittests/dist_allreduce_op.py | 2 +- .../dist_fleet_raw_program_optimizer.py | 2 +- ...et_raw_program_optimizer_fuse_allreduce.py | 2 +- 
.../fluid/tests/unittests/dist_mnist.py | 2 +- .../tests/unittests/dist_mnist_batch_merge.py | 2 +- .../unittests/dist_mnist_fp16_allreduce.py | 2 +- .../fluid/tests/unittests/dist_mnist_lars.py | 2 +- .../test_distribution_categorical.py | 3 +- .../unittests/npu/test_assign_value_op_npu.py | 2 +- .../tests/unittests/test_assign_value_op.py | 2 +- .../tests/unittests/test_conditional_block.py | 2 +- .../fluid/tests/unittests/test_desc_clone.py | 2 +- .../fluid/tests/unittests/test_fetch_var.py | 5 ++- .../fluid/tests/unittests/test_profiler.py | 2 +- .../tests/unittests/test_square_error_cost.py | 5 +-- .../unittests/xpu/test_assign_value_op_xpu.py | 2 +- python/paddle/tensor/__init__.py | 4 ++ python/paddle/tensor/creation.py | 44 ++++++++++++++++++- 25 files changed, 74 insertions(+), 70 deletions(-) diff --git a/python/paddle/distribution/distribution.py b/python/paddle/distribution/distribution.py index 15ee5d8e011e4e..ae4cb2f9b16ef6 100644 --- a/python/paddle/distribution/distribution.py +++ b/python/paddle/distribution/distribution.py @@ -203,7 +203,7 @@ def _to_tensor(self, *args): dtype = tmp.dtype for arg in numpy_args: arg_broadcasted, _ = np.broadcast_arrays(arg, tmp) - arg_variable = tensor.create_tensor(dtype=dtype) + arg_variable = paddle.tensor.create_tensor(dtype=dtype) tensor.assign(arg_broadcasted, arg_variable) variable_args.append(arg_variable) diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index e131744cd8685e..6a88b6828fb851 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -47,7 +47,6 @@ from paddle import _C_ops, _legacy_C_ops __all__ = [ - 'create_tensor', 'create_global_var', 'cast', 'tensor_array_to_tensor', @@ -62,48 +61,6 @@ ] -def create_tensor(dtype, name=None, persistable=False): - """ - Create a variable, which will hold a Tensor with data type dtype. - - Args: - dtype(string|numpy.dtype): the data type of Tensor to be created, the - data type is bool, float16, float32, float64, int8, int16, int32 and int64. - name(string, optional): The default value is None. Normally there is no need for - user to set this property. For more information, please refer to :ref:`api_guide_Name` - persistable(bool): Set the persistable flag of the create tensor. - default value is False. - - Returns: - Variable: The tensor to be created according to dtype. - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - tensor = fluid.layers.create_tensor(dtype='float32') - """ - check_dtype( - dtype, - 'dtype', - [ - 'bool', - 'float16', - 'float32', - 'float64', - 'int8', - 'int32', - 'int32', - 'int64', - ], - 'create_tensor', - ) - helper = LayerHelper("create_tensor", **locals()) - return helper.create_variable( - name=helper.name, dtype=dtype, persistable=persistable - ) - - def create_global_var( shape, value, dtype, persistable=False, force_cpu=False, name=None ): diff --git a/python/paddle/fluid/tests/test_if_else_op.py b/python/paddle/fluid/tests/test_if_else_op.py index 1eba6cbb60ee19..24857164dc30b8 100644 --- a/python/paddle/fluid/tests/test_if_else_op.py +++ b/python/paddle/fluid/tests/test_if_else_op.py @@ -46,7 +46,7 @@ def not_test_raw_api(self): cond = paddle.less_than(x=label, y=limit) true_image, false_image = split_lod_tensor(input=image, mask=cond) - true_out = layers.create_tensor(dtype='float32') + true_out = paddle.tensor.create_tensor(dtype='float32') true_cond = ConditionalBlock([cond]) with true_cond.block(): @@ -54,7 +54,7 @@ def not_test_raw_api(self): prob = layers.fc(input=hidden, size=10, act='softmax') layers.assign(input=prob, output=true_out) - false_out = layers.create_tensor(dtype='float32') + false_out = paddle.tensor.create_tensor(dtype='float32') false_cond = ConditionalBlock([cond]) with false_cond.block(): diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dist_mnist_gradient_merge.py b/python/paddle/fluid/tests/unittests/collective/fleet/dist_mnist_gradient_merge.py index 1e67d722040c40..85394ea89da4e7 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dist_mnist_gradient_merge.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dist_mnist_gradient_merge.py @@ -38,7 +38,7 @@ def get_model(self, batch_size=2): avg_cost = paddle.mean(x=cost) # Evaluator - batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_size_tensor = paddle.tensor.create_tensor(dtype='int64') batch_acc = paddle.static.accuracy( input=predict, label=label, total=batch_size_tensor ) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py index 59572a5e7deacb..4530d8e24216ae 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py @@ -105,7 +105,7 @@ def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): # Evaluator with fluid.device_guard("gpu:1"): - batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_size_tensor = paddle.tensor.create_tensor(dtype='int64') batch_acc = paddle.static.accuracy( input=predict, label=label, total=batch_size_tensor ) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py index e60b6bbbcd4280..0b75b034ce46e1 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py @@ -105,7 +105,7 @@ def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): # Evaluator with fluid.device_guard("gpu:1"): - batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_size_tensor = paddle.tensor.create_tensor(dtype='int64') batch_acc = paddle.static.accuracy( 
input=predict, label=label, total=batch_size_tensor ) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py index 8ecea66aaa6bcb..5b1e590fc00585 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py @@ -97,7 +97,7 @@ def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): # Evaluator with fluid.device_guard("gpu:0"): - batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_size_tensor = paddle.tensor.create_tensor(dtype='int64') batch_acc = paddle.static.accuracy( input=predict, label=label, total=batch_size_tensor ) diff --git a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py index bd3a6d659a39eb..c956f287d7b149 100644 --- a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py @@ -81,7 +81,7 @@ def get_model(self, batch_size=2, single_device=False): avg_cost = paddle.mean(x=cost) # Evaluator - batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_size_tensor = paddle.tensor.create_tensor(dtype='int64') batch_acc = paddle.static.accuracy( input=predict, label=label, total=batch_size_tensor ) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py index 90c1ea16a82c53..7c98169433b0b8 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py @@ -83,7 +83,7 @@ def get_model(self, batch_size=2, single_device=False): avg_cost = paddle.mean(x=cost) # Evaluator - batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_size_tensor = paddle.tensor.create_tensor(dtype='int64') batch_acc = paddle.static.accuracy( input=predict, label=label, total=batch_size_tensor ) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py index 98d7ef1d1569d4..e46173735a9679 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py @@ -83,7 +83,7 @@ def get_model(self, batch_size=2, single_device=False): avg_cost = paddle.mean(x=cost) # Evaluator - batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_size_tensor = paddle.tensor.create_tensor(dtype='int64') batch_acc = paddle.static.accuracy( input=predict, label=label, total=batch_size_tensor ) diff --git a/python/paddle/fluid/tests/unittests/dist_mnist.py b/python/paddle/fluid/tests/unittests/dist_mnist.py index 3cecc8b32c0b02..819b959a1fa8d3 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist.py @@ -82,7 +82,7 @@ def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): avg_cost = paddle.mean(x=cost) # Evaluator - batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_size_tensor = paddle.tensor.create_tensor(dtype='int64') batch_acc = paddle.static.accuracy( input=predict, label=label, total=batch_size_tensor ) diff --git 
a/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py b/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py index 4cda9dd53a7233..aa963ab012bc05 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py @@ -47,7 +47,7 @@ def get_model(self, batch_size=2): avg_cost = paddle.mean(x=cost) # Evaluator - batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_size_tensor = paddle.tensor.create_tensor(dtype='int64') batch_acc = paddle.static.accuracy( input=predict, label=label, total=batch_size_tensor ) diff --git a/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py b/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py index 53819ca5491d4c..ad0b25e8ea15ac 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py @@ -41,7 +41,7 @@ def get_model(self, batch_size=2): avg_cost = paddle.mean(x=cost) # Evaluator - batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_size_tensor = paddle.tensor.create_tensor(dtype='int64') batch_acc = paddle.static.accuracy( input=predict, label=label, total=batch_size_tensor ) diff --git a/python/paddle/fluid/tests/unittests/dist_mnist_lars.py b/python/paddle/fluid/tests/unittests/dist_mnist_lars.py index 347692afdd0b12..b886ad8953461b 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist_lars.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist_lars.py @@ -38,7 +38,7 @@ def get_model(self, batch_size=2): avg_cost = paddle.mean(x=cost) # Evaluator - batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_size_tensor = paddle.tensor.create_tensor(dtype='int64') batch_acc = paddle.static.accuracy( input=predict, label=label, total=batch_size_tensor ) diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_categorical.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_categorical.py index 5dfcedcc0c5a63..91e5b225767433 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_categorical.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_categorical.py @@ -20,7 +20,6 @@ import paddle from paddle import fluid from paddle.distribution import Categorical, Distribution, Normal, Uniform -from paddle.fluid import layers np.random.seed(2022) @@ -380,7 +379,7 @@ def test_distribution_error(self): ) value_npdata = np.array([0.8], dtype="float32") - value_tensor = layers.create_tensor(dtype="float32") + value_tensor = paddle.tensor.create_tensor(dtype="float32") self.assertRaises( NotImplementedError, distribution.log_prob, value_tensor ) diff --git a/python/paddle/fluid/tests/unittests/npu/test_assign_value_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_assign_value_op_npu.py index 402b90bc49bbdf..1df24e54a16b32 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_assign_value_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_assign_value_op_npu.py @@ -93,7 +93,7 @@ def init_dtype(self): def test_assign(self): main_program = fluid.Program() with fluid.program_guard(main_program): - x = layers.create_tensor(dtype=self.dtype) + x = paddle.tensor.create_tensor(dtype=self.dtype) layers.assign(input=self.value, output=x) exe = fluid.Executor(self.place) diff --git a/python/paddle/fluid/tests/unittests/test_assign_value_op.py b/python/paddle/fluid/tests/unittests/test_assign_value_op.py index 
7a5128ed2ff159..c0a5554d39b978 100644 --- a/python/paddle/fluid/tests/unittests/test_assign_value_op.py +++ b/python/paddle/fluid/tests/unittests/test_assign_value_op.py @@ -83,7 +83,7 @@ def init_dtype(self): def test_assign(self): main_program = fluid.Program() with fluid.program_guard(main_program): - x = layers.create_tensor(dtype=self.dtype) + x = paddle.tensor.create_tensor(dtype=self.dtype) layers.assign(input=self.value, output=x) exe = fluid.Executor(self.place) diff --git a/python/paddle/fluid/tests/unittests/test_conditional_block.py b/python/paddle/fluid/tests/unittests/test_conditional_block.py index 418ae3875998e7..1eaf25dc348774 100644 --- a/python/paddle/fluid/tests/unittests/test_conditional_block.py +++ b/python/paddle/fluid/tests/unittests/test_conditional_block.py @@ -33,7 +33,7 @@ def test_forward(self): data = layers.data(name='X', shape=[1], dtype='float32') data.stop_gradient = False cond = ConditionalBlock(inputs=[data]) - out = layers.create_tensor(dtype='float32') + out = paddle.tensor.create_tensor(dtype='float32') with cond.block(): hidden = layers.fc(input=data, size=10) layers.assign(hidden, out) diff --git a/python/paddle/fluid/tests/unittests/test_desc_clone.py b/python/paddle/fluid/tests/unittests/test_desc_clone.py index 7d4f1f0975fc46..477910f53d59d4 100644 --- a/python/paddle/fluid/tests/unittests/test_desc_clone.py +++ b/python/paddle/fluid/tests/unittests/test_desc_clone.py @@ -77,7 +77,7 @@ def get_model(batch_size): avg_cost = paddle.mean(x=cost) # Evaluator - batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_size_tensor = paddle.tensor.create_tensor(dtype='int64') batch_acc = paddle.static.accuracy( input=predict, label=label, total=batch_size_tensor ) diff --git a/python/paddle/fluid/tests/unittests/test_fetch_var.py b/python/paddle/fluid/tests/unittests/test_fetch_var.py index 4339813584a909..3303e30a4f3ad6 100644 --- a/python/paddle/fluid/tests/unittests/test_fetch_var.py +++ b/python/paddle/fluid/tests/unittests/test_fetch_var.py @@ -16,6 +16,7 @@ import numpy as np +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers @@ -26,7 +27,9 @@ def set_input(self): def test_fetch_var(self): self.set_input() - x = layers.create_tensor(dtype="int32", persistable=True, name="x") + x = paddle.tensor.create_tensor( + dtype="int32", persistable=True, name="x" + ) layers.assign(input=self.val, output=x) exe = fluid.Executor(fluid.CPUPlace()) exe.run(fluid.default_main_program(), feed={}, fetch_list=[]) diff --git a/python/paddle/fluid/tests/unittests/test_profiler.py b/python/paddle/fluid/tests/unittests/test_profiler.py index 6b414afbe4a7fb..62d46d4cadc48d 100644 --- a/python/paddle/fluid/tests/unittests/test_profiler.py +++ b/python/paddle/fluid/tests/unittests/test_profiler.py @@ -59,7 +59,7 @@ def build_program(self, compile_program=True): label = fluid.layers.data(name='y', shape=[1], dtype='int64') cost = fluid.layers.cross_entropy(input=predict, label=label) avg_cost = paddle.mean(cost) - batch_size = fluid.layers.create_tensor(dtype='int64') + batch_size = paddle.tensor.create_tensor(dtype='int64') batch_acc = paddle.static.accuracy( input=predict, label=label, total=batch_size ) diff --git a/python/paddle/fluid/tests/unittests/test_square_error_cost.py b/python/paddle/fluid/tests/unittests/test_square_error_cost.py index 7828f01b02fe61..afd16a3095738e 100644 --- a/python/paddle/fluid/tests/unittests/test_square_error_cost.py +++ b/python/paddle/fluid/tests/unittests/test_square_error_cost.py @@ -19,7 
+19,6 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers from paddle.fluid.executor import Executor @@ -31,8 +30,8 @@ def test_square_error_cost(self): sub = input_val - label_val np_result = sub * sub - input_var = layers.create_tensor(dtype="float32", name="input") - label_var = layers.create_tensor(dtype="float32", name="label") + input_var = paddle.tensor.create_tensor(dtype="float32", name="input") + label_var = paddle.tensor.create_tensor(dtype="float32", name="label") output = paddle.nn.functional.square_error_cost( input=input_var, label=label_var ) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_assign_value_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_assign_value_op_xpu.py index 7de6af1b45c3c0..560815cb56bee2 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_assign_value_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_assign_value_op_xpu.py @@ -94,7 +94,7 @@ def init_dtype(self): def test_assign(self): main_program = fluid.Program() with fluid.program_guard(main_program): - x = layers.create_tensor(dtype=self.dtype) + x = paddle.tensor.create_tensor(dtype=self.dtype) layers.assign(input=self.value, output=x) exe = fluid.Executor(self.place) diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py index 4c1ec078380506..89df1b6ac3b477 100755 --- a/python/paddle/tensor/__init__.py +++ b/python/paddle/tensor/__init__.py @@ -19,6 +19,8 @@ from .attribute import real # noqa: F401 from .attribute import imag # noqa: F401 from .attribute import is_floating_point # noqa: F401 +from .creation import create_parameter # noqa: F401 +from .creation import create_tensor # noqa: F401 from .creation import to_tensor # noqa: F401 from .creation import diag # noqa: F401 from .creation import diagflat # noqa: F401 @@ -289,6 +291,8 @@ # this list used in math_op_patch.py for _binary_creator_ tensor_method_func = [ # noqa + 'create_parameter', + 'create_tensor', 'matmul', 'dot', 'cov', diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index c969ee3639bf93..134e27eef9df6e 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -100,7 +100,7 @@ def create_parameter( import paddle paddle.enable_static() - W = paddle.static.create_parameter(shape=[784, 200], dtype='float32') + W = paddle.create_parameter(shape=[784, 200], dtype='float32') """ check_type(shape, 'shape', (list, tuple, np.ndarray), 'create_parameter') for item in shape: @@ -150,6 +150,48 @@ def create_parameter( ) +def create_tensor(dtype, name=None, persistable=False): + """ + Create a variable, which will hold a Tensor with data type dtype. + + Args: + dtype(string|numpy.dtype): the data type of Tensor to be created, the + data type is bool, float16, float32, float64, int8, int16, int32 and int64. + name(string, optional): The default value is None. Normally there is no need for + user to set this property. For more information, please refer to :ref:`api_guide_Name` + persistable(bool): Set the persistable flag of the create tensor. + default value is False. + + Returns: + Variable: The tensor to be created according to dtype. + + Examples: + .. 
code-block:: python + + import paddle + tensor = paddle.tensor.create_tensor(dtype='float32') + """ + check_dtype( + dtype, + 'dtype', + [ + 'bool', + 'float16', + 'float32', + 'float64', + 'int8', + 'int32', + 'int32', + 'int64', + ], + 'create_tensor', + ) + helper = LayerHelper("create_tensor", **locals()) + return helper.create_variable( + name=helper.name, dtype=dtype, persistable=persistable + ) + + def linspace(start, stop, num, dtype=None, name=None): r""" Return fixed number of evenly spaced values within a given interval. From 0ebace143854c94dece090fd83baeb9d45757c03 Mon Sep 17 00:00:00 2001 From: Matsumoto Ruko <38883252+gsq7474741@users.noreply.github.com> Date: Mon, 5 Dec 2022 11:54:11 +0800 Subject: [PATCH 138/154] remove deprecated warnings for py36 (#48639) --- python/paddle/utils/deprecated.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/python/paddle/utils/deprecated.py b/python/paddle/utils/deprecated.py index 7854f12aa9c10c..cd9e6947b77b35 100755 --- a/python/paddle/utils/deprecated.py +++ b/python/paddle/utils/deprecated.py @@ -23,14 +23,6 @@ __all__ = [] -# NOTE(zhiqiu): Since python 3.2, DeprecationWarning is ignored by default, -# and since python 3.7, it is once again shown by default when triggered directly by code in __main__. -# See details: https://docs.python.org/3/library/warnings.html#default-warning-filter -# The following line set DeprecationWarning to show once, which is expected to work in python 3.2 -> 3.6 -# However, doing this could introduce one samll side effect, i.e., the DeprecationWarning which is not issued by @deprecated. -# The side effect is acceptable, and we will find better way to do this if we could. -warnings.simplefilter('default', DeprecationWarning) - def deprecated(update_to="", since="", reason="", level=0): """Decorate a function to signify its deprecation. 
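With the simplefilter call above removed, the decorator relies on the interpreter defaults: from Python 3.7 on, a DeprecationWarning raised directly from __main__ is shown, and callers elsewhere can still opt in themselves. A minimal sketch of that opt-in follows; deprecated_api is a hypothetical stand-in for a function wrapped with the @deprecated decorator.

    import warnings

    def deprecated_api():
        # Stand-in for an API decorated with paddle.utils.deprecated.
        warnings.warn("deprecated_api will be removed", DeprecationWarning, stacklevel=2)

    # Re-enable DeprecationWarning explicitly, which is what the removed line did globally.
    warnings.simplefilter("default", DeprecationWarning)
    deprecated_api()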
From 89f024e35167a39f72dc956b170aa9a291595c3f Mon Sep 17 00:00:00 2001 From: 201716010711 <87008376+201716010711@users.noreply.github.com> Date: Mon, 5 Dec 2022 12:56:12 +0800 Subject: [PATCH 139/154] delete shape api (#48546) --- python/paddle/distribution/normal.py | 2 +- .../paddle/fluid/contrib/layers/rnn_impl.py | 4 +- python/paddle/fluid/layers/detection.py | 2 +- python/paddle/fluid/layers/nn.py | 90 ------------------- python/paddle/fluid/layers/rnn.py | 4 +- .../dygraph_to_static/ifelse_simple_func.py | 18 ++-- .../seq2seq_dygraph_model.py | 8 +- .../dygraph_to_static/simnet_dygraph_model.py | 2 +- .../unittests/dygraph_to_static/test_bmn.py | 4 +- .../unittests/dygraph_to_static/test_dict.py | 4 +- .../unittests/dygraph_to_static/test_lac.py | 2 +- .../dygraph_to_static/test_tensor_shape.py | 13 ++- .../unittests/dygraph_to_static/yolov3.py | 2 +- .../test_dynamic_rnn_stop_gradient.py | 2 +- .../fluid/tests/unittests/test_layers.py | 2 +- .../tests/unittests/test_rnn_cell_api.py | 2 +- .../tests/unittests/test_rnn_decode_api.py | 4 +- ...tatic_shape_inferrence_for_shape_tensor.py | 2 +- .../tests/unittests/test_while_loop_op.py | 2 +- .../paddle/jit/dy2static/ast_transformer.py | 2 +- .../paddle/jit/dy2static/convert_operators.py | 5 +- python/paddle/tensor/attribute.py | 2 +- 22 files changed, 45 insertions(+), 133 deletions(-) diff --git a/python/paddle/distribution/normal.py b/python/paddle/distribution/normal.py index 3eb3fd2d59009c..7eb9fb597d3a27 100644 --- a/python/paddle/distribution/normal.py +++ b/python/paddle/distribution/normal.py @@ -180,7 +180,7 @@ def sample(self, shape=(), seed=0): self.loc + self.scale, batch_shape + shape, self.dtype, 0.0 ) zero_tmp_reshape = paddle.reshape(zero_tmp, output_shape) - zero_tmp_shape = nn.shape(zero_tmp_reshape) + zero_tmp_shape = paddle.shape(zero_tmp_reshape) normal_random_tmp = nn.gaussian_random( zero_tmp_shape, mean=0.0, std=1.0, seed=seed, dtype=self.dtype ) diff --git a/python/paddle/fluid/contrib/layers/rnn_impl.py b/python/paddle/fluid/contrib/layers/rnn_impl.py index b28cac87950c9f..4e23057fc4680d 100644 --- a/python/paddle/fluid/contrib/layers/rnn_impl.py +++ b/python/paddle/fluid/contrib/layers/rnn_impl.py @@ -330,7 +330,7 @@ def basic_gru( mask = None if sequence_length: - max_seq_len = layers.shape(input)[0] + max_seq_len = paddle.shape(input)[0] mask = layers.sequence_mask( sequence_length, maxlen=max_seq_len, dtype='float32' ) @@ -614,7 +614,7 @@ def basic_lstm( mask = None if sequence_length: - max_seq_len = layers.shape(input)[0] + max_seq_len = paddle.shape(input)[0] mask = layers.sequence_mask( sequence_length, maxlen=max_seq_len, dtype='float32' ) diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index d490b0457d98cc..dddd2fa386d2b8 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -1588,7 +1588,7 @@ def ssd_loss( raise ValueError("Only support mining_type == max_negative now.") num, num_prior, num_class = confidence.shape - conf_shape = nn.shape(confidence) + conf_shape = paddle.shape(confidence) def __reshape_to_2d(var): out = paddle.flatten(var, 2, -1) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 4dab44ebe54feb..fda4c24e510685 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -97,7 +97,6 @@ 'elementwise_mul', 'gaussian_random', 'sampling_id', - 'shape', 'clip', 'clip_by_norm', 'mean', @@ -5010,95 +5009,6 @@ def sampling_id(x, 
min=0.0, max=1.0, seed=0, dtype='float32'): return out -def shape(input): - """ - :alias_main: paddle.shape - :alias: paddle.shape,paddle.tensor.shape,paddle.tensor.attribute.shape - :old_api: paddle.fluid.layers.shape - - **Shape Layer** - - Get the shape of the input. - - .. code-block:: text - - Case1: - Given N-D Tensor: - input = [ [1, 2, 3, 4], [5, 6, 7, 8] ] - - Then: - input.shape = [2, 4] - - Case2: - Given SelectedRows: - input.rows = [0, 4, 19] - input.height = 20 - input.value = [ [1, 2], [3, 4], [5, 6] ] # inner tensor - Then: - input.shape = [3, 2] - - Args: - input (Variable): The input can be N-D Tensor or SelectedRows with data type bool, float16, float32, float64, int32, int64. - If input variable is type of SelectedRows, returns the shape of it's inner tensor. - - Returns: - Variable (Tensor): The shape of the input variable. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import numpy as np - import paddle - paddle.enable_static() - - inputs = fluid.data(name="x", shape=[3, 100, 100], dtype="float32") - output = fluid.layers.shape(inputs) - - exe = fluid.Executor(fluid.CPUPlace()) - exe.run(fluid.default_startup_program()) - - img = np.ones((3, 100, 100)).astype(np.float32) - - res = exe.run(fluid.default_main_program(), feed={'x':img}, fetch_list=[output]) - print(res) # [array([ 3, 100, 100], dtype=int32)] - """ - if in_dygraph_mode(): - out = _C_ops.shape(input) - out.stop_gradient = True - return out - if _in_legacy_dygraph(): - out = _legacy_C_ops.shape(input) - out.stop_gradient = True - return out - - check_variable_and_dtype( - input, - 'input', - [ - 'bool', - 'float16', - 'float32', - 'float64', - 'int32', - 'int64', - 'complex64', - 'complex128', - ], - 'shape', - ) - helper = LayerHelper('shape', **locals()) - out = helper.create_variable_for_type_inference(dtype='int32') - helper.append_op( - type='shape', - inputs={'Input': input}, - outputs={'Out': out}, - stop_gradient=True, - ) - - return out - - def _elementwise_op(helper): op_type = helper.layer_type x = helper.kwargs.get('x', None) diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 60ac537ffc6d40..8b5721438d2e53 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -673,7 +673,7 @@ def _switch_grad(x, stop=False): inputs = map_structure(_transpose_batch_time, inputs) if sequence_length: - max_seq_len = nn.shape(flatten(inputs)[0])[0] + max_seq_len = paddle.shape(flatten(inputs)[0])[0] mask = sequence_lod.sequence_mask( sequence_length, maxlen=max_seq_len, @@ -1215,7 +1215,7 @@ def initialize(self, initial_cell_states): """ self.kinf = 1e9 state = flatten(initial_cell_states)[0] - self.batch_size = nn.shape(state)[0] + self.batch_size = paddle.shape(state)[0] self.start_token_tensor = tensor.fill_constant( shape=[1], dtype="int64", value=self.start_token diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py index 2fa012559cc77f..985d091d6b9c46 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/ifelse_simple_func.py @@ -151,7 +151,7 @@ def nested_if_else(x_v): # `x_v.shape[0]` is not Tensor, and `batch_size` is the return value of `true_fn` after transformed. 
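All of these call sites are mechanical rewrites of fluid.layers.shape(x) into paddle.shape(x). A minimal dygraph sketch of the replacement API (shapes and values here are arbitrary, for illustration only):

    import paddle

    x = paddle.ones([3, 100, 100], dtype="float32")
    s = paddle.shape(x)              # runtime shape as an int32 Tensor: [3, 100, 100]
    batch_size = paddle.shape(x)[0]  # the indexing pattern used throughout this patch
    print(s.numpy(), batch_size.numpy())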
# col = -1 # batch_size = x_v.shape[0] - batch_size = fluid.layers.shape(x_v)[0] + batch_size = paddle.shape(x_v)[0] # if tensor.shape is [1], now support to compare with numpy. if paddle.mean(x_v).numpy() < 0: @@ -180,7 +180,7 @@ def nested_if_else_2(x): z = y x_shape_0 = x.shape[0] if x_shape_0 < 1: - if fluid.layers.shape(y).numpy()[0] < 1: + if paddle.shape(y).numpy()[0] < 1: res = fluid.layers.fill_constant( value=2, shape=x.shape, dtype="int32" ) @@ -212,7 +212,7 @@ def nested_if_else_3(x): else: out = x - 1 else: - y_shape = fluid.layers.shape(y) + y_shape = paddle.shape(y) if y_shape.numpy()[0] < 1: res = fluid.layers.fill_constant( value=2, shape=x.shape, dtype="int32" @@ -290,7 +290,7 @@ def forward(self, input): def if_with_and_or(x_v, label=None): - batch_size = fluid.layers.shape(x_v) + batch_size = paddle.shape(x_v) if ( x_v is not None and (paddle.mean(x_v).numpy()[0] > 0 or label is not None) @@ -308,7 +308,7 @@ def if_with_and_or(x_v, label=None): def if_with_and_or_1(x, y=None): - batch_size = fluid.layers.shape(x) + batch_size = paddle.shape(x) if batch_size[0] > 1 and y is not None: x = x + 1 if y is not None or batch_size[0] > 1: @@ -317,7 +317,7 @@ def if_with_and_or_1(x, y=None): def if_with_and_or_2(x, y=None): - batch_size = fluid.layers.shape(x) + batch_size = paddle.shape(x) if x is not None and batch_size[0] > 1 and y is not None: x = x + 1 if batch_size[0] > 1 or y is not None or x is not None: @@ -326,7 +326,7 @@ def if_with_and_or_2(x, y=None): def if_with_and_or_3(x, y=None): - batch_size = fluid.layers.shape(x) + batch_size = paddle.shape(x) mean_res = paddle.mean(x) if ( x is not None @@ -341,7 +341,7 @@ def if_with_and_or_3(x, y=None): def if_with_and_or_4(x, y=None): - batch_size = fluid.layers.shape(x) + batch_size = paddle.shape(x) mean_res = paddle.mean(x) if (x is not None and batch_size[0] > 1) or ( y is not None and mean_res.numpy()[0] > 0 @@ -361,7 +361,7 @@ def __init__(self): self.b = 2 foo = Foo() - batch_size = fluid.layers.shape(x) + batch_size = paddle.shape(x) mean_res = paddle.mean(x) if batch_size[0] > foo.a: diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index d364b8a1a5d602..5c04aecd1a87fc 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -297,8 +297,8 @@ def forward(self, inputs): loss = paddle.nn.functional.softmax_with_cross_entropy( logits=dec_output, label=label, soft_label=False ) - loss = paddle.squeeze(loss, axis=[2]) - max_tar_seq_len = fluid.layers.shape(tar)[1] + loss = paddle.squeeze(loss, axes=[2]) + max_tar_seq_len = paddle.shape(tar)[1] tar_mask = fluid.layers.sequence_mask( tar_sequence_length, maxlen=max_tar_seq_len, dtype='float32' ) @@ -833,8 +833,8 @@ def forward(self, inputs): loss = paddle.nn.functional.softmax_with_cross_entropy( logits=dec_output, label=label, soft_label=False ) - loss = paddle.squeeze(loss, axis=[2]) - max_tar_seq_len = fluid.layers.shape(tar)[1] + loss = paddle.squeeze(loss, axes=[2]) + max_tar_seq_len = paddle.shape(tar)[1] tar_mask = fluid.layers.sequence_mask( tar_sequence_length, maxlen=max_tar_seq_len, dtype='float32' ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py index 8c3d62feacc62c..d16f07d9a2e343 100644 --- 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py @@ -210,7 +210,7 @@ def ops(self, input, shape, dtype, value): operation """ shape = list(shape) - input_shape = fluid.layers.shape(input) + input_shape = paddle.shape(input) shape[0] = input_shape[0] constant = fluid.layers.fill_constant(shape, dtype, value) return constant diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py index 0cb3e333045f58..b7461b21aa612e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py @@ -321,9 +321,7 @@ def bi_loss(pred_score, gt_label): gt_label = paddle.reshape(x=gt_label, shape=[-1]) gt_label.stop_gradient = True pmask = fluid.layers.cast(x=(gt_label > 0.5), dtype=DATATYPE) - num_entries = fluid.layers.cast( - fluid.layers.shape(pmask), dtype=DATATYPE - ) + num_entries = fluid.layers.cast(paddle.shape(pmask), dtype=DATATYPE) num_positive = fluid.layers.cast(paddle.sum(pmask), dtype=DATATYPE) ratio = num_entries / num_positive coef_0 = 0.5 * ratio / (ratio - 1) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py index 742e828aa9acb4..597580eedc7655 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py @@ -97,8 +97,8 @@ def forward(self, input, max_len=4): ), } # TODO(Aurelius84): The following code will be converted into: - # max_len = layers.cond(layers.shape(input)[0] != max_len, - # lambda: layers.shape(input)[0], lambda: max_len) + # max_len = layers.cond(paddle.shape(input)[0] != max_len, + # lambda: paddle.shape(input)[0], lambda: max_len) # But max_len should be wrapped into tensor, which is not supported. # Comment out this line of code for now. diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py index 54b97c9280a404..5aff8c710ae7b5 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py @@ -79,7 +79,7 @@ def forward(self, inputs): res = [] for i in range(inputs.shape[1]): if self.is_reverse: - j = fluid.layers.shape(inputs)[1] - 1 - i + j = paddle.shape(inputs)[1] - 1 - i else: j = i diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_shape.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_shape.py index 4e29f2bf6b44a6..1ad55d3fbaa412 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_shape.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tensor_shape.py @@ -38,7 +38,7 @@ def dyfunc_tensor_shape_2(x): def dyfunc_tensor_shape_3(x): # Transform y.shape but run y.shape actually because y is not Tensor x = fluid.dygraph.to_variable(x) - y = np.ones(5) + y = paddle.ones([1, 5]) res = paddle.reshape(x, shape=y.shape) return res @@ -97,7 +97,7 @@ def dyfunc_paddle_shape_api(x): a = paddle.shape(x)[0] # alias api will also not be converted. 
alias_old_api = paddle.fluid.layers - b = alias_old_api.shape(x)[1] + b = paddle.shape(x)[1] res = paddle.reshape(x, shape=(b, a)) return res @@ -199,7 +199,7 @@ def dyfunc_with_while_3(x): def dyfunc_with_while_4(x): x = paddle.to_tensor(x) - y = np.ones(5) + y = paddle.ones([1, 5]) y_shape_0 = y.shape[0] i = 1 @@ -309,6 +309,11 @@ class TestTensorShapeBasic3(TestTensorShapeBasic): def init_test_func(self): self.dygraph_func = dyfunc_tensor_shape_3 + def _set_expected_op_num(self): + self.expected_op_num = 3 + self.expected_shape_op_num = 0 + self.expected_slice_op_num = 0 + class TestTensorShapeBasic4(TestTensorShapeBasic): def init_test_func(self): @@ -475,7 +480,7 @@ def init_test_func(self): self.dygraph_func = dyfunc_with_while_4 def _set_expected_op_num(self): - self.expected_op_num = 4 + self.expected_op_num = 1 self.expected_shape_op_num = 0 self.expected_slice_op_num = 0 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py index 5f894744700f0b..5cf1f0f0f533a0 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py @@ -203,7 +203,7 @@ def __init__(self, scale=2): def forward(self, inputs): # get dynamic upsample output shape - shape_nchw = fluid.layers.shape(inputs) + shape_nchw = paddle.shape(inputs) shape_hw = paddle.slice(shape_nchw, axes=[0], starts=[2], ends=[4]) shape_hw.stop_gradient = True in_shape = fluid.layers.cast(shape_hw, dtype='int32') diff --git a/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py b/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py index 9774ea32e44f68..3e3eefd5d278d3 100644 --- a/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_dynamic_rnn_stop_gradient.py @@ -40,7 +40,7 @@ def build_and_run_program(place, batch_size, beam_size, stop_gradient=False): while_op = layers.While(cond) scores = layers.array_write(x, step_idx) with while_op.block(): - bs = layers.cast(layers.shape(x)[0], "int64") + bs = layers.cast(paddle.shape(x)[0], "int64") for _ in range(20): bs = layers.cast(bs, 'int64') bs.stop_gradient = stop_gradient diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 25b6d0513d1c87..64e671c8ee9bd7 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -3307,7 +3307,7 @@ def make_shape(self): input = self._get_data( name="input", shape=[3, 100, 100], dtype="float32" ) - out = layers.shape(input) + out = paddle.shape(input) return out def make_pad2d(self): diff --git a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py index 6b2383ed56933e..2302e6f0d241fd 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py @@ -635,7 +635,7 @@ def def_seq2seq_model( logits=logits, label=label, soft_label=False ) loss = layers.unsqueeze(loss, axes=[2]) - max_tar_seq_len = layers.shape(target)[1] + max_tar_seq_len = paddle.shape(target)[1] tar_mask = layers.sequence_mask( target_length, maxlen=max_tar_seq_len, dtype="float32" ) diff --git a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py index 3b3539c4861f12..67657071db83f3 100644 
--- a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py @@ -248,7 +248,7 @@ def __call__(self, src, src_length, trg=None, trg_length=None): ), ] src_mask = layers.sequence_mask( - src_length, maxlen=layers.shape(src)[1], dtype="float32" + src_length, maxlen=paddle.shape(src)[1], dtype="float32" ) encoder_padding_mask = (src_mask - 1.0) * 1e9 encoder_padding_mask = layers.unsqueeze(encoder_padding_mask, [1]) @@ -400,7 +400,7 @@ def __init__(self, lr=None): def learn(self, probs, label, weight=None, length=None): loss = layers.cross_entropy(input=probs, label=label, soft_label=False) - max_seq_len = layers.shape(probs)[1] + max_seq_len = paddle.shape(probs)[1] mask = layers.sequence_mask(length, maxlen=max_seq_len, dtype="float32") loss = loss * mask loss = paddle.mean(loss, axis=[0]) diff --git a/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py b/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py index 17e7f69a3b49e4..6f70e553cc2bc5 100644 --- a/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py @@ -23,7 +23,7 @@ def test_static_graph(self): data = paddle.fluid.layers.data( name="x", shape=[-1, 2], dtype='float32' ) - shape = paddle.fluid.layers.shape(data) # shape should be [-1, 2] + shape = paddle.shape(data) # shape should be [-1, 2] x = paddle.fluid.layers.uniform_random(shape) self.assertEqual(x.shape, data.shape) paddle.disable_static() diff --git a/python/paddle/fluid/tests/unittests/test_while_loop_op.py b/python/paddle/fluid/tests/unittests/test_while_loop_op.py index 8e733ef9208b31..8c94834c9a28b1 100644 --- a/python/paddle/fluid/tests/unittests/test_while_loop_op.py +++ b/python/paddle/fluid/tests/unittests/test_while_loop_op.py @@ -595,7 +595,7 @@ def body(z, i): with program_guard(main_program, startup_program): x = fluid.layers.data(name='x', shape=[5], dtype='int32') z = fluid.layers.fill_constant([1], 'int32', 0) - x_shape = fluid.layers.shape(x) + x_shape = paddle.shape(x) i = fluid.layers.fill_constant([1], 'int32', 0) z, _ = fluid.layers.while_loop(cond, body, [z, i]) diff --git a/python/paddle/jit/dy2static/ast_transformer.py b/python/paddle/jit/dy2static/ast_transformer.py index 2e244d6f341833..826232e723f607 100644 --- a/python/paddle/jit/dy2static/ast_transformer.py +++ b/python/paddle/jit/dy2static/ast_transformer.py @@ -127,7 +127,7 @@ def transfer_from_node_type(self, node_wrapper): transformers = [ EarlyReturnTransformer, BasicApiTransformer, # Basic Api - TensorShapeTransformer, # Tensor.shape -> layers.shape(Tensor) + TensorShapeTransformer, # Tensor.shape -> paddle.shape(Tensor) BreakContinueTransformer, # break/continue in loops ReturnTransformer, # return in functions LogicalTransformer, # logical and/or/not diff --git a/python/paddle/jit/dy2static/convert_operators.py b/python/paddle/jit/dy2static/convert_operators.py index fa622b14094de0..df20a5c4e0c98c 100644 --- a/python/paddle/jit/dy2static/convert_operators.py +++ b/python/paddle/jit/dy2static/convert_operators.py @@ -31,7 +31,6 @@ from paddle.fluid.layers import ( cast, control_flow, - nn, ) from paddle.fluid.layers.control_flow import ( cond, @@ -524,7 +523,7 @@ def convert_len(var): # so we return a variable dynamically inferred from var.shape. 
if var.shape[0] > 0 and var.type == core.VarDesc.VarType.LOD_TENSOR: return var.shape[0] - return nn.shape(var)[0] + return paddle.shape(var)[0] elif var.type == core.VarDesc.VarType.LOD_TENSOR_ARRAY: return paddle.tensor.array_length(var) else: @@ -607,7 +606,7 @@ def has_negative(list_shape): if isinstance(x, Variable): values = list(x.shape) if has_negative(values): - shape_tensor = nn.shape(x) + shape_tensor = paddle.shape(x) for i, v in enumerate(values): if v is None or v < 0: values[i] = shape_tensor[i] diff --git a/python/paddle/tensor/attribute.py b/python/paddle/tensor/attribute.py index d6f08f676b8ae2..ac5f99a0b1c3c1 100644 --- a/python/paddle/tensor/attribute.py +++ b/python/paddle/tensor/attribute.py @@ -93,7 +93,7 @@ def shape(input): paddle.enable_static() inputs = fluid.data(name="x", shape=[3, 100, 100], dtype="float32") - output = fluid.layers.shape(inputs) + output = paddle.shape(inputs) exe = fluid.Executor(fluid.CPUPlace()) exe.run(fluid.default_startup_program()) From cee7a3dbe8638ab1dc619bacd2ef679de45d3470 Mon Sep 17 00:00:00 2001 From: ShenLiang <1422485404@qq.com> Date: Mon, 5 Dec 2022 13:17:24 +0800 Subject: [PATCH 140/154] fix bug of reducer in best_fit (#48668) --- .../fluid/distributed/collective/reducer.cc | 33 ++++++++++++------- paddle/fluid/distributed/collective/reducer.h | 2 +- .../test_parallel_dygraph_dataparallel.py | 17 +++++++++- 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/paddle/fluid/distributed/collective/reducer.cc b/paddle/fluid/distributed/collective/reducer.cc index cd8c8ed2e0cc9c..379bc57d5594e7 100644 --- a/paddle/fluid/distributed/collective/reducer.cc +++ b/paddle/fluid/distributed/collective/reducer.cc @@ -17,10 +17,16 @@ #include "paddle/phi/backends/device_manager.h" DECLARE_bool(use_stream_safe_cuda_allocator); +DECLARE_string(allocator_strategy); namespace paddle { namespace distributed { +static bool IsStreamSafeAllocator() { + return FLAGS_allocator_strategy == "auto_growth" && + FLAGS_use_stream_safe_cuda_allocator; +} + static Backend TransToBackend(platform::Place place) { static const std::map type_backend = { {phi::AllocationType::GPU, Backend::GPU}, @@ -399,14 +405,14 @@ void EagerGroup::ConcatTensors(const platform::Place &place) { } } -void EagerGroup::SplitTensorsDev(const platform::DeviceContext &context) { +void EagerGroup::SplitTensors(const platform::DeviceContext &context) { auto place = context.GetPlace(); if (platform::is_gpu_place(place)) { #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) auto &gpu_context = static_cast(context); SplitTensorsWithType( gpu_context, &dense_contents_, &dense_tensors_, dtype_); - if (FLAGS_use_stream_safe_cuda_allocator) { + if (IsStreamSafeAllocator()) { auto dense_tensor = std::dynamic_pointer_cast(dense_contents_.impl()); VLOG(3) << "Free dense_contents_ " << dense_contents_.numel(); @@ -1011,12 +1017,11 @@ void EagerReducer::FinalizeBackward() { for (auto &group : groups_) { if (!group.is_sparse_) { group.task->Synchronize(); - } - } - - for (auto &group : groups_) { - if (!group.is_sparse_) { - group.dense_contents_.reset(); + if (!IsStreamSafeAllocator()) { + auto *default_ctx = + platform::DeviceContextPool::Instance().Get(inner_place_); + group.SplitTensors(*default_ctx); + } } } @@ -1054,9 +1059,15 @@ void EagerReducer::FusedAllReduceSchedule(EagerGroup *group, group->task = process_group_->AllReduce(in_out, in_out, opts); auto *context = process_group_->GetDeviceContext(inner_place_); - group->SplitTensorsDev(*context); - 
group->task->UpdateWaitChain(*context); - // split in FinalizeBackward() + + if (IsStreamSafeAllocator()) { + // NOTE(shenliang03): The best_fit allocator strategy is multi-stream + // insecure. In the Split operator, additional memory will be applied for + // calculation, and if it is asynchronous, an illegal memory access may be + // encountered. + group->SplitTensors(*context); + group->task->UpdateWaitChain(*context); + } } void EagerReducer::AllReduceSparse(EagerGroup *group, diff --git a/paddle/fluid/distributed/collective/reducer.h b/paddle/fluid/distributed/collective/reducer.h index 5d27086fdbec50..5be2d60a6a654f 100644 --- a/paddle/fluid/distributed/collective/reducer.h +++ b/paddle/fluid/distributed/collective/reducer.h @@ -75,7 +75,7 @@ class EagerGroup { // context is used to select the stream for split - void SplitTensorsDev(const platform::DeviceContext &); + void SplitTensors(const platform::DeviceContext &); friend std::ostream &operator<<(std::ostream &, const EagerGroup &); }; diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py index 9e4be19dacadea..5fd7f3beb117e9 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py @@ -103,6 +103,7 @@ def start_local_trainers( training_script, training_script_args, eager_mode=True, + allocator_strategy="auto_growth", log_dir=None, ): current_env = copy.copy(os.environ.copy()) @@ -126,6 +127,10 @@ def start_local_trainers( if not eager_mode: proc_env["FLAGS_enable_eager_mode"] = "%d" % 0 + proc_env["FLAGS_allocator_strategy"] = allocator_strategy + if allocator_strategy == "auto_growth": + proc_env["FLAGS_fraction_of_gpu_memory_to_use"] = "0.1" + current_env.update(proc_env) print("trainer proc env:{}".format(current_env)) @@ -153,7 +158,12 @@ def start_local_trainers( class TestMultipleGpus(unittest.TestCase): - def run_mnist_2gpu(self, target_file_name, eager_mode=True): + def run_mnist_2gpu( + self, + target_file_name, + eager_mode=True, + allocator_strategy="auto_growth", + ): if ( not fluid.core.is_compiled_with_cuda() or fluid.core.get_cuda_device_count() == 0 @@ -170,6 +180,7 @@ def run_mnist_2gpu(self, target_file_name, eager_mode=True): cluster, pod, eager_mode=eager_mode, + allocator_strategy=allocator_strategy, training_script=target_file_name, training_script_args=[], ) @@ -218,6 +229,10 @@ def test_parallel_dygraph_dataparallel_with_pylayer(self): self.run_mnist_2gpu( 'parallel_dygraph_dataparallel_with_pylayer.py', eager_mode=False ) + self.run_mnist_2gpu( + 'parallel_dygraph_dataparallel_with_pylayer.py', + allocator_strategy="naive_best_fit", + ) class TestGradientCheckInEagerMode(TestMultipleGpus): From d6aa0d43bc26f0e472a996b4def4ae81f78bfbd4 Mon Sep 17 00:00:00 2001 From: Netpunk <69072522+Patrick-Star125@users.noreply.github.com> Date: Mon, 5 Dec 2022 13:43:54 +0800 Subject: [PATCH 141/154] [PHI decoupling] migrate poly_util.h to phi (#48499) * rm poly_util.h * format code * fix some problems * format code --- paddle/phi/kernels/funcs/detection/nms_util.h | 8 ++++---- .../kernels/funcs}/detection/poly_util.cc | 12 +++++------- .../kernels/funcs}/detection/poly_util.h | 11 +++++------ 3 files changed, 14 insertions(+), 17 deletions(-) rename paddle/{fluid/operators => phi/kernels/funcs}/detection/poly_util.cc (95%) rename paddle/{fluid/operators => phi/kernels/funcs}/detection/poly_util.h (91%) 
diff --git a/paddle/phi/kernels/funcs/detection/nms_util.h b/paddle/phi/kernels/funcs/detection/nms_util.h index e862b2a90f06c2..4e2398fbb9651c 100644 --- a/paddle/phi/kernels/funcs/detection/nms_util.h +++ b/paddle/phi/kernels/funcs/detection/nms_util.h @@ -17,9 +17,9 @@ limitations under the License. */ #include #include -#include "paddle/fluid/operators/detection/poly_util.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/device_context.h" +#include "paddle/phi/kernels/funcs/detection/poly_util.h" namespace phi { namespace funcs { @@ -96,10 +96,10 @@ T PolyIoU(const T* box1, const T* box2, const size_t box_size, const bool normalized) { - T bbox1_area = paddle::operators::PolyArea(box1, box_size, normalized); - T bbox2_area = paddle::operators::PolyArea(box2, box_size, normalized); + T bbox1_area = phi::funcs::PolyArea(box1, box_size, normalized); + T bbox2_area = phi::funcs::PolyArea(box2, box_size, normalized); T inter_area = - paddle::operators::PolyOverlapArea(box1, box2, box_size, normalized); + phi::funcs::PolyOverlapArea(box1, box2, box_size, normalized); if (bbox1_area == 0 || bbox2_area == 0 || inter_area == 0) { // If coordinate values are invalid // if area size <= 0, return 0. diff --git a/paddle/fluid/operators/detection/poly_util.cc b/paddle/phi/kernels/funcs/detection/poly_util.cc similarity index 95% rename from paddle/fluid/operators/detection/poly_util.cc rename to paddle/phi/kernels/funcs/detection/poly_util.cc index d8beabd0a04b3f..fd8037a82eabcd 100644 --- a/paddle/fluid/operators/detection/poly_util.cc +++ b/paddle/phi/kernels/funcs/detection/poly_util.cc @@ -14,12 +14,10 @@ limitations under the License. */ #ifndef POLY_UTIL_CC_ #define POLY_UTIL_CC_ -#include "paddle/fluid/operators/detection/poly_util.h" +#include "paddle/phi/kernels/funcs/detection/poly_util.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { +namespace phi { +namespace funcs { using phi::funcs::gpc_free_polygon; using phi::funcs::gpc_polygon_clip; @@ -134,7 +132,7 @@ T PolyOverlapArea(const T* box1, return inter_area; } -} // namespace operators -} // namespace paddle +} // namespace funcs +} // namespace phi #endif diff --git a/paddle/fluid/operators/detection/poly_util.h b/paddle/phi/kernels/funcs/detection/poly_util.h similarity index 91% rename from paddle/fluid/operators/detection/poly_util.h rename to paddle/phi/kernels/funcs/detection/poly_util.h index ad7611c165b79c..6d527d2d95f9eb 100644 --- a/paddle/fluid/operators/detection/poly_util.h +++ b/paddle/phi/kernels/funcs/detection/poly_util.h @@ -15,11 +15,10 @@ limitations under the License. 
*/ #include -#include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/gpc.h" -namespace paddle { -namespace operators { +namespace phi { +namespace funcs { template class Point_ { @@ -70,7 +69,7 @@ T PolyOverlapArea(const T* box1, const T* box2, const size_t box_size, const bool normalized); -} // namespace operators -} // namespace paddle +} // namespace funcs +} // namespace phi -#include "paddle/fluid/operators/detection/poly_util.cc" +#include "paddle/phi/kernels/funcs/detection/poly_util.cc" From 93027d9f57112bc05e6777c153896ed6eb913db1 Mon Sep 17 00:00:00 2001 From: heyanru <81976792+heyanru01@users.noreply.github.com> Date: Mon, 5 Dec 2022 14:01:51 +0800 Subject: [PATCH 142/154] [Fluid Clean] remove nn.topk, nn.ctc_greedy_decoder, nn.im2sequence, nn.multiplex, nn.smooth_l1 (#48289) --- .../phi/kernels/funcs/transpose_functor.cu.h | 2 +- python/paddle/fluid/layers/detection.py | 3 +- python/paddle/fluid/layers/nn.py | 579 ------------------ .../fluid/tests/unittests/dist_transformer.py | 4 +- .../seq2seq_dygraph_model.py | 4 +- .../transformer_dygraph_model.py | 4 +- .../tests/unittests/ipu/test_topk_op_ipu.py | 2 +- .../npu/test_smooth_l1_loss_op_npu.py | 17 - .../tests/unittests/test_beam_search_op.py | 2 +- .../fluid/tests/unittests/test_ctc_align.py | 45 -- .../fluid/tests/unittests/test_layers.py | 35 +- .../tests/unittests/test_smooth_l1_loss_op.py | 19 - 12 files changed, 15 insertions(+), 701 deletions(-) diff --git a/paddle/phi/kernels/funcs/transpose_functor.cu.h b/paddle/phi/kernels/funcs/transpose_functor.cu.h index 0d24fdebef1489..8dae6ab60e99ab 100644 --- a/paddle/phi/kernels/funcs/transpose_functor.cu.h +++ b/paddle/phi/kernels/funcs/transpose_functor.cu.h @@ -475,7 +475,7 @@ void SwapDim1And2InNarrow(const phi::GPUContext& d, CeilOrFloor(input_long_edge, proposed_tile_long_edge) * proposed_tile_long_edge; - int num_full_tiles = + int num_full_tiles = CeilOrFloor(input_long_edge, proposed_tile_long_edge); float cost = num_wasted_threads; diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index dddd2fa386d2b8..486daac6092c62 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -1688,7 +1688,8 @@ def __reshape_to_2d(var): location = __reshape_to_2d(location) target_bbox = __reshape_to_2d(target_bbox) - loc_loss = nn.smooth_l1(location, target_bbox) + smooth_l1_loss = paddle.nn.loss.SmoothL1Loss() + loc_loss = smooth_l1_loss(location, target_bbox) target_loc_weight = __reshape_to_2d(target_loc_weight) loc_loss = loc_loss * target_loc_weight diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index fda4c24e510685..a4125088c8a2f7 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -72,16 +72,11 @@ 'batch_norm', 'dropout', 'split', - 'ctc_greedy_decoder', 'l2_normalize', 'matmul', - 'topk', - 'im2sequence', 'row_conv', - 'multiplex', 'layer_norm', 'spectral_norm', - 'smooth_l1', 'one_hot', 'autoincreased_step_counter', 'unsqueeze', @@ -2751,421 +2746,6 @@ def __check_input(x, y): return out -def topk(input, k, name=None): - """ - :alias_main: paddle.topk - :alias: paddle.topk,paddle.tensor.topk,paddle.tensor.search.topk - :old_api: paddle.fluid.layers.topk - - This OP is used to find values and indices of the k largest entries - for the last dimension. - - If the input is a 1-D Tensor, finds the k largest entries and outputs - their values and indices. 
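Since fluid.layers.topk is deleted by this patch, the same functionality for new code is covered by the public paddle.topk API. A short illustrative sketch (note that paddle.topk returns entries sorted in descending order by default):

    import paddle

    x = paddle.to_tensor([[5., 4., 2., 3.],
                          [9., 7., 10., 25.],
                          [6., 2., 10., 1.]])
    values, indices = paddle.topk(x, k=2)  # top-2 along the last dimension
    # values  -> [[5., 4.], [25., 10.], [10., 6.]]
    # indices -> [[0, 1], [3, 2], [2, 0]]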
- - If the input is a Tensor with higher rank, this operator computes the top k - entries along the last dimension. - - .. code-block:: text - - Case 1: - - Input: - input.shape = [3, 4] - input.data = [[5, 4, 2, 3], - [9, 7, 10, 25], - [6, 2, 10, 1]] - k = 2 - - Output: - The first output: - values.shape = [3, 2] - values.data = [[5, 4], - [10, 25], - [6, 10]] - - The second output: - indices.shape = [3, 2] - indices.data = [[0, 1], - [2, 3], - [0, 2]] - - Args: - input(Variable): The input tensor. Support data types: float32, float64. - k(int | Variable): The number of top elements to look for along the last dimension - of input tensor. - name (str, optional): Please refer to :ref:`api_guide_Name`, Default None. - - Returns: - Values (Variable): Input tensor's k largest elements along each last dimensional slice. The dimension is: :math:`input.shape[:-1]+[k]`. - Indices (Variable): Indices of k largest elements alone the last dimension of input. The dimension is same as values. - - Raises: - ValueError: If :math:`k < 1` or :math:`k > last dimension of input`. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import paddle.fluid.layers as layers - # set batch size=None - input = fluid.data(name="input", shape=[None, 13, 11], dtype='float32') - top5_values, top5_indices = layers.topk(input, k=5) # top5_values.shape[None, 13, 5], top5_indices.shape=[None, 13, 5] - - # 1D Tensor - input1 = fluid.data(name="input1", shape=[None, 13], dtype='float32') - top5_values, top5_indices = layers.topk(input1, k=5) #top5_values.shape=[None, 5], top5_indices.shape=[None, 5] - - # k=Variable - input2 = fluid.data(name="input2", shape=[None, 13, 11], dtype='float32') - vk = fluid.data(name="vk", shape=[None, 1], dtype='int32') # save k in vk.data[0] - vk_values, vk_indices = layers.topk(input2, k=vk) #vk_values.shape=[None, 13, k], vk_indices.shape=[None, 13, k] - - """ - if _non_static_mode(): - _k = k.numpy().item(0) if isinstance(k, Variable) else k - out, indices = _legacy_C_ops.top_k(input, 'k', _k) - out.stop_gradient = True - indices.stop_gradient = True - return out, indices - - inputs = {"X": [input]} - attrs = {} - if isinstance(k, Variable): - inputs['K'] = [k] - else: - attrs = {'k': k} - - helper = LayerHelper("top_k", **locals()) - values = helper.create_variable_for_type_inference(dtype=input.dtype) - indices = helper.create_variable_for_type_inference(dtype="int64") - - helper.append_op( - type="top_k", - inputs=inputs, - outputs={"Out": [values], "Indices": [indices]}, - attrs=attrs, - ) - values.stop_gradient = True - indices.stop_gradient = True - return values, indices - - -def ctc_greedy_decoder( - input, blank, input_length=None, padding_value=0, name=None -): - r""" - This op is used to decode sequences by greedy policy by the following steps: - - 1. Get the indexes of maximum value for each row in input. a.k.a. - numpy.argmax(input, axis=0). - 2. For each sequence in result of step1, merge repeated tokens between two - blanks and delete all blanks. - - This op is implemented in two modes: lod and padding, either of them can be used. - The input can be either LoDTensor or Tensor, corresponding to lod and padding - mode respectively. - - A simple example as below: - - .. 
code-block:: text - - Given: - (1) for lod mode: - - input.data = [[0.6, 0.1, 0.3, 0.1], - [0.3, 0.2, 0.4, 0.1], - [0.1, 0.5, 0.1, 0.3], - [0.5, 0.1, 0.3, 0.1], - - [0.5, 0.1, 0.3, 0.1], - [0.2, 0.2, 0.2, 0.4], - [0.2, 0.2, 0.1, 0.5], - [0.5, 0.1, 0.3, 0.1]] - - input.lod = [[4, 4]] - - Computation: - - step1: Apply argmax to first input sequence which is input.data[0:4]. Then we get: - [[0], [2], [1], [0]] - step2: merge repeated tokens and remove blank which is 0. Then we get first output sequence: - [[2], [1]] - - Finally: - - output.data = [[2], - [1], - [3]] - - output.lod = [[2, 1]] - - (2) for padding mode: - - input.data = [[[0.6, 0.1, 0.3, 0.1], - [0.3, 0.2, 0.4, 0.1], - [0.1, 0.5, 0.1, 0.3], - [0.5, 0.1, 0.3, 0.1]], - - [[0.5, 0.1, 0.3, 0.1], - [0.2, 0.2, 0.2, 0.4], - [0.2, 0.2, 0.1, 0.5], - [0.5, 0.1, 0.3, 0.1]]] - - input_length.data = [[4], [4]] - input.shape = [2, 4, 4] - - step1: Apply argmax to first input sequence which is input.data[0:4]. Then we get: - [[0], [2], [1], [0]], for input.data[4:8] is [[0], [3], [3], [0]], shape is [2,4,1] - step2: Change the argmax result to use padding mode, then argmax result is - [[0, 2, 1, 0], [0, 3, 3, 0]], shape is [2, 4], lod is [], input_length is [[4], [4]] - step3: Apply ctc_align to padding argmax result, padding_value is 0 - - Finally: - output.data = [[2, 1, 0, 0], - [3, 0, 0, 0]] - output_length.data = [[2], [1]] - - - Parameters: - - input(Variable): the probabilities of variable-length sequences. When in lod mode, - it is a 2-D LoDTensor with LoD information. It's shape is [Lp, num_classes + 1] - where Lp is the sum of all input sequences' length and - num_classes is the true number of classes. When in padding mode, - it is a 3-D Tensor with padding, It's shape is [batch_size, N, num_classes + 1]. - (not including the blank label). The data type can be float32 or float64. - blank(int): the blank label index of Connectionist Temporal - Classification (CTC) loss, which is in the half-opened - interval [0, num_classes + 1). - input_length(Variable, optional): 2-D LoDTensor, shape is [batch_size, 1], data type is int64. - It is used for padding mode. In lod mode, input_length is None. - padding_value(int): padding value. - name(str, optional): The default value is None. - Normally there is no need for user to set this property. - For more information, please refer to :ref:`api_guide_Name` - - Returns: - For lod mode, returns the result of CTC greedy decoder, 2-D LoDTensor, shape is [Lp, 1], \ - data type is int64. 'Lp' is the sum of all output sequences' length. If all the sequences \ - in result were empty, the result LoDTensor will be [-1] with empty \ - LoD [[]]. - - For padding mode, returns a tuple of (output, output_length), which was described as below: - - output, 2-D Tensor, shape is [batch_size, N], data type is int64. - - output_length, 2-D Tensor, shape is [batch_size, 1], data type is int64. It is the length of \ - each sequence of output for padding mode. - - Return type: - For lod mode: Variable - - For padding mode: tuple of two Variables (output, output_length). - - - Examples: - .. 
code-block:: python - - # for lod mode - import paddle.fluid as fluid - x = fluid.data(name='x', shape=[None, 8], dtype='float32', lod_level=1) - cost = fluid.layers.ctc_greedy_decoder(input=x, blank=0) - - # for padding mode - x_pad = fluid.data(name='x_pad', shape=[10, 4, 8], dtype='float32') - x_pad_len = fluid.data(name='x_pad_len', shape=[10, 1], dtype='int64') - out, out_len = fluid.layers.ctc_greedy_decoder(input=x_pad, blank=0, - input_length=x_pad_len) - - """ - check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'ctc_greedy_decoder' - ) - - helper = LayerHelper("ctc_greedy_decoder", **locals()) - _, topk_indices = topk(input, k=1) - - # ctc align op - ctc_out = helper.create_variable_for_type_inference(dtype="int64") - - if input_length is None: - helper.append_op( - type="ctc_align", - inputs={"Input": [topk_indices]}, - outputs={"Output": [ctc_out]}, - attrs={"merge_repeated": True, "blank": blank}, - ) - return ctc_out - else: - ctc_out_len = helper.create_variable_for_type_inference(dtype="int64") - ctc_input = paddle.squeeze(topk_indices, [2]) - - helper.append_op( - type="ctc_align", - inputs={"Input": [ctc_input], "InputLength": [input_length]}, - outputs={"Output": [ctc_out], "OutputLength": [ctc_out_len]}, - attrs={ - "merge_repeated": True, - "blank": blank, - "padding_value": padding_value, - }, - ) - return ctc_out, ctc_out_len - - -def im2sequence( - input, - filter_size=1, - stride=1, - padding=0, - input_image_size=None, - out_stride=1, - name=None, -): - r""" - :api_attr: Static Graph - - Extracts image patches from the input tensor to form a tensor of shape - {input.batch_size * output_height * output_width, filter_size_height * - filter_size_width * input.channels}. This op use filter to scan images - and convert these images to sequences. After expanding, the number of time step are - output_height * output_width for an image, in which output_height and - output_width are calculated by below equation: - - .. math:: - - output\_height = 1 + \ - (padding\_up + padding\_down + input\_height - filter\_size\_height + stride\_height - 1) / stride\_height \\\\ - output\_width = 1 + \ - (padding\_left + padding\_right + input\_width - filter\_size\_width + stride\_width - 1) / stride\_width - - And the dimension of each time step is filter_size_height * filter_size_width * input.channels. - - Parameters: - input (Variable): The input should be a 4-D Tensor in :math:`NCHW` format. The data type is float32. - - filter_size(int32 | List[int32]): The filter size. If filter_size is a List, - it must contain two integers, :math:`[filter\_size\_height, filter\_size\_width]` . - Otherwise, the filter size will be a square :math:`[filter\_size, filter\_size]` . Default is 1. - - stride(int32 | List[int32]): The stride size. If stride is a List, it must - contain two integers, :math:`[stride\_height, stride\_width]` . Otherwise, the stride size will be a square :math:`[stride\_size, stride\_size]` . Default is 1. - - padding(int32 | List[int32]): The padding size. If padding is a List, it can - contain four integers like :math:`[padding\_up, padding\_left, padding\_down, padding\_right]` to indicate - paddings of four direction. Or it can contain two integers :math:`[padding\_height, padding\_width]` which means - padding_up = padding_down = padding_height and - padding_left = padding_right = padding_width. Otherwise, a scalar padding means - padding_up = padding_down = padding_left = padding_right = padding. - Default is 0. 
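A quick numeric check of the output-size formula above, using the 3x3 input / 2x2 filter / stride 1 / zero padding case from the worked example further down (the helper name is only for illustration):

    def im2seq_out_dim(in_size, filter_size, stride, pad_a, pad_b):
        # the docstring formula, with integer (floor) division
        return 1 + (pad_a + pad_b + in_size - filter_size + stride - 1) // stride

    # output_height = output_width = 2, i.e. 2 * 2 = 4 time steps per image,
    # which matches output.dims = {8, 8} for the batch of two 2-channel 3x3 inputs
    assert im2seq_out_dim(3, 2, 1, 0, 0) == 2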
- - input_image_size(Variable, optional): the input contains image real size.It's dim - is :math:`[batchsize, 2]` . It is just for batch inference when not None. Default is None. - - out_stride(int32 | List[int32]): The scaling of image through CNN. It is valid only when input_image_size is not None. - If out_stride is List, it must contain two integers, - :math:`[out\_stride\_height, out\_stride\_W]` . Otherwise, - the out_stride_height = out_stride_width = out_stride. Default is 1. - - name (str, optional): The default value is None. Normally there is no need for - user to set this property. For more information, please refer to :ref:`api_guide_Name` . - - Returns: - The output is a 2-D LoDTensor with shape {input.batch\_size * output\_height * output\_width, \ - filter\_size\_height * filter\_size\_width * input.channels}. The data type is float32. - - Return Type: Variable - - Examples: - - .. code-block:: text - - Given: - - x = [[[[ 6. 2. 1.] - [ 8. 3. 5.] - [ 0. 2. 6.]] - - [[ 2. 4. 4.] - [ 6. 3. 0.] - [ 6. 4. 7.]]] - - [[[ 6. 7. 1.] - [ 5. 7. 9.] - [ 2. 4. 8.]] - - [[ 1. 2. 1.] - [ 1. 3. 5.] - [ 9. 0. 8.]]]] - - x.dims = {2, 2, 3, 3} - - And: - - filter = [2, 2] - stride = [1, 1] - padding = [0, 0] - - Then: - - output.data = [[ 6. 2. 8. 3. 2. 4. 6. 3.] - [ 2. 1. 3. 5. 4. 4. 3. 0.] - [ 8. 3. 0. 2. 6. 3. 6. 4.] - [ 3. 5. 2. 6. 3. 0. 4. 7.] - [ 6. 7. 5. 7. 1. 2. 1. 3.] - [ 7. 1. 7. 9. 2. 1. 3. 5.] - [ 5. 7. 2. 4. 1. 3. 9. 0.] - [ 7. 9. 4. 8. 3. 5. 0. 8.]] - - output.dims = {8, 8} - - output.lod = [[4, 4]] - - Examples: - - .. code-block:: python - - import paddle.fluid as fluid - import paddle - paddle.enable_static() - data = fluid.data(name='data', shape=[None, 3, 32, 32], - dtype='float32') - output = fluid.layers.im2sequence( - input=data, stride=[1, 1], filter_size=[2, 2]) - - - """ - assert ( - not _non_static_mode() - ), "sequence layer is not supported in dygraph mode yet." - - check_variable_and_dtype(input, 'input', ['float32'], 'im2sequence') - - if isinstance(filter_size, int): - filter_size = [filter_size, filter_size] - if isinstance(stride, int): - stride = [stride, stride] - if isinstance(padding, int): - padding = [padding, padding] - if len(padding) == 2: - padding.append(padding[0]) - padding.append(padding[1]) - inputs = {"X": input} - attrs = {"kernels": filter_size, "strides": stride, "paddings": padding} - if input_image_size: - if isinstance(out_stride, int): - out_stride = [out_stride, out_stride] - inputs["Y"] = input_image_size - attrs["out_stride"] = out_stride - helper = LayerHelper('im2sequence', **locals()) - out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) - helper.append_op( - type='im2sequence', inputs=inputs, outputs={'Out': out}, attrs=attrs - ) - return out - - @templatedoc() def row_conv(input, future_context_size, param_attr=None, act=None): """ @@ -3214,165 +2794,6 @@ def row_conv(input, future_context_size, param_attr=None, act=None): return helper.append_activation(out) -@templatedoc() -def multiplex(inputs, index, name=None): - """ - - Based on the given index parameter, the OP selects a specific row from each input Tensor to construct the output Tensor. - - If the input of this OP contains :math:`m` Tensors, where :math:`I_{i}` means the i-th input Tensor, :math:`i` between :math:`[0,m)` . - - And :math:`O` means the output, where :math:`O[i]` means the i-th row of the output, then the output satisfies that :math:`O[i] = I_{index[i]}[i]` . - - For Example: - - .. 
code-block:: text - - Given: - - inputs = [[[0,0,3,4], [0,1,3,4], [0,2,4,4], [0,3,3,4]], - [[1,0,3,4], [1,1,7,8], [1,2,4,2], [1,3,3,4]], - [[2,0,3,4], [2,1,7,8], [2,2,4,2], [2,3,3,4]], - [[3,0,3,4], [3,1,7,8], [3,2,4,2], [3,3,3,4]]] - - index = [[3],[0],[1],[2]] - - out = [[3,0,3,4], # out[0] = inputs[index[0]][0] = inputs[3][0] = [3,0,3,4] - [0,1,3,4], # out[1] = inputs[index[1]][1] = inputs[0][1] = [0,1,3,4] - [1,2,4,2], # out[2] = inputs[index[2]][2] = inputs[1][2] = [1,2,4,2] - [2,3,3,4]] # out[3] = inputs[index[3]][3] = inputs[2][3] = [2,3,3,4] - - - Args: - inputs (list): The input Tensor list. The list elements are N-D Tensors of data types float32, float64, int32, int64. All input Tensor shapes should be the same and rank must be at least 2. - index (Tensor): Used to select some rows in the input Tensor to construct an index of the output Tensor. It is a 2-D Tensor with data type int32 or int64 and shape [M, 1], where M is the number of input Tensors. - name(str, optional): The default value is None. Normally there is no - need for user to set this property. For more information, please - refer to :ref:`api_guide_Name`. - Returns: - Tensor: Output of multiplex OP, with data type being float32, float64, int32, int64. - - Examples: - - .. code-block:: python - - import paddle - import numpy as np - img1 = np.array([[1, 2], [3, 4]]).astype(np.float32) - img2 = np.array([[5, 6], [7, 8]]).astype(np.float32) - inputs = [paddle.to_tensor(img1), paddle.to_tensor(img2)] - index = paddle.to_tensor(np.array([[1], [0]]).astype(np.int32)) - res = paddle.multiplex(inputs, index) - print(res) # [array([[5., 6.], [3., 4.]], dtype=float32)] - - """ - - if _in_legacy_dygraph(): - return _legacy_C_ops.multiplex(index, inputs) - if in_dygraph_mode(): - return _C_ops.multiplex(inputs, index) - helper = LayerHelper('multiplex', **locals()) - - check_type(inputs, 'inputs', (list), 'multiplex') - if len(inputs) < 2: - raise ValueError( - "inputs should be a list object with at least 2 elements." - ) - for id, x in enumerate(inputs): - check_variable_and_dtype( - x, - 'input[' + str(id) + ']', - ['float32', 'float64', 'int32', 'int64'], - 'multiplex', - ) - check_variable_and_dtype(index, "index", ['int32', 'int64'], 'multiplex') - - out = helper.create_variable_for_type_inference(inputs[0].dtype) - helper.append_op( - type='multiplex', - inputs={'X': inputs, 'Ids': index}, - outputs={'Out': [out]}, - ) - return out - - -def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): - """ - - This layer computes the smooth L1 loss for Variable :attr:`x` and :attr:`y`. - It takes the first dimension of :attr:`x` and :attr:`y` as batch size. - For each instance, it computes the smooth L1 loss element by element first - and then sums all the losses. So the shape of output Variable is - [batch_size, 1]. - - Args: - x (Variable): A tensor with rank at least 2. The input value of smooth - L1 loss op with shape [batch_size, dim1, ..., dimN]. - A LoDTensor or Tensor with type float32. - y (Variable): A tensor with rank at least 2. The target value of smooth - L1 loss op with same shape as :attr:`x`. - A LoDTensor or Tensor with type float32. - inside_weight (Variable|None): A tensor with rank at least 2. This - input is optional and should have same shape with :attr:`x`. If - provided, the result of (:attr:`x` - :attr:`y`) will be multiplied - by this tensor element by element. - A Tensor with type float32. - outside_weight (Variable|None): A tensor with rank at least 2. 
This - input is optional and should have same shape with :attr:`x`. If - provided, the out smooth L1 loss will be multiplied by this tensor - element by element. - A Tensor with type float32. - sigma (float|None): Hyper parameter of smooth L1 loss layer. A float - scalar with default value 1.0. - - Returns: - Variable: The output smooth L1 loss with shape [batch_size, 1]. A Tensor with type float32. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import numpy as np - import paddle - paddle.enable_static() - data = fluid.data(name="x", shape=[-1, 3], dtype="float32") - label = fluid.data(name="y", shape=[-1, 3], dtype="float32") - result = fluid.layers.smooth_l1(data,label) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - x = np.random.rand(3,3).astype("float32") - y = np.random.rand(3,3).astype("float32") - output= exe.run(feed={"x":x, "y":y}, - fetch_list=[result]) - print(output) - - #[array([[0.08220536], - # [0.36652038], - # [0.20541131]], dtype=float32)] - - """ - check_variable_and_dtype(x, 'X', ['float32', 'float64'], 'smooth_l1_loss') - check_variable_and_dtype(y, 'Y', ['float32', 'float64'], 'smooth_l1_loss') - - helper = LayerHelper('smooth_l1_loss', **locals()) - - diff = helper.create_variable_for_type_inference(dtype=x.dtype) - loss = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='smooth_l1_loss', - inputs={ - 'X': x, - 'Y': y, - 'InsideWeight': inside_weight, - 'OutsideWeight': outside_weight, - }, - outputs={'Diff': diff, 'Out': loss}, - attrs={'sigma': sigma if sigma is not None else 1.0}, - ) - return loss - - @deprecated(since='2.0.0', update_to='paddle.nn.functional.one_hot') def one_hot(input, depth, allow_out_of_range=False): """ diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index cb60e1c599114b..c6165dd7535378 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -1833,8 +1833,8 @@ def beam_search(): ) logits = paddle.reshape(logits, (-1, trg_vocab_size)) - topk_scores, topk_indices = layers.topk( - input=paddle.nn.functional.softmax(logits), k=beam_size + topk_scores, topk_indices = paddle.topk( + x=paddle.nn.functional.softmax(logits), k=beam_size ) accu_scores = layers.elementwise_add( x=paddle.log(topk_scores), diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index 5c04aecd1a87fc..5babde40b43552 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -459,9 +459,7 @@ def beam_search(self, inputs): scores = paddle.reshape( log_probs, [-1, self.beam_size * self.tar_vocab_size] ) - topk_scores, topk_indices = fluid.layers.topk( - input=scores, k=self.beam_size - ) + topk_scores, topk_indices = paddle.topk(x=scores, k=self.beam_size) beam_indices = paddle.floor_divide(topk_indices, vocab_size_tensor) token_indices = paddle.remainder(topk_indices, vocab_size_tensor) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index e6f03170b4734c..16449d00ae7366 100644 --- 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -853,9 +853,7 @@ def gather(input, indices, batch_pos): log_probs, [-1, beam_size * self.trg_vocab_size] ) scores = log_probs - topk_scores, topk_indices = fluid.layers.topk( - input=scores, k=beam_size - ) + topk_scores, topk_indices = paddle.topk(x=scores, k=beam_size) beam_indices = paddle.floor_divide(topk_indices, vocab_size_tensor) token_indices = paddle.remainder(topk_indices, vocab_size_tensor) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_topk_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_topk_op_ipu.py index dbcbe6e3937251..6a302a7cb5a899 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_topk_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_topk_op_ipu.py @@ -31,7 +31,7 @@ def setUp(self): self.set_op_attrs() def set_test_op(self): - self.op = paddle.fluid.layers.topk + self.op = paddle.topk def set_data_feed(self): data = np.random.uniform(size=[3, 5]) diff --git a/python/paddle/fluid/tests/unittests/npu/test_smooth_l1_loss_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_smooth_l1_loss_op_npu.py index 54e625a9f4a919..6da0f7f36a1332 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_smooth_l1_loss_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_smooth_l1_loss_op_npu.py @@ -138,22 +138,5 @@ def test_check_grad_ingore_y(self): ) -class TestSmoothL1LossOpError(unittest.TestCase): - def test_errors(self): - with fluid.program_guard(fluid.Program(), fluid.Program()): - # The input type of accuracy_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.NPUPlace(0) - ) - y1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.NPUPlace(0) - ) - self.assertRaises(TypeError, fluid.layers.smooth_l1, x1, y1) - # The input dtype of accuracy_op must be float32 or float64. 
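The smooth_l1 tests being dropped here belong to the layer this patch replaces in detection.py with paddle.nn.loss.SmoothL1Loss. A minimal sketch of that new-style call (random inputs for illustration; note the default 'mean' reduction yields a scalar rather than the old [batch_size, 1] summed loss):

    import paddle

    x = paddle.rand([3, 3])
    y = paddle.rand([3, 3])
    loss_fn = paddle.nn.loss.SmoothL1Loss()  # same class the detection.py hunk uses
    loss = loss_fn(x, y)                     # 0-D tensor with reduction='mean'
    print(float(loss))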
- x2 = fluid.layers.data(name='x2', shape=[4], dtype="int32") - y2 = fluid.layers.data(name='x2', shape=[4], dtype="int32") - self.assertRaises(TypeError, fluid.layers.smooth_l1, x2, y2) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_beam_search_op.py b/python/paddle/fluid/tests/unittests/test_beam_search_op.py index bc737a5ed55f4f..d492560a509725 100644 --- a/python/paddle/fluid/tests/unittests/test_beam_search_op.py +++ b/python/paddle/fluid/tests/unittests/test_beam_search_op.py @@ -312,7 +312,7 @@ def test_errors(self): name='pre_scores', shape=[1], lod_level=2, dtype='float32' ) probs = fluid.data(name='probs', shape=[10000], dtype='float32') - topk_scores, topk_indices = fluid.layers.topk(probs, k=4) + topk_scores, topk_indices = paddle.topk(probs, k=4) accu_scores = fluid.layers.elementwise_add( x=paddle.log(x=topk_scores), y=paddle.reshape(pre_scores, shape=[-1]), diff --git a/python/paddle/fluid/tests/unittests/test_ctc_align.py b/python/paddle/fluid/tests/unittests/test_ctc_align.py index 18d8cb35e6ebed..9e76d29a775c82 100644 --- a/python/paddle/fluid/tests/unittests/test_ctc_align.py +++ b/python/paddle/fluid/tests/unittests/test_ctc_align.py @@ -18,7 +18,6 @@ from op_test import OpTest import paddle -import paddle.fluid as fluid def CTCAlign(input, lod, blank, merge_repeated, padding=0, input_length=None): @@ -226,50 +225,6 @@ def config(self): ) -class TestCTCAlignOpApi(unittest.TestCase): - def test_api(self): - x = fluid.layers.data('x', shape=[4], dtype='float32') - y = fluid.layers.ctc_greedy_decoder(x, blank=0) - - x_pad = fluid.layers.data('x_pad', shape=[4, 4], dtype='float32') - x_pad_len = fluid.layers.data('x_pad_len', shape=[1], dtype='int64') - y_pad, y_pad_len = fluid.layers.ctc_greedy_decoder( - x_pad, blank=0, input_length=x_pad_len - ) - - place = fluid.CPUPlace() - x_tensor = fluid.create_lod_tensor( - np.random.rand(8, 4).astype("float32"), [[4, 4]], place - ) - - x_pad_tensor = np.random.rand(2, 4, 4).astype("float32") - x_pad_len_tensor = np.array([[4], [4]]).reshape([2, 1]).astype("int64") - - exe = fluid.Executor(place) - - exe.run(fluid.default_startup_program()) - ret = exe.run( - feed={ - 'x': x_tensor, - 'x_pad': x_pad_tensor, - 'x_pad_len': x_pad_len_tensor, - }, - fetch_list=[y, y_pad, y_pad_len], - return_numpy=False, - ) - - -class BadInputTestCTCAlignr(unittest.TestCase): - def test_error(self): - with fluid.program_guard(fluid.Program()): - - def test_bad_x(): - x = fluid.layers.data(name='x', shape=[8], dtype='int64') - cost = fluid.layers.ctc_greedy_decoder(input=x, blank=0) - - self.assertRaises(TypeError, test_bad_x) - - if __name__ == "__main__": paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 64e671c8ee9bd7..62def4247037f5 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -1519,8 +1519,8 @@ def test_topk(self): with self.dynamic_graph(): with _test_eager_guard(): input = fluid.dygraph.to_variable(np.random.random((13, 11))) - top5_values1, top5_indices1 = layers.topk(input, k=5) - top5_values2, top5_indices2 = layers.topk( + top5_values1, top5_indices1 = paddle.topk(input, k=5) + top5_values2, top5_indices2 = paddle.topk( input, k=fluid.dygraph.to_variable(np.array([5])) ) np.testing.assert_array_equal( @@ -1531,8 +1531,8 @@ def test_topk(self): ) input = fluid.dygraph.to_variable(np.random.random((13, 
11))) - top5_values1, top5_indices1 = layers.topk(input, k=5) - top5_values2, top5_indices2 = layers.topk( + top5_values1, top5_indices1 = paddle.topk(input, k=5) + top5_values2, top5_indices2 = paddle.topk( input, k=fluid.dygraph.to_variable(np.array([5])) ) np.testing.assert_array_equal( @@ -3104,7 +3104,7 @@ def make_multiplex(self): x1 = self._get_data(name='x1', shape=[4], dtype='float32') x2 = self._get_data(name='x2', shape=[4], dtype='float32') index = self._get_data(name='index', shape=[1], dtype='int32') - out = layers.multiplex(inputs=[x1, x2], index=index) + out = paddle.multiplex(inputs=[x1, x2], index=index) return out def make_softmax_with_cross_entropy(self): @@ -3144,15 +3144,6 @@ def make_softmax_with_cross_entropy(self): self.assertIsNotNone(loss4) return loss4 - def make_smooth_l1(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data(name='x', shape=[4], dtype='float32') - y = self._get_data(name='label', shape=[4], dtype='float32') - loss = layers.smooth_l1(x, y) - return loss - def make_scatter(self): with program_guard( fluid.default_main_program(), fluid.default_startup_program() @@ -3192,7 +3183,7 @@ def make_topk(self): fluid.default_main_program(), fluid.default_startup_program() ): data = self._get_data(name="label", shape=[200], dtype="float32") - values, indices = layers.topk(data, k=5) + values, indices = paddle.topk(data, k=5) return values return indices @@ -3559,20 +3550,6 @@ def test_dynamic_lstmp(self): ) ) - def test_im2sequence(self): - # TODO(minqiyang): dygraph do not support lod now - with self.static_graph(): - x = layers.data(name='x', shape=[3, 128, 128], dtype='float32') - y = layers.data(name='y', shape=[], dtype='float32') - output = layers.im2sequence( - input=x, - input_image_size=y, - stride=[1, 1], - filter_size=[2, 2], - out_stride=[1, 1], - ) - return output - def test_lod_reset(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): diff --git a/python/paddle/fluid/tests/unittests/test_smooth_l1_loss_op.py b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss_op.py index ba251c05ac69e0..6d259617c02481 100644 --- a/python/paddle/fluid/tests/unittests/test_smooth_l1_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss_op.py @@ -17,8 +17,6 @@ import numpy as np from op_test import OpTest -import paddle.fluid as fluid - def smooth_l1_loss_forward(val, sigma2): abs_val = abs(val) @@ -124,22 +122,5 @@ def test_check_grad_ingore_y(self): ) -class TestSmoothL1LossOpError(unittest.TestCase): - def test_errors(self): - with fluid.program_guard(fluid.Program(), fluid.Program()): - # The input type of accuracy_op must be Variable. - x1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace() - ) - y1 = fluid.create_lod_tensor( - np.array([[-1]]), [[1]], fluid.CPUPlace() - ) - self.assertRaises(TypeError, fluid.layers.smooth_l1, x1, y1) - # The input dtype of accuracy_op must be float32 or float64. 
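make_multiplex above now goes through paddle.multiplex; its row-selection behavior, shown with the inputs from the removed layers.multiplex docstring:

    import numpy as np
    import paddle

    img1 = paddle.to_tensor(np.array([[1, 2], [3, 4]], dtype='float32'))
    img2 = paddle.to_tensor(np.array([[5, 6], [7, 8]], dtype='float32'))
    index = paddle.to_tensor(np.array([[1], [0]], dtype='int32'))
    # row i of the result is taken from inputs[index[i]][i]
    res = paddle.multiplex([img1, img2], index)
    print(res.numpy())  # [[5. 6.] [3. 4.]]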
- x2 = fluid.layers.data(name='x2', shape=[4], dtype="int32") - y2 = fluid.layers.data(name='x2', shape=[4], dtype="int32") - self.assertRaises(TypeError, fluid.layers.smooth_l1, x2, y2) - - if __name__ == '__main__': unittest.main() From 2cb07a1ff21ff4a5d7f01e90520aeba7974a8def Mon Sep 17 00:00:00 2001 From: heyanru <81976792+heyanru01@users.noreply.github.com> Date: Mon, 5 Dec 2022 14:02:30 +0800 Subject: [PATCH 143/154] [Fluid Clean] remove fluid.layers.continuous_value_model (#48509) --- python/paddle/fluid/layers/nn.py | 58 ----------------------------- python/paddle/static/nn/__init__.py | 1 - 2 files changed, 59 deletions(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index a4125088c8a2f7..56765c19cba659 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -102,7 +102,6 @@ 'bilinear_tensor_product', 'merge_selected_rows', 'get_tensor_from_selected_rows', - 'continuous_value_model', 'unfold', 'deformable_roi_pooling', 'shard_index', @@ -5563,63 +5562,6 @@ def get_tensor_from_selected_rows(x, name=None): return out -def continuous_value_model(input, cvm, use_cvm=True): - r""" - - **continuous_value_model layers** - - Now, this OP is used in CTR project to remove or dispose show and click value in :attr:`input`. - - :attr:`input` is an embedding vector including show and click value, whose shape is :math:`[N, D]` (N is batch size. D is `2 + embedding dim` ). - Show and click at first two dims of embedding vector D. - If :attr:`use_cvm` is True, it will calculate :math:`log(show)` and :math:`log(click)` , and output shape is :math:`[N, D]` . - If :attr:`use_cvm` is False, it will remove show and click from :attr:`input` , and output shape is :math:`[N, D - 2]` . - :attr:`cvm` is show_click info, whose shape is :math:`[N, 2]` . - - Args: - input (Variable): The input variable. A 2-D LoDTensor with shape :math:`[N, D]` , where N is the batch size, D is `2 + the embedding dim` . `lod level = 1` . - A Tensor with type float32, float64. - cvm (Variable): Show and click variable. A 2-D Tensor with shape :math:`[N, 2]` , where N is the batch size, 2 is show and click. - A Tensor with type float32, float64. - use_cvm (bool): Use show_click or not. if use, the output dim is the same as input. - if not use, the output dim is `input dim - 2` (remove show and click) - - Returns: - - Variable: A 2-D LodTensor with shape :math:`[N, M]` . if :attr:`use_cvm` = True, M is equal to input dim D. if False, M is equal to `D - 2`. \ - A Tensor with same type as input. - - Examples: - - .. 
code-block:: python - - import paddle.fluid as fluid - input = fluid.data(name="input", shape=[64, 1], dtype="int64") - label = fluid.data(name="label", shape=[64, 1], dtype="int64") - embed = fluid.layers.embedding( - input=input, - size=[100, 11], - dtype='float32') - ones = fluid.layers.fill_constant_batch_size_like(input=label, shape=[-1, 1], dtype="int64", value=1) - show_clk = fluid.layers.cast(fluid.layers.concat([ones, label], axis=1), dtype='float32') - show_clk.stop_gradient = True - input_with_cvm = fluid.layers.continuous_value_model(embed, show_clk, True) - - """ - helper = LayerHelper('cvm', **locals()) - out = helper.create_variable(dtype=input.dtype) - check_variable_and_dtype( - input, 'input', ['float16', 'float32', 'float64'], 'cvm' - ) - helper.append_op( - type='cvm', - inputs={'X': [input], 'CVM': [cvm]}, - outputs={'Y': [out]}, - attrs={"use_cvm": use_cvm}, - ) - return out - - def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None): r""" diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py index 3d3cc5f8a2bb8e..1849cfd395a553 100755 --- a/python/paddle/static/nn/__init__.py +++ b/python/paddle/static/nn/__init__.py @@ -40,7 +40,6 @@ from ...fluid.input import embedding # noqa: F401 from ...fluid.contrib.layers import sparse_embedding # noqa: F401 -from ...fluid.layers import continuous_value_model # noqa: F401 from ...fluid.layers import StaticRNN # noqa: F401 from ...fluid.layers.sequence_lod import sequence_conv # noqa: F401 From 0c1d68e1a55b2f206172ebe9483dfe75436d4ca9 Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Mon, 5 Dec 2022 14:09:55 +0800 Subject: [PATCH 144/154] fix custom operator backward=None (#48656) --- .../custom_operator/custom_operator_node.cc | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/eager/custom_operator/custom_operator_node.cc b/paddle/fluid/eager/custom_operator/custom_operator_node.cc index 1f0a055cbd3863..5c3c2fbe7e9c60 100644 --- a/paddle/fluid/eager/custom_operator/custom_operator_node.cc +++ b/paddle/fluid/eager/custom_operator/custom_operator_node.cc @@ -217,18 +217,20 @@ RunCustomOpNode::operator()( VLOG(6) << "Prepare Grad outputs for size: " << grad_outputs_names.size(); for (size_t i = 0; i < OutputMeta().size(); i++) { if (map[0][0].find(i) != map[0][0].end()) { + int grad_output_idx = map[0][0][i]; VLOG(7) << "Insert grad outputs: " << i - << " with size: " << OutputMeta()[i].size() - << " to tmp_outputs: " << map[0][0][i]; - for (size_t j = 0; j < OutputMeta()[i].size(); j++) { - outs[i].emplace_back(/* init it incase of copy nullptr of shared_ptr */ - std::make_shared( - phi::DataType::UNDEFINED), - egr::Controller::Instance().GenerateUniqueName( - "custom_tmp_grad")); - egr::EagerUtils::autograd_meta(&(outs[i][j])); + << " with size: " << OutputMeta()[grad_output_idx].size() + << " to tmp_outputs: " << grad_output_idx; + for (size_t j = 0; j < OutputMeta()[grad_output_idx].size(); j++) { + outs[grad_output_idx] + .emplace_back(/* init it incase of copy nullptr of shared_ptr */ + std::make_shared( + phi::DataType::UNDEFINED), + egr::Controller::Instance().GenerateUniqueName( + "custom_tmp_grad")); + egr::EagerUtils::autograd_meta(&(outs[grad_output_idx][j])); } - tmp_outs[map[0][0][i]] = outs[i]; + tmp_outs[grad_output_idx] = outs[grad_output_idx]; } } for (size_t i = 0; i < tmp_outs.size(); i++) { From 9913da022d091fb652bf2c472c8209a70cc3e947 Mon Sep 17 00:00:00 2001 From: risemeup1 
<62429225+risemeup1@users.noreply.github.com> Date: Mon, 5 Dec 2022 14:21:47 +0800 Subject: [PATCH 145/154] Setuptools (#48301) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * test * test * test * test * test * suport setuptools for paddle * modify paddle_build.sh * modify paddle_build.sh * modify paddle_build.sh * modify paddle_build.sh * modify paddle_build.sh * test * modify setup.py * modify build_options * modify build_options * modify paddle_build.sh * modify setup.py * modify paddle_build.sh * modify setup.py * modify setup.py * modify setup.py * modify setup.py * modfiy paddle_build.sh * debug * debug * debug * dddd * debug * debug * debug * debug * debug * debug * debug * debug * debug * fix bug that no version.py * debug * debug * debug * debug * debug * debug * Delete .pre-commit-config.yaml * debug * support ninja * support ninja * debug * debug * debug * support setuptools for paddle * modify code style * debug * debug * debug * debug * 取消make clean * 取消make clean * debug * debug * debug * debug for py3 * debug * debug * debug * 将mkdir_and_copy_file单独封装一个函数 * modify paddle_build.sh * modify setup.py after zhangbo reviewd --- .pre-commit-config.yaml | 0 paddle/scripts/paddle_build.sh | 227 +++++- python/CMakeLists.txt | 75 +- python/env_dict.py.in | 69 ++ setup.py | 1380 ++++++++++++++++++++++++++++++++ 5 files changed, 1725 insertions(+), 26 deletions(-) mode change 100755 => 100644 .pre-commit-config.yaml create mode 100644 python/env_dict.py.in create mode 100644 setup.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml old mode 100755 new mode 100644 diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 1153f636136d1e..4e563496d35290 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -768,7 +768,9 @@ function run_linux_cpu_test() { mkdir -p ${PADDLE_ROOT}/build cd ${PADDLE_ROOT}/build pip install hypothesis - pip install ${PADDLE_ROOT}/build/python/dist/*whl + if [ -d "${PADDLE_ROOT}/build/python/dist/" ]; then + pip install ${PADDLE_ROOT}/build/python/dist/*whl + fi cp ${PADDLE_ROOT}/build/python/paddle/fluid/tests/unittests/op_test.py ${PADDLE_ROOT}/build/python cp ${PADDLE_ROOT}/build/python/paddle/fluid/tests/unittests/testsuite.py ${PADDLE_ROOT}/build/python cp -r ${PADDLE_ROOT}/build/python/paddle/fluid/tests/unittests/white_list ${PADDLE_ROOT}/build/python @@ -917,6 +919,7 @@ set +x set -ex fi } + function get_precision_ut_mac() { on_precision=0 UT_list=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d') @@ -1050,7 +1053,9 @@ function generate_api_spec() { else pip install -r ${PADDLE_ROOT}/python/requirements.txt fi - pip --no-cache-dir install ${PADDLE_ROOT}/build/python/dist/*whl + if [ -d "${PADDLE_ROOT}/build/python/dist/" ]; then + pip --no-cache-dir install ${PADDLE_ROOT}/build/python/dist/*whl + fi spec_path=${PADDLE_ROOT}/paddle/fluid/API_${spec_kind}.spec python ${PADDLE_ROOT}/tools/print_signatures.py paddle > $spec_path @@ -2863,7 +2868,9 @@ function parallel_test() { mkdir -p ${PADDLE_ROOT}/build cd ${PADDLE_ROOT}/build pip install hypothesis - pip install ${PADDLE_ROOT}/build/python/dist/*whl + if [ -d "${PADDLE_ROOT}/build/python/dist/" ]; then + pip install ${PADDLE_ROOT}/build/python/dist/*whl + fi cp ${PADDLE_ROOT}/build/python/paddle/fluid/tests/unittests/testsuite.py ${PADDLE_ROOT}/build/python cp -r ${PADDLE_ROOT}/build/python/paddle/fluid/tests/unittests/white_list ${PADDLE_ROOT}/build/python 
ut_total_startTime_s=`date +%s` @@ -3454,7 +3461,219 @@ function check_coverage_build() { fi set -x } +function run_setup(){ + rm -rf ${PADDLE_ROOT}/build + startTime_s=`date +%s` + SYSTEM=`uname -s` + if [ "$SYSTEM" == "Darwin" ]; then + echo "Using python abi: $1" + if [ "$1" == "cp36-cp36m" ] || [ "$1" == "" ]; then + if [ -d "/Library/Frameworks/Python.framework/Versions/3.6" ]; then + export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.6/lib/ + export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.6/lib/ + export PATH=/Library/Frameworks/Python.framework/Versions/3.6/bin/:${PATH} + #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export + export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.6/bin/python3 + export PYTHON_INCLUDE_DIR=/Library/Frameworks/Python.framework/Versions/3.6/include/python3.6m/ + export PYTHON_LIBRARY=/Library/Frameworks/Python.framework/Versions/3.6/lib/libpython3.6m.dylib + pip3.6 install --user -r ${PADDLE_ROOT}/python/requirements.txt + else + exit 1 + fi + elif [ "$1" == "cp37-cp37m" ]; then + if [ -d "/Library/Frameworks/Python.framework/Versions/3.7" ]; then + export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.7/lib/ + export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.7/lib/ + export PATH=/Library/Frameworks/Python.framework/Versions/3.7/bin/:${PATH} + #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export + export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.7/bin/python3 + export PYTHON_INCLUDE_DIR=/Library/Frameworks/Python.framework/Versions/3.7/include/python3.7m/ + export PYTHON_LIBRARY=/Library/Frameworks/Python.framework/Versions/3.7/lib/libpython3.7m.dylib + + pip3.7 install --user -r ${PADDLE_ROOT}/python/requirements.txt + else + exit 1 + fi + elif [ "$1" == "cp38-cp38" ]; then + if [ -d "/Library/Frameworks/Python.framework/Versions/3.8" ]; then + export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.8/lib/ + export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.8/lib/ + export PATH=/Library/Frameworks/Python.framework/Versions/3.8/bin/:${PATH} + #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export + export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.8/bin/python3 + export PYTHON_INCLUDE_DIR=/Library/Frameworks/Python.framework/Versions/3.8/include/python3.8/ + export PYTHON_LIBRARY=/Library/Frameworks/Python.framework/Versions/3.8/lib/libpython3.8.dylib + pip3.8 install --user -r ${PADDLE_ROOT}/python/requirements.txt + else + exit 1 + fi + elif [ "$1" == "cp39-cp39" ]; then + if [ -d "/Library/Frameworks/Python.framework/Versions/3.9" ]; then + export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.9/lib/ + export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.9/lib/ + export PATH=/Library/Frameworks/Python.framework/Versions/3.9/bin/:${PATH} + #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export + export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.9/bin/python3 + export PYTHON_INCLUDE_DIR=/Library/Frameworks/Python.framework/Versions/3.9/include/python3.9/ + export PYTHON_LIBRARY=/Library/Frameworks/Python.framework/Versions/3.9/lib/libpython3.9.dylib + pip3.9 install --user -r ${PADDLE_ROOT}/python/requirements.txt + 
else + exit 1 + fi + elif [ "$1" == "cp310-cp310" ]; then + if [ -d "/Library/Frameworks/Python.framework/Versions/3.10" ]; then + export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.10/lib/ + export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.10/lib/ + export PATH=/Library/Frameworks/Python.framework/Versions/3.10/bin/:${PATH} + #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export + export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.9/lib/libpython3.9.dylib + export PYTHON_INCLUDE_DIR=/Library/Frameworks/Python.framework/Versions/3.10/include/python3.10/ + export PYTHON_LIBRARY=/Library/Frameworks/Python.framework/Versions/3.10/lib/libpython3.10.dylib + pip3.10 install --user -r ${PADDLE_ROOT}/python/requirements.txt + else + exit 1 + fi + fi + else + if [ "$1" != "" ]; then + echo "using python abi: $1" + if [ "$1" == "cp36-cp36m" ]; then + export LD_LIBRARY_PATH=/opt/_internal/cpython-3.6.0/lib/:${LD_LIBRARY_PATH} + export PATH=/opt/_internal/cpython-3.6.0/bin/:${PATH} + #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export + export PYTHON_EXECUTABLE=/opt/_internal/cpython-3.6.0/bin/python3 + export PYTHON_INCLUDE_DIR=/opt/_internal/cpython-3.6.0/include/python3.6m + export PYTHON_LIBRARIES=/opt/_internal/cpython-3.6.0/lib/libpython3.so + pip3.6 install -r ${PADDLE_ROOT}/python/requirements.txt + elif [ "$1" == "cp37-cp37m" ]; then + export LD_LIBRARY_PATH=/opt/_internal/cpython-3.7.0/lib/:${LD_LIBRARY_PATH} + export PATH=/opt/_internal/cpython-3.7.0/bin/:${PATH} + #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export + export PYTHON_EXECUTABLE=/opt/_internal/cpython-3.7.0/bin/python3.7 + export PYTHON_INCLUDE_DIR=/opt/_internal/cpython-3.7.0/include/python3.7m + export PYTHON_LIBRARIES=/opt/_internal/cpython-3.7.0/lib/libpython3.so + pip3.7 install -r ${PADDLE_ROOT}/python/requirements.txt + elif [ "$1" == "cp38-cp38" ]; then + export LD_LIBRARY_PATH=/opt/_internal/cpython-3.8.0/lib/:${LD_LIBRARY_PATH} + export PATH=/opt/_internal/cpython-3.8.0/bin/:${PATH} + #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export + export PYTHON_EXECUTABLE=/opt/_internal/cpython-3.8.0/bin/python3.8 + export PYTHON_INCLUDE_DIR=/opt/_internal/cpython-3.8.0/include/python3.8 + export PYTHON_LIBRARIES=/opt/_internal/cpython-3.8.0/lib/libpython3.so + pip3.8 install -r ${PADDLE_ROOT}/python/requirements.txt + elif [ "$1" == "cp39-cp39" ]; then + export LD_LIBRARY_PATH=/opt/_internal/cpython-3.9.0/lib/:${LD_LIBRARY_PATH} + export PATH=/opt/_internal/cpython-3.9.0/bin/:${PATH} + #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export + export PYTHON_EXECUTABLE=/opt/_internal/cpython-3.9.0/bin/python3.9 + export PYTHON_INCLUDE_DIR=/opt/_internal/cpython-3.9.0/include/python3.9 + export PYTHON_LIBRARIES=/opt/_internal/cpython-3.9.0/lib/libpython3.so + pip3.9 install -r ${PADDLE_ROOT}/python/requirements.txt + elif [ "$1" == "cp310-cp310" ]; then + export LD_LIBRARY_PATH=/opt/_internal/cpython-3.10.0/lib/:${LD_LIBRARY_PATH} + export PATH=/opt/_internal/cpython-3.10.0/bin/:${PATH} + #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export + export PYTHON_EXECUTABLE=/opt/_internal/cpython-3.10.0/bin/python3.10 + export PYTHON_INCLUDE_DIR=/opt/_internal/cpython-3.10.0/include/python3.10 + export PYTHON_LIBRARIES=/opt/_internal/cpython-3.10.0/lib/libpython3.so + pip3.10 install -r 
${PADDLE_ROOT}/python/requirements.txt + elif [ "$1" == "conda-python3.7" ]; then + export LD_LIBRARY_PATH=/opt/conda/lib/:${LD_LIBRARY_PATH} + export PATH=/opt/conda/bin/:${PATH} + #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export + export DPYTHON_EXECUTABLE=/opt/conda/bin/python + export PYTHON_INCLUDE_DIR=/opt/conda/include/python3.7m + export PYTHON_LIBRARIES=/opt/conda/lib/libpython3.so + /opt/conda/bin/pip install -r ${PADDLE_ROOT}/python/requirements.txt + fi + else + pip install -r ${PADDLE_ROOT}/python/requirements.txt + fi + fi + + if [ "$SYSTEM" == "Darwin" ]; then + WITH_DISTRIBUTE="OFF" + WITH_AVX=${WITH_AVX:-ON} + WITH_ARM=${WITH_ARM:-OFF} + INFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR:-~/.cache/inference_demo} + else + INFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR:-/root/.cache/inference_demo} + fi + + distibuted_flag=${WITH_DISTRIBUTE:-OFF} + gloo_flag=${distibuted_flag} + + if [ "$CMD" != "assert_file_approvals" ];then + which python + python -V + python -m pip install distro + python ${PADDLE_ROOT}/tools/summary_env.py + bash ${PADDLE_ROOT}/tools/get_cpu_info.sh + fi + export CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} + export WITH_GPU=${WITH_GPU:-OFF} + export WITH_TENSORRT=${WITH_TENSORRT:-ON} + export WITH_ROCM=${WITH_ROCM:-OFF} + export WITH_CINN=${WITH_CINN:-OFF} + export WITH_DISTRIBUTE=${distibuted_flag} + export WITH_MKL=${WITH_MKL:-ON} + export WITH_AVX=${WITH_AVX:-OFF} + export CUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} + export NEW_RELEASE_PYPI=${NEW_RELEASE_PYPI:-OFF} + export NEW_RELEASE_ALL=${NEW_RELEASE_ALL:-OFF} + export NEW_RELEASE_JIT=${NEW_RELEASE_JIT:-OFF} + export WITH_PYTHON=${WITH_PYTHON:-ON} + export CUDNN_ROOT=/usr/ + export WITH_TESTING=${WITH_TESTING:-ON} + export WITH_COVERAGE=${WITH_COVERAGE:-OFF} + export WITH_INCREMENTAL_COVERAGE=${WITH_INCREMENTAL_COVERAGE:-OFF} + export CMAKE_MODULE_PATH=/opt/rocm/hip/cmake + export CMAKE_EXPORT_COMPILE_COMMANDS=ON + export WITH_CONTRIB=${WITH_CONTRIB:-ON} + export WITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} + export WITH_INFRT=${WITH_INFRT:-OFF} + export INFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} + export PY_VERSION=${PY_VERSION:-3.7} + export CMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build} + export WITH_PSCORE=${distibuted_flag} + export WITH_PSLIB=${WITH_PSLIB:-OFF} + export WITH_GLOO=${gloo_flag} + export LITE_GIT_TAG=release/v2.10 + export WITH_XPU=${WITH_XPU:-OFF} + export WITH_MLU=${WITH_MLU:-OFF} + export WITH_IPU=${WITH_IPU:-OFF} + export WITH_CNCL=${WITH_CNCL:-OFF} + export XPU_SDK_ROOT=${XPU_SDK_ROOT:-} + export WITH_LITE=${WITH_LITE:-OFF} + export WITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF} + export WITH_ARM=${WITH_ARM:-OFF} + export WITH_ASCEND=${WITH_ASCEND:-OFF} + export WITH_ASCEND_CL=${WITH_ASCEND_CL:-OFF} + export WITH_ASCEND_INT64=${WITH_ASCEND_INT64:-OFF} + export WITH_STRIP=${WITH_STRIP:-ON} + export ON_INFER=${ON_INFER:-OFF} + export WITH_HETERPS=${WITH_HETERPS:-OFF} + export WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} + export CUDA_ARCH_BIN=${CUDA_ARCH_BIN} + export WITH_RECORD_BUILDTIME=${WITH_RECORD_BUILDTIME:-OFF} + export WITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF} + export WITH_ONNXRUNTIME=${WITH_ONNXRUNTIME:-OFF} + export WITH_CUDNN_FRONTEND=${WITH_CUDNN_FRONTEND:-OFF} + + # reset ccache zero stats for collect PR's actual hit rate + ccache -z + + python setup.py install;build_error=$? 
+ + # ci will collect ccache hit rate + collect_ccache_hits + + if [ "$build_error" != 0 ];then + exit 7; + fi + +} function main() { local CMD=$1 local parallel_number=$2 @@ -3678,7 +3897,7 @@ function main() { parallel_test ;; build_gpubox) - cmake_gen_and_build ${PYTHON_ABI:-""} ${parallel_number} + run_setup ${PYTHON_ABI:-""} ;; check_xpu) cmake_gen_and_build ${PYTHON_ABI:-""} ${parallel_number} diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 3a3c98a9e99562..3c6ac0229d58d8 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -22,8 +22,13 @@ set(SETUP_LOG_FILE "setup.py.log") set(FLUID_CORE_NAME "libpaddle") -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in - ${CMAKE_CURRENT_BINARY_DIR}/setup.py) +if(WITH_SETUP_INSTALL) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/env_dict.py.in + ${CMAKE_CURRENT_BINARY_DIR}/env_dict.py) +else() + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in + ${CMAKE_CURRENT_BINARY_DIR}/setup.py) +endif() set(FLUID_DST_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/) @@ -59,25 +64,48 @@ if(WITH_TESTING) endif() if(WIN32) - add_custom_command( - OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp - COMMAND - ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/python/paddle - ${PADDLE_BINARY_DIR}/python/paddle/ - COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py - bdist_wheel - COMMENT "Packing whl packages------>>>" - DEPENDS copy_libpaddle ${FLUID_CORE} framework_py_proto profiler_py_proto - pass_desc_py_proto ${PY_FILES}) + if(WITH_SETUP_INSTALL) + add_custom_command( + OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp + COMMAND + ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/python/paddle + ${PADDLE_BINARY_DIR}/python/paddle/ + COMMENT "Packing whl packages------>>>" + DEPENDS copy_libpaddle ${FLUID_CORE} framework_py_proto profiler_py_proto + pass_desc_py_proto ${PY_FILES}) + else() + add_custom_command( + OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp + COMMAND + ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/python/paddle + ${PADDLE_BINARY_DIR}/python/paddle/ + COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py + bdist_wheel + COMMENT "Packing whl packages------>>>" + DEPENDS copy_libpaddle ${FLUID_CORE} framework_py_proto profiler_py_proto + pass_desc_py_proto ${PY_FILES}) + endif() else() - add_custom_command( - OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp - COMMAND touch stub.cc - COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - COMMENT "Packing whl packages------>>>" - DEPENDS copy_libpaddle ${FLUID_CORE} framework_py_proto profiler_py_proto - pass_desc_py_proto ${PY_FILES}) + if(WITH_SETUP_INSTALL) + add_custom_command( + OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp + COMMAND touch stub.cc + COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle + ${PADDLE_BINARY_DIR}/python + COMMENT "Packing whl packages------>>>" + DEPENDS copy_libpaddle ${FLUID_CORE} framework_py_proto profiler_py_proto + pass_desc_py_proto ${PY_FILES}) + else() + add_custom_command( + OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp + COMMAND touch stub.cc + COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle + ${PADDLE_BINARY_DIR}/python + COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel + COMMENT "Packing whl packages------>>>" + DEPENDS copy_libpaddle ${FLUID_CORE} framework_py_proto profiler_py_proto + pass_desc_py_proto ${PY_FILES}) + endif() endif() add_custom_target(paddle_python ALL @@ 
-93,8 +121,11 @@ if(WITH_TESTING) add_subdirectory(paddle/fluid/contrib/tests) add_subdirectory(paddle/fluid/contrib/slim/tests) endif() -install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR} - DESTINATION opt/paddle/share/wheels) + +if(NOT WITH_SETUP_INSTALL) + install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR} + DESTINATION opt/paddle/share/wheels) +endif() if(APPLE) find_program(INSTALL_NAME_TOOL_EXECUTABLE install_name_tool) diff --git a/python/env_dict.py.in b/python/env_dict.py.in new file mode 100644 index 00000000000000..5b479c7ae4c97e --- /dev/null +++ b/python/env_dict.py.in @@ -0,0 +1,69 @@ +env_dict={ + 'PADDLE_SOURCE_DIR':'@PADDLE_SOURCE_DIR@', + 'PADDLE_VERSION':'@PADDLE_VERSION@', + 'PADDLE_BINARY_DIR':'@PADDLE_BINARY_DIR@', + 'TAG_VERSION_REGEX':'@TAG_VERSION_REGEX@', + 'WITH_GPU':'@WITH_GPU@', + 'CUDNN_MAJOR_VERSION':'@CUDNN_MAJOR_VERSION@', + 'CUDNN_MINOR_VERSION':'@CUDNN_MINOR_VERSION@', + 'CUDNN_PATCHLEVEL_VERSION':'@CUDNN_PATCHLEVEL_VERSION@', + 'CUDA_VERSION':'@CUDA_VERSION@', + 'WITH_PSLI':'@WITH_PSLI@', + 'FLUID_CORE_NAME':'@FLUID_CORE_NAME@', + 'WARPCTC_LIBRARIES':'@WARPCTC_LIBRARIES@', + 'LAPACK_LIB':'@LAPACK_LIB@', + 'GFORTRAN_LIB':'@GFORTRAN_LIB@', + 'GNU_RT_LIB_1':'@GNU_RT_LIB_1@', + 'WITH_CUDNN_DSO':'@WITH_CUDNN_DSO@', + 'CUDNN_LIBRARY':'@CUDNN_LIBRARY@', + 'GNU_RT_LIB_2':'@GNU_RT_LIB_2@', + 'WITH_MKL':'@WITH_MKL@', + 'MKLML_SHARED_LIB':'@MKLML_SHARED_LIB@', + 'MKLML_SHARED_IOMP_LIB':'@MKLML_SHARED_IOMP_LIB@', + 'OPENBLAS_SHARED_LIB':'@OPENBLAS_SHARED_LIB@', + 'OPENBLAS_LIB':'@OPENBLAS_LIB@', + 'BLAS_LIB':'@BLAS_LIB@', + 'WITH_LITE':'@WITH_LITE@', + 'LITE_SHARED_LIB':'@LITE_SHARED_LIB@', + 'LITE_WITH_NNADAPTER':'@LITE_WITH_NNADAPTER@', + 'LITE_NNADAPTER_LIB':'@LITE_NNADAPTER_LIB@', + 'NNADAPTER_WITH_HUAWEI_ASCEND_NPU':'@NNADAPTER_WITH_HUAWEI_ASCEND_NPU@', + 'LITE_NNADAPTER_NPU_LIB':'@LITE_NNADAPTER_NPU_LIB@', + 'WITH_CINN':'@WITH_CINN@', + 'CINN_LIB_LOCATION':'@CINN_LIB_LOCATION@', + 'CINN_LIB_NAME':'@CINN_LIB_NAME@', + 'CINN_INCLUDE_DIR':'@CINN_INCLUDE_DIR@', + 'CMAKE_BUILD_TYPE':'@CMAKE_BUILD_TYPE@', + 'PSLIB_LIB':'@PSLIB_LIB@', + 'PSLIB_VERSION_PY':'@PSLIB_VERSION_PY@', + 'WITH_MKLDNN':'@WITH_MKLDNN@', + 'MKLDNN_SHARED_LIB':'@MKLDNN_SHARED_LIB@', + 'MKLDNN_SHARED_LIB_1':'@MKLDNN_SHARED_LIB_1@', + 'MKLDNN_SHARED_LIB_2':'@MKLDNN_SHARED_LIB_2@', + 'MKLDNN_INSTALL_DIR':'@MKLDNN_INSTALL_DIR@', + 'WITH_ONNXRUNTIME':'@WITH_ONNXRUNTIME@', + 'ONNXRUNTIME_SHARED_LIB':'@ONNXRUNTIME_SHARED_LIB@', + 'PADDLE2ONNX_LIB':'@PADDLE2ONNX_LIB@', + 'PADDLE2ONNX_LIB_NAME':'@PADDLE2ONNX_LIB_NAME@', + 'ONNXRUNTIME_LIB_NAME':'@ONNXRUNTIME_LIB_NAME@', + 'WITH_XPU':'@WITH_XPU@', + 'XPU_API_LIB':'@XPU_API_LIB@', + 'XPU_API_LIB_NAME':'@XPU_API_LIB_NAME@', + 'XPU_RT_LIB':'@XPU_RT_LIB@', + 'XPU_RT_LIB_NAME':'@XPU_RT_LIB_NAME@', + 'WITH_XPU_BKCL':'@WITH_XPU_BKCL@', + 'XPU_BKCL_LIB':'@XPU_BKCL_LIB@', + 'XPU_BKCL_LIB_NAME':'@XPU_BKCL_LIB_NAME@', + 'THIRD_PARTY_PATH':'@THIRD_PARTY_PATH@', + 'SETUP_LOG_FILE':'@SETUP_LOG_FILE@', + 'WITH_STRIP':'@WITH_STRIP@', + 'PACKAGE_NAME':'@PACKAGE_NAME@', + 'PADDLE_VERSION':'@PADDLE_VERSION@', + 'APPLE':'@APPLE@', + 'externalError_INCLUDE_DIR':'@externalError_INCLUDE_DIR@', + 'WITH_ROCM':'@WITH_ROCM@', + 'ORIGIN':'@ORIGIN@', + 'WIN32':'@WIN32@', + 'JIT_RELEASE_WHL':'@JIT_RELEASE_WHL@', + 'WITH_PSLIB':'@WITH_PSLIB@' +} diff --git a/setup.py b/setup.py new file mode 100644 index 00000000000000..58458c5470740f --- /dev/null +++ b/setup.py @@ -0,0 +1,1380 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
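The env_dict.py.in template above is filled in by CMake's configure_file (see the WITH_SETUP_INSTALL branch in python/CMakeLists.txt), and the new setup.py reads the generated module instead of being templated itself. A rough sketch of that consumption, assuming the generated env_dict.py is importable from the build directory; printed values depend on the actual build configuration:

    from env_dict import env_dict

    print(env_dict.get("PADDLE_VERSION"))   # version string configured by CMake
    print(env_dict.get("FLUID_CORE_NAME"))  # 'libpaddle', per python/CMakeLists.txt
    print(env_dict.get("WITH_GPU"))         # 'ON' or 'OFF'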
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import errno +import fnmatch +import glob +import multiprocessing +import os +import platform +import re +import shutil +import subprocess +import sys +from contextlib import contextmanager +from distutils.spawn import find_executable +from subprocess import CalledProcessError + +from setuptools import Command, Distribution, Extension, setup +from setuptools.command.egg_info import egg_info +from setuptools.command.install import install as InstallCommandBase +from setuptools.command.install_lib import install_lib +from setuptools.dist import Distribution + +if sys.version_info < (3, 7): + raise RuntimeError( + "Paddle only supports Python version>=3.7 now, you are using Python %s" + % platform.python_version() + ) +else: + if os.getenv("PY_VERSION") is None: + print("export PY_VERSION = %s" % platform.python_version()) + python_version = platform.python_version() + os.environ["PY_VERSION"] = python_version + +# check cmake +CMAKE = find_executable('cmake3') or find_executable('cmake') +assert ( + CMAKE +), 'The "cmake" executable is not found. Please check if Cmake is installed.' + +TOP_DIR = os.path.dirname(os.path.realpath(__file__)) + +IS_WINDOWS = os.name == 'nt' + + +def filter_setup_args(input_args): + cmake_and_build = True + only_cmake = False + rerun_cmake = False + filter_args_list = [] + for arg in input_args: + if arg == 'rerun-cmake': + rerun_cmake = True # delete Cmakecache.txt and rerun cmake + continue + if arg == 'only-cmake': + only_cmake = True # only cmake and do not make, leave a chance for users to adjust build options + continue + if arg in ['clean', 'egg_info', 'sdist']: + cmake_and_build = False + filter_args_list.append(arg) + return cmake_and_build, only_cmake, rerun_cmake, filter_args_list + + +cmake_and_build, only_cmake, rerun_cmake, filter_args_list = filter_setup_args( + sys.argv +) + + +def parse_input_command(input_parameters): + dist = Distribution() + # get script name :setup.py + sys.argv = input_parameters + dist.script_name = os.path.basename(sys.argv[0]) + # get args of setup.py + dist.script_args = sys.argv[1:] + print( + "Start executing python {} {}".format( + dist.script_name, "".join(dist.script_args) + ) + ) + try: + dist.parse_command_line() + except: + print( + "An error occurred while parsing the parameters, '%s'" + % dist.script_args + ) + sys.exit(1) + + +class BinaryDistribution(Distribution): + def has_ext_modules(foo): + return True + + +RC = 0 +ext_suffix = ( + '.dll' + if os.name == 'nt' + else ('.dylib' if sys.platform == 'darwin' else '.so') +) + + +def get_header_install_dir(header): + if 'pb.h' in header: + install_dir = re.sub( + env_dict.get("PADDLE_BINARY_DIR") + '/', '', header + ) + elif 'third_party' not in header: + # paddle headers + install_dir = re.sub( + env_dict.get("PADDLE_SOURCE_DIR") + '/', '', header + ) + print('install_dir: ', install_dir) + if 'fluid/jit' in install_dir: + install_dir = re.sub('fluid/jit', 'jit', install_dir) + print('fluid/jit install_dir: ', 
install_dir) + if 'trace_event.h' in install_dir: + install_dir = re.sub( + 'fluid/platform/profiler', + 'phi/backends/custom', + install_dir, + ) + print('trace_event.h install_dir: ', install_dir) + else: + # third_party + install_dir = re.sub( + env_dict.get("THIRD_PARTY_PATH") + '/', 'third_party', header + ) + patterns = ['install/mkldnn/include'] + for pattern in patterns: + install_dir = re.sub(pattern, '', install_dir) + return install_dir + + +class InstallHeaders(Command): + """Override how headers are copied.""" + + description = 'install C/C++ header files' + + user_options = [ + ('install-dir=', 'd', 'directory to install header files to'), + ('force', 'f', 'force installation (overwrite existing files)'), + ] + + boolean_options = ['force'] + + def initialize_options(self): + self.install_dir = None + self.force = 0 + self.outfiles = [] + + def finalize_options(self): + self.set_undefined_options( + 'install', ('install_headers', 'install_dir'), ('force', 'force') + ) + + def run(self): + hdrs = self.distribution.headers + if not hdrs: + return + self.mkpath(self.install_dir) + for header in hdrs: + install_dir = get_header_install_dir(header) + install_dir = os.path.join( + self.install_dir, os.path.dirname(install_dir) + ) + if not os.path.exists(install_dir): + self.mkpath(install_dir) + (out, _) = self.copy_file(header, install_dir) + self.outfiles.append(out) + # (out, _) = self.mkdir_and_copy_file(header) + # self.outfiles.append(out) + + def get_inputs(self): + return self.distribution.headers or [] + + def get_outputs(self): + return self.outfiles + + +class InstallCommand(InstallCommandBase): + def finalize_options(self): + + ret = InstallCommandBase.finalize_options(self) + self.install_lib = self.install_platlib + print("install_lib:", self.install_platlib) + + self.install_headers = os.path.join( + self.install_platlib, 'paddle', 'include' + ) + print("install_headers:", self.install_headers) + return ret + + +class EggInfo(egg_info): + """Copy license file into `.dist-info` folder.""" + + def run(self): + # don't duplicate license into `.dist-info` when building a distribution + if not self.distribution.have_run.get('install', True): + self.mkpath(self.egg_info) + self.copy_file( + env_dict.get("PADDLE_SOURCE_DIR") + "/LICENSE", self.egg_info + ) + + egg_info.run(self) + + +# class Installlib is rewritten to add header files to .egg/paddle +class InstallLib(install_lib): + def run(self): + self.build() + outfiles = self.install() + hrds = self.distribution.headers + if not hrds: + return + for header in hrds: + install_dir = get_header_install_dir(header) + install_dir = os.path.join( + self.install_dir, 'paddle/include', os.path.dirname(install_dir) + ) + if not os.path.exists(install_dir): + self.mkpath(install_dir) + self.copy_file(header, install_dir) + if outfiles is not None: + # always compile, in case we have any extension stubs to deal with + self.byte_compile(outfiles) + + +def git_commit(): + try: + cmd = ['git', 'rev-parse', 'HEAD'] + git_commit = ( + subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + cwd=env_dict.get("PADDLE_SOURCE_DIR"), + ) + .communicate()[0] + .strip() + ) + except: + git_commit = 'Unknown' + git_commit = git_commit.decode('utf-8') + return str(git_commit) + + +def _get_version_detail(idx): + assert ( + idx < 3 + ), "vesion info consists of %(major)d.%(minor)d.%(patch)d, \ + so detail index must less than 3" + tag_version_regex = env_dict.get("TAG_VERSION_REGEX") + paddle_version = env_dict.get("PADDLE_VERSION") + if 
re.match(tag_version_regex, paddle_version): + version_details = paddle_version.split('.') + if len(version_details) >= 3: + return version_details[idx] + return 0 + + +def _mkdir_p(dir_str): + try: + os.makedirs(dir_str) + except OSError as e: + raise RuntimeError("Failed to create folder build/") + + +def get_major(): + return int(_get_version_detail(0)) + + +def get_minor(): + return int(_get_version_detail(1)) + + +def get_patch(): + return str(_get_version_detail(2)) + + +def get_cuda_version(): + with_gpu = env_dict.get("WITH_GPU") + if with_gpu == 'ON': + return env_dict.get("CUDA_VERSION") + else: + return 'False' + + +def get_cudnn_version(): + with_gpu = env_dict.get("WITH_GPU") + if with_gpu == 'ON': + temp_cudnn_version = '' + cudnn_major_version = env_dict.get("CUDNN_MAJOR_VERSION") + if cudnn_major_version: + temp_cudnn_version += cudnn_major_version + cudnn_minor_version = env_dict.get("CUDNN_MINOR_VERSION") + if cudnn_minor_version: + temp_cudnn_version = ( + temp_cudnn_version + '.' + cudnn_minor_version + ) + cudnn_patchlevel_version = env_dict.get( + "CUDNN_PATCHLEVEL_VERSION" + ) + if cudnn_patchlevel_version: + temp_cudnn_version = ( + temp_cudnn_version + '.' + cudnn_patchlevel_version + ) + return temp_cudnn_version + else: + return 'False' + + +def is_taged(): + try: + cmd = [ + 'git', + 'describe', + '--exact-match', + '--tags', + 'HEAD', + '2>/dev/null', + ] + git_tag = ( + subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + cwd=env_dict.get("PADDLE_SOURCE_DIR"), + ) + .communicate()[0] + .strip() + ) + git_tag = git_tag.decode() + except: + return False + if str(git_tag).replace('v', '') == env_dict.get("PADDLE_VERSION"): + return True + else: + return False + + +def write_version_py(filename='paddle/version/__init__.py'): + cnt = '''# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY +# +full_version = '%(major)d.%(minor)d.%(patch)s' +major = '%(major)d' +minor = '%(minor)d' +patch = '%(patch)s' +rc = '%(rc)d' +cuda_version = '%(cuda)s' +cudnn_version = '%(cudnn)s' +istaged = %(istaged)s +commit = '%(commit)s' +with_mkl = '%(with_mkl)s' + +__all__ = ['cuda', 'cudnn', 'show'] + +def show(): + """Get the version of paddle if `paddle` package if tagged. Otherwise, output the corresponding commit id. + + Returns: + If paddle package is not tagged, the commit-id of paddle will be output. + Otherwise, the following information will be output. + + full_version: version of paddle + + major: the major version of paddle + + minor: the minor version of paddle + + patch: the patch level version of paddle + + rc: whether it's rc version + + cuda: the cuda version of package. It will return `False` if CPU version paddle package is installed + + cudnn: the cudnn version of package. It will return `False` if CPU version paddle package is installed + + Examples: + .. code-block:: python + + import paddle + + # Case 1: paddle is tagged with 2.2.0 + paddle.version.show() + # full_version: 2.2.0 + # major: 2 + # minor: 2 + # patch: 0 + # rc: 0 + # cuda: '10.2' + # cudnn: '7.6.5' + + # Case 2: paddle is not tagged + paddle.version.show() + # commit: cfa357e984bfd2ffa16820e354020529df434f7d + # cuda: '10.2' + # cudnn: '7.6.5' + """ + if istaged: + print('full_version:', full_version) + print('major:', major) + print('minor:', minor) + print('patch:', patch) + print('rc:', rc) + else: + print('commit:', commit) + print('cuda:', cuda_version) + print('cudnn:', cudnn_version) + +def mkl(): + return with_mkl + +def cuda(): + """Get cuda version of paddle package. 
+ + Returns: + string: Return the version information of cuda. If paddle package is CPU version, it will return False. + + Examples: + .. code-block:: python + + import paddle + + paddle.version.cuda() + # '10.2' + + """ + return cuda_version + +def cudnn(): + """Get cudnn version of paddle package. + + Returns: + string: Return the version information of cudnn. If paddle package is CPU version, it will return False. + + Examples: + .. code-block:: python + + import paddle + + paddle.version.cudnn() + # '7.6.5' + + """ + return cudnn_version +''' + commit = git_commit() + + dirname = os.path.dirname(filename) + + try: + os.makedirs(dirname) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + with open(filename, 'w') as f: + f.write( + cnt + % { + 'major': get_major(), + 'minor': get_minor(), + 'patch': get_patch(), + 'rc': RC, + 'version': env_dict.get("PADDLE_VERSION"), + 'cuda': get_cuda_version(), + 'cudnn': get_cudnn_version(), + 'commit': commit, + 'istaged': is_taged(), + 'with_mkl': env_dict.get("WITH_MKL"), + } + ) + + +def write_cuda_env_config_py(filename='paddle/cuda_env.py'): + cnt = "" + if env_dict.get("JIT_RELEASE_WHL") == 'ON': + cnt = '''# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY +# +import os +os.environ['CUDA_CACHE_MAXSIZE'] = '805306368' +''' + + with open(filename, 'w') as f: + f.write(cnt) + + +def write_parameter_server_version_py( + filename='paddle/fluid/incubate/fleet/parameter_server/version.py', +): + cnt = ''' + +# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY + +from paddle.fluid.incubate.fleet.base.mode import Mode + +BUILD_MODE=Mode.%(mode)s + +def is_transpiler(): + return Mode.TRANSPILER == BUILD_MODE + +''' + + dirname = os.path.dirname(filename) + + try: + os.makedirs(dirname) + except OSError as e: + if e.errno != errno.EEXIST: + raise + with open(filename, 'w') as f: + f.write( + cnt + % { + 'mode': 'PSLIB' + if env_dict.get("WITH_PSLIB") == 'ON' + else 'TRANSPILER' + } + ) + + +def find_files(pattern, root, recursive=False): + for dirpath, _, files in os.walk(root): + for filename in fnmatch.filter(files, pattern): + yield os.path.join(dirpath, filename) + if not recursive: + break + + +@contextmanager +def cd(path): + if not os.path.isabs(path): + raise RuntimeError('Can only cd to absolute path, got: {}'.format(path)) + orig_path = os.getcwd() + os.chdir(path) + try: + yield + finally: + os.chdir(orig_path) + + +def options_process(args, build_options): + for key, value in sorted(build_options.items()): + if value is not None: + args.append("-D{}={}".format(key, value)) + + +def cmake_run(args, build_path): + with cd(build_path): + cmake_args = [] + cmake_args.append(CMAKE) + cmake_args.append('-DWITH_SETUP_INSTALL=ON') + cmake_args += args + cmake_args.append(TOP_DIR) + print("cmake_args:", cmake_args) + subprocess.check_call(cmake_args) + + +def build_run(args, build_path, envrion_var): + with cd(build_path): + build_args = [] + build_args.append(CMAKE) + build_args += args + # cmake_args.append(TOP_DIR) + print(" ".join(build_args)) + try: + subprocess.check_call(build_args, cwd=build_path, env=envrion_var) + except (CalledProcessError, KeyboardInterrupt) as e: + sys.exit(1) + + +def build_steps(): + print('------- Building start ------') + if not os.path.exists(TOP_DIR + '/build'): + _mkdir_p(TOP_DIR + '/build') + build_path = TOP_DIR + '/build' + # run cmake to generate native build files + cmake_cache_file_path = os.path.join(build_path, "CMakeCache.txt") + # if rerun_cmake is True,remove CMakeCache.txt and rerun 
camke + if os.path.isfile(cmake_cache_file_path) and rerun_cmake is True: + os.remove(cmake_cache_file_path) + if not os.path.exists(cmake_cache_file_path): + env_var = os.environ.copy() # get env variables + paddle_build_options = {} + other_options = {} + other_options.update( + { + option: option + for option in ( + "PYTHON_LIBRARY", + "INFERENCE_DEMO_INSTALL_DIR", + "ON_INFER", + "PYTHON_EXECUTABLE", + "TENSORRT_ROOT", + "CUDA_ARCH_NAME", + "CUDA_ARCH_BIN", + "PYTHON_INCLUDE_DIR", + "PYTHON_LIBRARIES", + "PY_VERSION", + "CUB_PATH", + "NEW_RELEASE_PYPI", + "CUDNN_ROOT", + "THIRD_PARTY_PATH", + "NOAVX_CORE_FILE", + "LITE_GIT_TAG", + "CUDA_TOOLKIT_ROOT_DIR", + "NEW_RELEASE_JIT", + "XPU_SDK_ROOT", + "MSVC_STATIC_CRT", + "Ninja", + "NEW_RELEASE_ALL", + ) + } + ) + # if environment variables which start with "WITH_" or "CMAKE_",put it into build_options + for option_key, option_value in env_var.items(): + if option_key.startswith(("CMAKE_", "WITH_")): + paddle_build_options[option_key] = option_value + if option_key in other_options: + print("type:", type(other_options[option_key])) + if ( + option_key == 'PYTHON_EXECUTABLE' + or option_key == 'PYTHON_LIBRARY' + or option_key == 'PYTHON_LIBRARIES' + ): + key = option_key + ":FILEPATH" + print(key) + elif option_key == 'PYTHON_INCLUDE_DIR': + key = key = option_key + ':PATH' + print(key) + else: + key = other_options[option_key] + if key not in paddle_build_options: + paddle_build_options[key] = option_value + args = [] + options_process(args, paddle_build_options) + print("args:", args) + cmake_run(args, build_path) + # make + if only_cmake: + print( + "You have finished running cmake, the program exited,run 'ccmake build' to adjust build options and 'python setup.py install to build'" + ) + sys.exit() + build_args = ["--build", ".", "--target", "install", "--config", 'Release'] + max_jobs = os.getenv("MAX_JOBS") + if max_jobs is not None: + max_jobs = max_jobs or str(multiprocessing.cpu_count()) + + build_args += ["--"] + if IS_WINDOWS: + build_args += ["/p:CL_MPCount={}".format(max_jobs)] + else: + build_args += ["-j", max_jobs] + else: + build_args += ["-j", str(multiprocessing.cpu_count())] + environ_var = os.environ.copy() + build_run(build_args, build_path, environ_var) + + +def get_setup_requires(): + with open( + env_dict.get("PADDLE_SOURCE_DIR") + '/python/requirements.txt' + ) as f: + setup_requires = ( + f.read().splitlines() + ) # Specify the dependencies to install + if sys.version_info >= (3, 7): + setup_requires_tmp = [] + for setup_requires_i in setup_requires: + if ( + "<\"3.6\"" in setup_requires_i + or "<=\"3.6\"" in setup_requires_i + or "<\"3.5\"" in setup_requires_i + or "<=\"3.5\"" in setup_requires_i + or "<\"3.7\"" in setup_requires_i + ): + continue + setup_requires_tmp += [setup_requires_i] + setup_requires = setup_requires_tmp + return setup_requires + else: + raise RuntimeError( + "please check your python version,Paddle only support Python version>=3.7 now" + ) + + +def get_package_data_and_package_dir(): + if os.name != 'nt': + package_data = { + 'paddle.fluid': [env_dict.get("FLUID_CORE_NAME") + '.so'] + } + else: + package_data = { + 'paddle.fluid': [ + env_dict.get("FLUID_CORE_NAME") + '.pyd', + env_dict.get("FLUID_CORE_NAME") + '.lib', + ] + } + package_data['paddle.fluid'] += [ + paddle_binary_dir + '/python/paddle/cost_model/static_op_benchmark.json' + ] + if 'develop' in sys.argv: + package_dir = { + '': paddle_binary_dir.split('/')[-1] + '/python', + # '':'build/python', + # The paddle.fluid.proto will 
be generated while compiling. + # So that package points to other directory. + 'paddle.fluid.proto.profiler': paddle_binary_dir.split('/')[-1] + + '/paddle/fluid/platform', + 'paddle.fluid.proto': paddle_binary_dir.split('/')[-1] + + '/paddle/fluid/framework', + 'paddle.fluid': paddle_binary_dir.split('/')[-1] + + '/python/paddle/fluid', + } + else: + package_dir = { + '': env_dict.get("PADDLE_BINARY_DIR") + '/python', + 'paddle.fluid.proto.profiler': env_dict.get("PADDLE_BINARY_DIR") + + '/paddle/fluid/platform', + 'paddle.fluid.proto': env_dict.get("PADDLE_BINARY_DIR") + + '/paddle/fluid/framework', + 'paddle.fluid': env_dict.get("PADDLE_BINARY_DIR") + + '/python/paddle/fluid', + } + # put all thirdparty libraries in paddle.libs + libs_path = paddle_binary_dir + '/python/paddle/libs' + package_data['paddle.libs'] = [] + package_data['paddle.libs'] = [ + ('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_suffix + ] + shutil.copy(env_dict.get("WARPCTC_LIBRARIES"), libs_path) + package_data['paddle.libs'] += [ + os.path.basename(env_dict.get("LAPACK_LIB")), + os.path.basename(env_dict.get("BLAS_LIB")), + os.path.basename(env_dict.get("GFORTRAN_LIB")), + os.path.basename(env_dict.get("GNU_RT_LIB_1")), + ] + shutil.copy(env_dict.get("BLAS_LIB"), libs_path) + shutil.copy(env_dict.get("LAPACK_LIB"), libs_path) + shutil.copy(env_dict.get("GFORTRAN_LIB"), libs_path) + shutil.copy(env_dict.get("GNU_RT_LIB_1"), libs_path) + if env_dict.get("WITH_CUDNN_DSO") == 'ON' and os.path.exists( + env_dict.get("CUDNN_LIBRARY") + ): + package_data['paddle.libs'] += [ + os.path.basename(env_dict.get("CUDNN_LIBRARY")) + ] + shutil.copy(env_dict.get("CUDNN_LIBRARY"), libs_path) + if ( + sys.platform.startswith("linux") + and env_dict.get("CUDNN_MAJOR_VERSION") == '8' + ): + # libcudnn.so includes libcudnn_ops_infer.so, libcudnn_ops_train.so, + # libcudnn_cnn_infer.so, libcudnn_cnn_train.so, libcudnn_adv_infer.so, + # libcudnn_adv_train.so + cudnn_lib_files = glob.glob( + os.path.dirname(env_dict.get("CUDNN_LIBRARY")) + + '/libcudnn_*so.8' + ) + for cudnn_lib in cudnn_lib_files: + if os.path.exists(cudnn_lib): + package_data['paddle.libs'] += [os.path.basename(cudnn_lib)] + shutil.copy(cudnn_lib, libs_path) + if not sys.platform.startswith("linux"): + package_data['paddle.libs'] += [ + os.path.basename(env_dict.get("GNU_RT_LIB_2")) + ] + shutil.copy(env_dict.get("GNU_RT_LIB_2"), libs_path) + if env_dict.get("WITH_MKL") == 'ON': + shutil.copy(env_dict.get("MKLML_SHARED_LIB"), libs_path) + shutil.copy(env_dict.get("MKLML_SHARED_IOMP_LIB"), libs_path) + package_data['paddle.libs'] += [ + ('libmklml_intel' if os.name != 'nt' else 'mklml') + ext_suffix, + ('libiomp5' if os.name != 'nt' else 'libiomp5md') + ext_suffix, + ] + else: + if os.name == 'nt': + # copy the openblas.dll + shutil.copy(env_dict.get("OPENBLAS_SHARED_LIB"), libs_path) + package_data['paddle.libs'] += ['openblas' + ext_suffix] + elif ( + os.name == 'posix' + and platform.machine() == 'aarch64' + and env_dict.get("OPENBLAS_LIB").endswith('so') + ): + # copy the libopenblas.so on linux+aarch64 + # special: libpaddle.so without avx depends on 'libopenblas.so.0', not 'libopenblas.so' + if os.path.exists(env_dict.get("OPENBLAS_LIB") + '.0'): + shutil.copy(env_dict.get("OPENBLAS_LIB") + '.0', libs_path) + package_data['paddle.libs'] += ['libopenblas.so.0'] + + if env_dict.get("WITH_LITE") == 'ON': + shutil.copy(env_dict.get("LITE_SHARED_LIB"), libs_path) + package_data['paddle.libs'] += [ + 'libpaddle_full_api_shared' + ext_suffix + ] + if 
env_dict.get("LITE_WITH_NNADAPTER") == 'ON': + shutil.copy(env_dict.get("LITE_NNADAPTER_LIB"), libs_path) + package_data['paddle.libs'] += ['libnnadapter' + ext_suffix] + if env_dict.get("NNADAPTER_WITH_HUAWEI_ASCEND_NPU") == 'ON': + shutil.copy(env_dict.get("LITE_NNADAPTER_NPU_LIB"), libs_path) + package_data['paddle.libs'] += [ + 'libnnadapter_driver_huawei_ascend_npu' + ext_suffix + ] + if env_dict.get("WITH_CINN") == 'ON': + shutil.copy( + env_dict.get("CINN_LIB_LOCATION") + + '/' + + env_dict.get("CINN_LIB_NAME"), + libs_path, + ) + shutil.copy( + env_dict.get("CINN_INCLUDE_DIR") + + '/cinn/runtime/cuda/cinn_cuda_runtime_source.cuh', + libs_path, + ) + package_data['paddle.libs'] += ['libcinnapi.so'] + package_data['paddle.libs'] += ['cinn_cuda_runtime_source.cuh'] + if env_dict.get("CMAKE_BUILD_TYPE") == 'Release' and os.name != 'nt': + command = ( + "patchelf --set-rpath '$ORIGIN/' %s/" % libs_path + + env_dict.get("CINN_LIB_NAME") + ) + if os.system(command) != 0: + raise Exception( + 'patch ' + + libs_path + + '/' + + env_dict.get("CINN_LIB_NAME") + + ' failed', + 'command: %s' % command, + ) + if env_dict.get("WITH_PSLIB") == 'ON': + shutil.copy(env_dict.get("PSLIB_LIB"), libs_path) + if os.path.exists(env_dict.get("PSLIB_VERSION_PY")): + shutil.copy( + env_dict.get("PSLIB_VERSION_PY"), + paddle_binary_dir + + '/python/paddle/fluid/incubate/fleet/parameter_server/pslib/', + ) + package_data['paddle.libs'] += ['libps' + ext_suffix] + if env_dict.get("WITH_MKLDNN") == 'ON': + if env_dict.get("CMAKE_BUILD_TYPE") == 'Release' and os.name != 'nt': + # only change rpath in Release mode. + # TODO(typhoonzero): use install_name_tool to patch mkl libs once + # we can support mkl on mac. + # + # change rpath of libdnnl.so.1, add $ORIGIN/ to it. + # The reason is that all thirdparty libraries in the same directory, + # thus, libdnnl.so.1 will find libmklml_intel.so and libiomp5.so. 
+ command = "patchelf --set-rpath '$ORIGIN/' " + env_dict.get( + "MKLDNN_SHARED_LIB" + ) + if os.system(command) != 0: + raise Exception( + "patch libdnnl.so failed, command: %s" % command + ) + shutil.copy(env_dict.get("MKLDNN_SHARED_LIB"), libs_path) + if os.name != 'nt': + shutil.copy(env_dict.get("MKLDNN_SHARED_LIB_1"), libs_path) + shutil.copy(env_dict.get("MKLDNN_SHARED_LIB_2"), libs_path) + package_data['paddle.libs'] += [ + 'libmkldnn.so.0', + 'libdnnl.so.1', + 'libdnnl.so.2', + ] + else: + package_data['paddle.libs'] += ['mkldnn.dll'] + + if env_dict.get("WITH_ONNXRUNTIME") == 'ON': + shutil.copy(env_dict.get("ONNXRUNTIME_SHARED_LIB"), libs_path) + shutil.copy(env_dict.get("PADDLE2ONNX_LIB"), libs_path) + if os.name == 'nt': + package_data['paddle.libs'] += [ + 'paddle2onnx.dll', + 'onnxruntime.dll', + ] + else: + package_data['paddle.libs'] += [ + env_dict.get("PADDLE2ONNX_LIB_NAME"), + env_dict.get("ONNXRUNTIME_LIB_NAME"), + ] + + if env_dict.get("WITH_XPU") == 'ON': + # only change rpath in Release mode, + if env_dict.get("CMAKE_BUILD_TYPE") == 'Release': + if os.name != 'nt': + if env_dict.get("APPLE") == "1": + command = ( + "install_name_tool -id \"@loader_path/\" " + + env_dict.get("XPU_API_LIB") + ) + else: + command = "patchelf --set-rpath '$ORIGIN/' " + env_dict.get( + "XPU_API_LIB" + ) + if os.system(command) != 0: + raise Exception( + 'patch ' + env_dict.get("XPU_API_LIB") + 'failed ,', + "command: %s" % command, + ) + shutil.copy(env_dict.get("XPU_API_LIB"), libs_path) + shutil.copy(env_dict.get("XPU_RT_LIB"), libs_path) + package_data['paddle.libs'] += [ + env_dict.get("XPU_API_LIB_NAME"), + env_dict.get("XPU_RT_LIB_NAME"), + ] + + if env_dict.get("WITH_XPU_BKCL") == 'ON': + shutil.copy(env_dict.get("XPU_BKCL_LIB"), libs_path) + package_data['paddle.libs'] += [env_dict.get("XPU_BKCL_LIB_NAME")] + + # remove unused paddle/libs/__init__.py + if os.path.isfile(libs_path + '/__init__.py'): + os.remove(libs_path + '/__init__.py') + package_dir['paddle.libs'] = libs_path + + # change rpath of ${FLUID_CORE_NAME}.ext, add $ORIGIN/../libs/ to it. + # The reason is that libwarpctc.ext, libiomp5.ext etc are in paddle.libs, and + # ${FLUID_CORE_NAME}.ext is in paddle.fluid, thus paddle/fluid/../libs will pointer to above libraries. + # This operation will fix https://github.com/PaddlePaddle/Paddle/issues/3213 + if env_dict.get("CMAKE_BUILD_TYPE") == 'Release': + if os.name != 'nt': + # only change rpath in Release mode, since in Debug mode, ${FLUID_CORE_NAME}.xx is too large to be changed. + if env_dict.get("APPLE") == "1": + commands = [ + "install_name_tool -id '@loader_path/../libs/' " + + env_dict.get("PADDLE_BINARY_DIR") + + '/python/paddle/fluid/' + + env_dict.get("FLUID_CORE_NAME") + + '.so' + ] + commands.append( + "install_name_tool -add_rpath '@loader_path/../libs/' " + + env_dict.get("PADDLE_BINARY_DIR") + + '/python/paddle/fluid/' + + env_dict.get("FLUID_CORE_NAME") + + '.so' + ) + else: + commands = [ + "patchelf --set-rpath '$ORIGIN/../libs/' " + + env_dict.get("PADDLE_BINARY_DIR") + + '/python/paddle/fluid/' + + env_dict.get("FLUID_CORE_NAME") + + '.so' + ] + # The sw_64 not suppot patchelf, so we just disable that. 
+ if platform.machine() != 'sw_64' and platform.machine() != 'mips64': + for command in commands: + if os.system(command) != 0: + raise Exception( + 'patch ' + + env_dict.get("FLUID_CORE_NAME") + + '.%s failed' % ext_suffix, + 'command: %s' % command, + ) + # A list of extensions that specify c++ -written modules that compile source code into dynamically linked libraries + ext_modules = [Extension('_foo', [paddle_binary_dir + '/python/stub.cc'])] + if os.name == 'nt': + # fix the path separator under windows + fix_package_dir = {} + for k, v in package_dir.items(): + fix_package_dir[k] = v.replace('/', '\\') + package_dir = fix_package_dir + ext_modules = [] + elif sys.platform == 'darwin': + ext_modules = [] + return package_data, package_dir, ext_modules + + +def get_headers(): + headers = ( + # paddle level api headers + list(find_files('*.h', paddle_source_dir + '/paddle')) + + list(find_files('*.h', paddle_source_dir + '/paddle/phi/api')) + + list( # phi unify api header + find_files('*.h', paddle_source_dir + '/paddle/phi/api/ext') + ) + + list( # custom op api + find_files('*.h', paddle_source_dir + '/paddle/phi/api/include') + ) + + list( # phi api + find_files('*.h', paddle_source_dir + '/paddle/phi/common') + ) + + list( + find_files('*.h', paddle_source_dir + '/paddle/phi') + ) # phi common headers + # phi level api headers (low level api) + + list( # phi extension header + find_files( + '*.h', paddle_source_dir + '/paddle/phi/include', recursive=True + ) + ) + + list( # phi include headers + find_files( + '*.h', + paddle_source_dir + '/paddle/phi/backends', + recursive=True, + ) + ) + + list( # phi backends headers + find_files( + '*.h', paddle_source_dir + '/paddle/phi/core', recursive=True + ) + ) + + list( # phi core headers + find_files( + '*.h', + paddle_source_dir + '/paddle/phi/infermeta', + recursive=True, + ) + ) + + list( # phi infermeta headers + find_files('*.h', paddle_source_dir + '/paddle/phi/kernels') + ) + + list( # phi kernels headers + find_files('*.h', paddle_source_dir + '/paddle/phi/kernels/sparse') + ) + + list( # phi sparse kernels headers + find_files( + '*.h', paddle_source_dir + '/paddle/phi/kernels/selected_rows' + ) + ) + + list( # phi selected_rows kernels headers + find_files('*.h', paddle_source_dir + '/paddle/phi/kernels/strings') + ) + + list( # phi sparse kernels headers + find_files( + '*.h', paddle_source_dir + '/paddle/phi/kernels/primitive' + ) + ) + + list( # phi kernel primitive api headers + # capi headers + find_files( + '*.h', paddle_source_dir + '/paddle/phi/capi', recursive=True + ) + ) + + list( # phi capi headers + # profiler headers + find_files( + 'trace_event.h', + paddle_source_dir + '/paddle/fluid/platform/profiler', + ) + ) + + list( # phi profiler headers + # utils api headers + find_files( + '*.h', paddle_source_dir + '/paddle/utils', recursive=True + ) + ) + ) # paddle utils headers + + jit_layer_headers = [ + 'layer.h', + 'serializer.h', + 'serializer_utils.h', + 'all.h', + 'function.h', + ] + + for f in jit_layer_headers: + headers += list( + find_files( + f, paddle_source_dir + '/paddle/fluid/jit', recursive=True + ) + ) + + if env_dict.get("WITH_MKLDNN") == 'ON': + headers += list( + find_files('*', env_dict.get("MKLDNN_INSTALL_DIR") + '/include') + ) # mkldnn + + if env_dict.get("WITH_GPU") == 'ON' or env_dict.get("WITH_ROCM") == 'ON': + # externalErrorMsg.pb for External Error message + headers += list( + find_files('*.pb', env_dict.get("externalError_INCLUDE_DIR")) + ) + return headers + + +def 
get_setup_parameters(): + # get setup_requires + setup_requires = get_setup_requires() + packages = [ + 'paddle', + 'paddle.libs', + 'paddle.utils', + 'paddle.utils.gast', + 'paddle.utils.cpp_extension', + 'paddle.dataset', + 'paddle.reader', + 'paddle.distributed', + 'paddle.distributed.communication', + 'paddle.distributed.communication.stream', + 'paddle.distributed.metric', + 'paddle.distributed.ps', + 'paddle.distributed.ps.utils', + 'paddle.incubate', + 'paddle.incubate.autograd', + 'paddle.incubate.optimizer', + 'paddle.incubate.checkpoint', + 'paddle.incubate.operators', + 'paddle.incubate.tensor', + 'paddle.incubate.multiprocessing', + 'paddle.incubate.nn', + 'paddle.incubate.asp', + 'paddle.incubate.passes', + 'paddle.distribution', + 'paddle.distributed.utils', + 'paddle.distributed.sharding', + 'paddle.distributed.fleet', + 'paddle.distributed.launch', + 'paddle.distributed.launch.context', + 'paddle.distributed.launch.controllers', + 'paddle.distributed.launch.job', + 'paddle.distributed.launch.plugins', + 'paddle.distributed.launch.utils', + 'paddle.distributed.fleet.base', + 'paddle.distributed.fleet.recompute', + 'paddle.distributed.fleet.elastic', + 'paddle.distributed.fleet.meta_optimizers', + 'paddle.distributed.fleet.meta_optimizers.sharding', + 'paddle.distributed.fleet.meta_optimizers.ascend', + 'paddle.distributed.fleet.meta_optimizers.dygraph_optimizer', + 'paddle.distributed.fleet.runtime', + 'paddle.distributed.rpc', + 'paddle.distributed.fleet.dataset', + 'paddle.distributed.fleet.data_generator', + 'paddle.distributed.fleet.metrics', + 'paddle.distributed.fleet.proto', + 'paddle.distributed.fleet.utils', + 'paddle.distributed.fleet.layers', + 'paddle.distributed.fleet.layers.mpu', + 'paddle.distributed.fleet.meta_parallel', + 'paddle.distributed.fleet.meta_parallel.pp_utils', + 'paddle.distributed.fleet.meta_parallel.sharding', + 'paddle.distributed.fleet.meta_parallel.parallel_layers', + 'paddle.distributed.auto_parallel', + 'paddle.distributed.auto_parallel.operators', + 'paddle.distributed.auto_parallel.tuner', + 'paddle.distributed.auto_parallel.cost', + 'paddle.distributed.passes', + 'paddle.distributed.models', + 'paddle.distributed.models.moe', + 'paddle.framework', + 'paddle.jit', + 'paddle.jit.dy2static', + 'paddle.inference', + 'paddle.inference.contrib', + 'paddle.inference.contrib.utils', + 'paddle.fluid', + 'paddle.fluid.dygraph', + 'paddle.fluid.dygraph.amp', + 'paddle.fluid.proto', + 'paddle.fluid.proto.profiler', + 'paddle.fluid.distributed', + 'paddle.fluid.layers', + 'paddle.fluid.dataloader', + 'paddle.fluid.contrib', + 'paddle.fluid.contrib.quantize', + 'paddle.fluid.contrib.slim', + 'paddle.fluid.contrib.slim.quantization', + 'paddle.fluid.contrib.slim.quantization.imperative', + 'paddle.fluid.contrib.extend_optimizer', + 'paddle.fluid.contrib.mixed_precision', + 'paddle.fluid.contrib.mixed_precision.bf16', + 'paddle.fluid.contrib.layers', + 'paddle.fluid.contrib.sparsity', + 'paddle.fluid.transpiler', + 'paddle.fluid.transpiler.details', + 'paddle.fluid.incubate', + 'paddle.fluid.incubate.data_generator', + 'paddle.fluid.incubate.fleet', + 'paddle.fluid.incubate.checkpoint', + 'paddle.fluid.incubate.fleet.base', + 'paddle.fluid.incubate.fleet.parameter_server', + 'paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler', + 'paddle.fluid.incubate.fleet.parameter_server.pslib', + 'paddle.fluid.incubate.fleet.parameter_server.ir', + 'paddle.fluid.incubate.fleet.collective', + 'paddle.fluid.incubate.fleet.utils', + 'paddle.amp', + 
'paddle.cost_model', + 'paddle.hapi', + 'paddle.vision', + 'paddle.vision.models', + 'paddle.vision.transforms', + 'paddle.vision.datasets', + 'paddle.audio', + 'paddle.audio.functional', + 'paddle.audio.features', + 'paddle.audio.datasets', + 'paddle.audio.backends', + 'paddle.text', + 'paddle.text.datasets', + 'paddle.incubate', + 'paddle.incubate.nn', + 'paddle.incubate.nn.functional', + 'paddle.incubate.nn.layer', + 'paddle.incubate.optimizer.functional', + 'paddle.incubate.autograd', + 'paddle.incubate.distributed', + 'paddle.incubate.distributed.utils', + 'paddle.incubate.distributed.utils.io', + 'paddle.incubate.distributed.fleet', + 'paddle.incubate.distributed.models', + 'paddle.incubate.distributed.models.moe', + 'paddle.incubate.distributed.models.moe.gate', + 'paddle.sparse', + 'paddle.sparse.nn', + 'paddle.sparse.nn.layer', + 'paddle.sparse.nn.functional', + 'paddle.incubate.xpu', + 'paddle.io', + 'paddle.optimizer', + 'paddle.nn', + 'paddle.nn.functional', + 'paddle.nn.layer', + 'paddle.nn.quant', + 'paddle.nn.initializer', + 'paddle.nn.utils', + 'paddle.metric', + 'paddle.static', + 'paddle.static.nn', + 'paddle.static.amp', + 'paddle.static.sparsity', + 'paddle.tensor', + 'paddle.onnx', + 'paddle.autograd', + 'paddle.device', + 'paddle.device.cuda', + 'paddle.device.xpu', + 'paddle.version', + 'paddle.profiler', + 'paddle.geometric', + 'paddle.geometric.message_passing', + 'paddle.geometric.sampling', + ] + + paddle_bins = '' + if not env_dict.get("WIN32"): + paddle_bins = [ + env_dict.get("PADDLE_BINARY_DIR") + '/paddle/scripts/paddle' + ] + package_data, package_dir, ext_modules = get_package_data_and_package_dir() + headers = get_headers() + return ( + setup_requires, + packages, + paddle_bins, + package_data, + package_dir, + ext_modules, + headers, + ) + + +def main(): + # Parse the command line and check arguments before we proceed with building steps and setup + parse_input_command(filter_args_list) + + # Execute the build process,cmake and make + if cmake_and_build: + build_steps() + + sys.path.append(TOP_DIR + "/build/python/") + from build.python.env_dict import env_dict as env_dict + + global env_dict + global paddle_binary_dir, paddle_source_dir + paddle_binary_dir = env_dict.get("PADDLE_BINARY_DIR") + paddle_source_dir = env_dict.get("PADDLE_SOURCE_DIR") + + # preparing parameters for setup() + paddle_version = env_dict.get("PADDLE_VERSION") + package_name = env_dict.get("PACKAGE_NAME") + write_version_py( + filename='{}/python/paddle/version/__init__.py'.format( + paddle_binary_dir + ) + ) + write_cuda_env_config_py( + filename='{}/python/paddle/cuda_env.py'.format(paddle_binary_dir) + ) + write_parameter_server_version_py( + filename='{}/python/paddle/fluid/incubate/fleet/parameter_server/version.py'.format( + paddle_binary_dir + ) + ) + + ( + setup_requires, + packages, + scripts, + package_data, + package_dir, + ext_modules, + headers, + ) = get_setup_parameters() + + # Log for PYPI, get long_description of setup() + with open( + paddle_source_dir + '/python/paddle/README.rst', "r", encoding='UTF-8' + ) as f: + long_description = f.read() + + # strip *.so to reduce package size + if env_dict.get("WITH_STRIP") == 'ON': + command = ( + 'find ' + + paddle_binary_dir + + '/python/paddle -name "*.so" | xargs -i strip {}' + ) + if os.system(command) != 0: + raise Exception("strip *.so failed, command: %s" % command) + + setup( + name=package_name, + version=paddle_version, + description='Parallel Distributed Deep Learning', + long_description=long_description, + 
long_description_content_type="text/markdown", + author_email="Paddle-better@baidu.com", + maintainer="PaddlePaddle", + maintainer_email="Paddle-better@baidu.com", + url='https://www.paddlepaddle.org.cn/', + download_url='https://github.com/paddlepaddle/paddle', + license='Apache Software License', + packages=packages, + install_requires=setup_requires, + ext_modules=ext_modules, + package_data=package_data, + package_dir=package_dir, + scripts=scripts, + distclass=BinaryDistribution, + headers=headers, + cmdclass={ + 'install_headers': InstallHeaders, + 'install': InstallCommand, + 'egg_info': EggInfo, + 'install_lib': InstallLib, + }, + entry_points={ + 'console_scripts': [ + 'fleetrun = paddle.distributed.launch.main:launch' + ] + }, + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Operating System :: OS Independent', + 'Intended Audience :: Developers', + 'Intended Audience :: Education', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: C++', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + ], + ) + + +if __name__ == '__main__': + main() From cb812f40e1502966d985483ce9fbf6184e1795b0 Mon Sep 17 00:00:00 2001 From: yunyaoXYY <109218879+yunyaoXYY@users.noreply.github.com> Date: Mon, 5 Dec 2022 14:27:53 +0800 Subject: [PATCH 146/154] [Clean fluid] Clean hash, grid_sampler, log_loss, bilinear_tensor_product. (#48411) * Clean fliud hash * clean fluid grid_sampler * clean log_loss * Move bilinear_tensor_product from fluid to static * Fix unitests when remove log_loss * Fix bug when move bilinear_tensor_product * fix test_fleet_nocvm_1.py * Add bilinear_tensor_product into all list * Fix code style * Fix comments in bilinear_tensor_product * Fix comments in bilinear_tensor_product * Fix comments --- python/paddle/fluid/layers/nn.py | 290 ---------------- .../unittests/npu/test_log_loss_op_npu.py | 32 -- .../fluid/tests/unittests/test_fleet.py | 2 +- .../tests/unittests/test_fleet_nocvm_1.py | 4 +- .../tests/unittests/test_fleet_rolemaker.py | 3 +- .../tests/unittests/test_fleet_rolemaker_2.py | 2 +- .../tests/unittests/test_fleet_rolemaker_3.py | 4 +- .../unittests/test_fleet_unitaccessor.py | 4 +- .../fluid/tests/unittests/test_hash_op.py | 41 --- .../tests/unittests/test_imperative_deepcf.py | 8 +- .../test_imperative_load_static_param.py | 14 +- .../fluid/tests/unittests/test_layers.py | 17 +- .../fluid/tests/unittests/test_log_loss_op.py | 31 -- python/paddle/static/nn/__init__.py | 4 +- python/paddle/static/nn/common.py | 321 ++++++++++-------- 15 files changed, 211 insertions(+), 566 deletions(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 56765c19cba659..34c6387a1643cb 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -96,10 +96,6 @@ 'clip_by_norm', 'mean', 'mul', - 'hash', - 'grid_sampler', - 'log_loss', - 'bilinear_tensor_product', 'merge_selected_rows', 'get_tensor_from_selected_rows', 'unfold', @@ -5223,292 +5219,6 @@ def mul(x, y, x_num_col_dims=1, y_num_col_dims=1, name=None): return out -def hash(input, hash_size, num_hash=1, name=None): - """ - - This OP hash the input to an integer less than the hash_size. 
- The hash algorithm we used was xxHash - Extremely fast hash algorithm - (https://github.com/Cyan4973/xxHash/tree/v0.6.5) - - Args: - input(Variable): A **Two-Dimensional** LoDTensor with type int32, int64. - **Only support LoDTensor**. - num_hash(int, optional): The times of hash, default is 1. - name(str, optional): The default value is None. Normally there is no - need for user to set this property. For more information, please - refer to :ref:`api_guide_Name`. - - Returns: - Variable: A LoDTensor with the same data type as input. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import numpy as np - import paddle - paddle.enable_static() - - place = fluid.core.CPUPlace() - - x = fluid.data(name="x", shape=[2,2], dtype="int32", lod_level=1) - res = fluid.layers.hash(name="res", input=x, hash_size=1000, num_hash=4) - - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - in1 = np.array([[1,2],[3,4]]).astype("int32") - print(in1) - x_i = fluid.create_lod_tensor(in1, [[0, 2]], place) - res = exe.run(fluid.default_main_program(), feed={'x':x_i}, fetch_list=[res], return_numpy=False) - print(np.array(res[0])) - # [[[722] - # [407] - # [337] - # [395]] - # [[603] - # [590] - # [386] - # [901]]] - """ - check_variable_and_dtype(input, 'input', ['int32', 'int64'], 'hash') - check_type(hash_size, 'hash_size', int, 'hash') - check_type(num_hash, 'num_hash', int, 'hash') - helper = LayerHelper('hash', **locals()) - out = helper.create_variable_for_type_inference( - helper.input_dtype(), stop_gradient=True - ) - helper.append_op( - type='hash', - inputs={'X': input}, - outputs={'Out': out}, - attrs={'num_hash': num_hash, 'mod_by': hash_size}, - ) - return out - - -@templatedoc() -def grid_sampler(x, grid, name=None): - """ - - This operation samples input X by using bilinear interpolation based on - flow field grid, which is usually generated by :code:`affine_grid` . The grid of - shape [N, H, W, 2] is the concatenation of (x, y) coordinates - with shape [N, H, W] each, where x is indexing the 4th dimension - (in width dimension) of input data x and y is indexing the 3rd - dimension (in height dimension), finally results is the bilinear - interpolation value of 4 nearest corner points. The output tensor - shape will be [N, C, H, W]. - - .. code-block:: text - - Step 1: - Get (x, y) grid coordinates and scale to [0, H-1/W-1]. - - .. code-block:: text - - grid_x = 0.5 * (grid[:, :, :, 0] + 1) * (W - 1) - grid_y = 0.5 * (grid[:, :, :, 1] + 1) * (H - 1) - - Step 2: - Indices input data X with grid (x, y) in each [H, W] area, and bilinear - interpolate point value by 4 nearest points. 
- - wn ------- y_n ------- en - | | | - | d_n | - | | | - x_w --d_w-- grid--d_e-- x_e - | | | - | d_s | - | | | - ws ------- y_s ------- wn - - x_w = floor(x) // west side x coord - x_e = x_w + 1 // east side x coord - y_n = floor(y) // north side y coord - y_s = y_s + 1 // south side y coord - - d_w = grid_x - x_w // distance to west side - d_e = x_e - grid_x // distance to east side - d_n = grid_y - y_n // distance to north side - d_s = y_s - grid_y // distance to south side - - wn = X[:, :, y_n, x_w] // north-west point value - en = X[:, :, y_n, x_e] // north-east point value - ws = X[:, :, y_s, x_w] // south-east point value - es = X[:, :, y_s, x_w] // north-east point value - - output = wn * d_e * d_s + en * d_w * d_s - + ws * d_e * d_n + es * d_w * d_n - - Args: - x(Variable): The input tensor, which is a 4-D tensor with shape - [N, C, H, W], N is the batch size, C is the channel - number, H and W is the feature height and width. - The data type is float32 or float64. - grid(Variable): Input grid tensor of shape [N, H, W, 2]. The - data type is float32 or float64. - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - - Returns: - Variable: Output of shape [N, C, H, W] data samples input X - using bilnear interpolation based on input grid. - The data type is same as input tensor. - - Examples: - - .. code-block:: python - - import paddle.fluid as fluid - import paddle.fluid as fluid - import paddle - - paddle.enable_static() - # use with affine_grid - x = fluid.data(name='x', shape=[None, 10, 32, 32], dtype='float32') - theta = fluid.layers.data(name='theta', shape=[2, 3], dtype='float32') - grid = fluid.layers.affine_grid(theta=theta, out_shape=[3, 10, 32, 32]) - out = fluid.layers.grid_sampler(x=x, grid=grid) - - """ - helper = LayerHelper("grid_sampler", **locals()) - - check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'grid_sampler') - check_variable_and_dtype( - grid, 'grid', ['float32', 'float64'], 'grid_sampler' - ) - if not isinstance(x, Variable): - return ValueError("The x should be a Variable") - - if not isinstance(grid, Variable): - return ValueError("The grid should be a Variable") - - out = helper.create_variable_for_type_inference(x.dtype) - ipts = {'X': x, 'Grid': grid} - - attrs = {'use_cudnn': False} if core.is_compiled_with_rocm() else {} - - helper.append_op( - type='grid_sampler', inputs=ipts, outputs={'Output': out}, attrs=attrs - ) - return out - - -def log_loss(input, label, epsilon=1e-4, name=None): - r""" - - **Negative Log Loss Layer** - - This layer accepts input predictions and target label and returns the - negative log loss. - - .. math:: - - Out = -label * \log{(input + \epsilon)} - - (1 - label) * \log{(1 - input + \epsilon)} - - Args: - input (Tensor|list): A 2-D tensor with shape [N x 1], where N is the - batch size. This input is a probability computed - by the previous operator. Data type float32. - label (Tensor|list): The ground truth which is a 2-D tensor with - shape [N x 1], where N is the batch size. - Data type float32. - epsilon (float, optional): A small number for numerical stability. Default 1e-4. - name(str|None): For detailed information, please refer to - :ref:`api_guide_Name` . Usually name is no need to set and None by default. - - Returns: - Tensor, which shape is [N x 1], data type is float32. - - Examples: - .. 
code-block:: python - - import paddle - import paddle.nn.functional as F - - label = paddle.randn((10,1)) - prob = paddle.randn((10,1)) - cost = F.log_loss(input=prob, label=label) - """ - return paddle.nn.functional.log_loss(input, label, epsilon, name) - - -def bilinear_tensor_product( - x, y, size, act=None, name=None, param_attr=None, bias_attr=None -): - r""" - :api_attr: Static Graph - - **Bilinear Tensor Product Layer** - - This layer performs bilinear tensor product on two inputs. - For example: - - .. math:: - out_{i} = x * W_{i} * {y^\mathrm{T}}, i=0,1,...,size-1 - - In this formula: - - :math:`x`: the first input contains M elements, shape is [batch_size, M]. - - :math:`y`: the second input contains N elements, shape is [batch_size, N]. - - :math:`W_{i}`: the i-th learned weight, shape is [M, N]. - - :math:`out_{i}`: the i-th element of out, shape is [batch_size, size]. - - :math:`y^\mathrm{T}`: the transpose of :math:`y_{2}`. - - Args: - x (Variable): 2-D input tensor with shape [batch_size, M]. Data type - is float32 or float64. - y (Variable): 2-D input tensor with shape [batch_size, N]. Data type - should be same as **x**. - size (int): The dimension of this layer. - act (str|None): Activation to be applied to the output of this layer. Default None. - name(str|None): For detailed information, please refer to - :ref:`api_guide_Name` . Usually name is no need to set and None by default. - param_attr (ParamAttr|None): To specify the weight parameter attribute. - Default: None, which means the default weight parameter property is - used. See usage for details in :ref:`api_fluid_ParamAttr` . - bias_attr (ParamAttr|None): To specify the bias parameter attribute. - Default: None, which means the default bias parameter property is - used. See usage for details in :ref:`api_fluid_ParamAttr` . - Returns: - Variable: A 2-D Tensor of shape [batch_size, size]. Data type is the same as input **x**. - - Examples: - .. 
code-block:: python - - import paddle - paddle.enable_static() - layer1 = paddle.static.data("t1", shape=[-1, 5], dtype="float32") - layer2 = paddle.static.data("t2", shape=[-1, 4], dtype="float32") - tensor = paddle.static.nn.bilinear_tensor_product(x=layer1, y=layer2, size=1000) - """ - helper = LayerHelper('bilinear_tensor_product', **locals()) - dtype = helper.input_dtype('x') - - param_shape = [size, x.shape[1], y.shape[1]] - - w = helper.create_parameter( - attr=helper.param_attr, shape=param_shape, dtype=dtype, is_bias=False - ) - out = helper.create_variable_for_type_inference(dtype=dtype) - - inputs = {"X": x, "Y": y, "Weight": w} - if helper.bias_attr: - bias_size = [1, size] - bias = helper.create_parameter( - attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True - ) - inputs["Bias"] = bias - helper.append_op( - type="bilinear_tensor_product", inputs=inputs, outputs={"Out": out} - ) - - # add activation - return helper.append_activation(out) - - @templatedoc() def get_tensor_from_selected_rows(x, name=None): """ diff --git a/python/paddle/fluid/tests/unittests/npu/test_log_loss_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_log_loss_op_npu.py index 87cd872e8cc913..c47b42ee125bed 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_log_loss_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_log_loss_op_npu.py @@ -76,37 +76,5 @@ def test_check_grad(self): self.check_grad_with_place(self.place, ['Predicted'], 'Loss') -@unittest.skipIf( - not paddle.is_compiled_with_npu(), "core is not compiled with NPU" -) -class TestLogLossOpError(unittest.TestCase): - def test_errors(self): - with fluid.program_guard(fluid.Program()): - - def test_x_type(): - input_data = np.random.random(100, 1).astype("float32") - fluid.layers.log_loss(input_data) - - self.assertRaises(TypeError, test_x_type) - - def test_x_dtype(): - x2 = fluid.layers.data(name='x2', shape=[100, 1], dtype='int32') - fluid.layers.log_loss(x2) - - self.assertRaises(TypeError, test_x_dtype) - - def test_label_type(): - input_data = np.random.random(100, 1).astype("float32") - fluid.layers.log_loss(input_data) - - self.assertRaises(TypeError, test_label_type) - - def test_label_dtype(): - x2 = fluid.layers.data(name='x2', shape=[100, 1], dtype='int32') - fluid.layers.log_loss(x2) - - self.assertRaises(TypeError, test_label_dtype) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fleet.py b/python/paddle/fluid/tests/unittests/test_fleet.py index d0445c2c5e09b8..6092710a798c0a 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet.py +++ b/python/paddle/fluid/tests/unittests/test_fleet.py @@ -79,7 +79,7 @@ def test_pslib_1(self): append_batch_size=False, ) label_cast = fluid.layers.cast(label, dtype='float32') - cost = fluid.layers.log_loss(fc, label_cast) + cost = paddle.nn.functional.log_loss(fc, label_cast) try: adam = fluid.optimizer.Adam(learning_rate=0.000005) adam = fleet.distributed_optimizer( diff --git a/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py b/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py index 577652037e5386..f5975ae990d702 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py @@ -16,6 +16,8 @@ import os import unittest +import paddle + class TestFleet1(unittest.TestCase): """ @@ -73,7 +75,7 @@ def test_pslib_1(self): append_batch_size=False, ) label_cast = fluid.layers.cast(label, dtype='float32') - cost = 
fluid.layers.log_loss(fc, label_cast) + cost = paddle.nn.functional.log_loss(fc, label_cast) try: adam = fluid.optimizer.Adam(learning_rate=0.000005) adam = fleet.distributed_optimizer( diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py index f64d8cb1692b20..daee01f38f742c 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py @@ -16,6 +16,7 @@ import os import unittest +import paddle import paddle.fluid.incubate.fleet.base.role_maker as role_maker @@ -97,7 +98,7 @@ def test_pslib_1(self): append_batch_size=False, ) label_cast = fluid.layers.cast(label, dtype='float32') - cost = fluid.layers.log_loss(fc, label_cast) + cost = paddle.nn.functional.log_loss(fc, label_cast) try: adam = fluid.optimizer.Adam(learning_rate=0.000005) adam = fleet.distributed_optimizer(adam) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py index a657d3deb51a02..7a6ba4248352a4 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py @@ -79,7 +79,7 @@ def test_pslib_2(self): append_batch_size=False, ) label_cast = fluid.layers.cast(label, dtype='float32') - cost = fluid.layers.log_loss(fc, label_cast) + cost = paddle.nn.functional.log_loss(fc, label_cast) try: adam = fluid.optimizer.Adam(learning_rate=0.000005) adam = fleet.distributed_optimizer(adam) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py index 79b5e136f189a9..c3df410610ba96 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py @@ -16,6 +16,8 @@ import os import unittest +import paddle + class TestCloudRoleMaker(unittest.TestCase): """ @@ -70,7 +72,7 @@ def test_pslib_1(self): append_batch_size=False, ) label_cast = fluid.layers.cast(label, dtype='float32') - cost = fluid.layers.log_loss(fc, label_cast) + cost = paddle.nn.functional.log_loss(fc, label_cast) try: adam = fluid.optimizer.Adam(learning_rate=0.000005) adam = fleet.distributed_optimizer(adam) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py b/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py index 9c7736a39384f1..78c4a4541e3c06 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py @@ -16,6 +16,8 @@ import os import unittest +import paddle + class TestFleet1(unittest.TestCase): """ @@ -73,7 +75,7 @@ def test_pslib_1(self): append_batch_size=False, ) label_cast = fluid.layers.cast(label, dtype='float32') - cost = fluid.layers.log_loss(fc, label_cast) + cost = paddle.nn.functional.log_loss(fc, label_cast) strategy = {} strategy["embedding"] = {} diff --git a/python/paddle/fluid/tests/unittests/test_hash_op.py b/python/paddle/fluid/tests/unittests/test_hash_op.py index 53b1551c7b8446..75ddd7bb89c8c9 100644 --- a/python/paddle/fluid/tests/unittests/test_hash_op.py +++ b/python/paddle/fluid/tests/unittests/test_hash_op.py @@ -17,8 +17,6 @@ import numpy as np from op_test import OpTest -import paddle.fluid as fluid - class TestHashOp(OpTest): def setUp(self): @@ -120,44 +118,5 @@ def test_check_output(self): self.check_output() -class 
TestHashOpError(unittest.TestCase): - def test_errors(self): - with fluid.program_guard(fluid.Program(), fluid.Program()): - input_data = np.random.randint(0, 10, (8, 1)).astype("int32") - - def test_Variable(): - # the input type must be Variable - fluid.layers.hash(input=input_data, hash_size=2**32) - - self.assertRaises(TypeError, test_Variable) - - def test_type(): - # dtype must be int32, int64. - x2 = fluid.layers.data( - name='x2', shape=[1], dtype="float32", lod_level=1 - ) - fluid.layers.hash(input=x2, hash_size=2**32) - - self.assertRaises(TypeError, test_type) - - def test_hash_size_type(): - # hash_size dtype must be int32, int64. - x3 = fluid.layers.data( - name='x3', shape=[1], dtype="int32", lod_level=1 - ) - fluid.layers.hash(input=x3, hash_size=1024.5) - - self.assertRaises(TypeError, test_hash_size_type) - - def test_num_hash_type(): - # num_hash dtype must be int32, int64. - x4 = fluid.layers.data( - name='x4', shape=[1], dtype="int32", lod_level=1 - ) - fluid.layers.hash(input=x4, hash_size=2**32, num_hash=2.5) - - self.assertRaises(TypeError, test_num_hash_type) - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py index c4e280ea46fd0b..ecb4600163c4b8 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py @@ -271,7 +271,7 @@ def test_deefcf(self): deepcf = DeepCF(num_users, num_items, matrix) prediction = deepcf(users, items) - loss = paddle.sum(fluid.layers.log_loss(prediction, labels)) + loss = paddle.sum(paddle.nn.functional.log_loss(prediction, labels)) adam = fluid.optimizer.AdamOptimizer(0.01) adam.minimize(loss) @@ -325,7 +325,7 @@ def test_deefcf(self): to_variable(items_np[slice : slice + self.batch_size]), ) loss = paddle.sum( - fluid.layers.log_loss( + paddle.nn.functional.log_loss( prediction, to_variable( labels_np[slice : slice + self.batch_size] @@ -359,7 +359,7 @@ def test_deefcf(self): to_variable(items_np[slice : slice + self.batch_size]), ) loss2 = paddle.sum( - fluid.layers.log_loss( + paddle.nn.functional.log_loss( prediction2, to_variable( labels_np[slice : slice + self.batch_size] @@ -402,7 +402,7 @@ def test_deefcf(self): ), ) loss = paddle.sum( - fluid.layers.log_loss( + paddle.nn.functional.log_loss( prediction, to_variable( labels_np[slice : slice + self.batch_size] diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py index 169269cc03e31e..a98d9b994b33a9 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -86,11 +86,15 @@ def testLoadStaticModel(self): "t2", shape=[None, 4], dtype="float32" ) - bilinear_tensor_pro_out_1 = fluid.layers.bilinear_tensor_product( - x=bilinear_tensor_pro_x, y=bilinear_tensor_pro_y, size=1000 - ) - bilinear_tensor_pro_out_2 = fluid.layers.bilinear_tensor_product( - x=bilinear_tensor_pro_x, y=bilinear_tensor_pro_y, size=1000 + bilinear_tensor_pro_out_1 = ( + paddle.static.nn.common.bilinear_tensor_product( + x=bilinear_tensor_pro_x, y=bilinear_tensor_pro_y, size=1000 + ) + ) + bilinear_tensor_pro_out_2 = ( + paddle.static.nn.common.bilinear_tensor_product( + x=bilinear_tensor_pro_x, y=bilinear_tensor_pro_y, size=1000 + ) ) conv2d_trans_in = fluid.data( diff --git 
a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 62def4247037f5..3f7edb6022a859 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -750,7 +750,7 @@ def test_bilinear_tensor_product(self): data_y = layers.data( name='y', shape=[1, 3], dtype="float32", append_batch_size=False ) - out = layers.bilinear_tensor_product( + out = paddle.static.nn.common.bilinear_tensor_product( data_x, data_y, 6, @@ -825,7 +825,7 @@ def test_bilinear_tensor_product(self): data_y2 = layers.data( name='y', shape=[1, 3], dtype="float32", append_batch_size=False ) - out2 = layers.bilinear_tensor_product( + out2 = paddle.static.nn.common.bilinear_tensor_product( data_x2, data_y2, 6, act='sigmoid' ) @@ -3418,15 +3418,6 @@ def make_iou_similarity(self): out = layers.iou_similarity(x, y, name='iou_similarity') return out - def make_grid_sampler(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data(name='x', shape=[3, 5, 7], dtype='float32') - grid = self._get_data(name='grid', shape=[5, 7, 2], dtype='float32') - out = layers.grid_sampler(x, grid) - return out - def make_bilinear_tensor_product_layer(self): with program_guard( fluid.default_main_program(), fluid.default_startup_program() @@ -3434,7 +3425,9 @@ def make_bilinear_tensor_product_layer(self): data = self._get_data(name='data', shape=[4], dtype="float32") theta = self._get_data(name="theta", shape=[5], dtype="float32") - out = layers.bilinear_tensor_product(data, theta, 6) + out = paddle.static.nn.common.bilinear_tensor_product( + data, theta, 6 + ) return out def make_batch_norm(self): diff --git a/python/paddle/fluid/tests/unittests/test_log_loss_op.py b/python/paddle/fluid/tests/unittests/test_log_loss_op.py index 908f4bf94e510f..25bede0af214b8 100644 --- a/python/paddle/fluid/tests/unittests/test_log_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_log_loss_op.py @@ -17,8 +17,6 @@ import numpy as np from op_test import OpTest -import paddle.fluid as fluid - def sigmoid_array(x): return 1 / (1 + np.exp(-x)) @@ -51,34 +49,5 @@ def test_check_grad(self): self.check_grad(['Predicted'], 'Loss', max_relative_error=0.03) -class TestLogLossOpError(unittest.TestCase): - def test_errors(self): - with fluid.program_guard(fluid.Program()): - - def test_x_type(): - input_data = np.random.random(100, 1).astype("float32") - fluid.layers.log_loss(input_data) - - self.assertRaises(TypeError, test_x_type) - - def test_x_dtype(): - x2 = fluid.layers.data(name='x2', shape=[100, 1], dtype='int32') - fluid.layers.log_loss(x2) - - self.assertRaises(TypeError, test_x_dtype) - - def test_label_type(): - input_data = np.random.random(100, 1).astype("float32") - fluid.layers.log_loss(input_data) - - self.assertRaises(TypeError, test_label_type) - - def test_label_dtype(): - x2 = fluid.layers.data(name='x2', shape=[100, 1], dtype='int32') - fluid.layers.log_loss(x2) - - self.assertRaises(TypeError, test_label_dtype) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py index 1849cfd395a553..9635811f6a818c 100755 --- a/python/paddle/static/nn/__init__.py +++ b/python/paddle/static/nn/__init__.py @@ -20,11 +20,11 @@ from .common import conv3d # noqa: F401 from .common import conv2d_transpose # noqa: F401 from .common import conv3d_transpose # noqa: F401 +from .common import bilinear_tensor_product # 
noqa: F401 from .common import py_func # noqa: F401 from ...tensor.creation import create_parameter # noqa: F401 from ...fluid.layers import batch_norm # noqa: F401 -from ...fluid.layers import bilinear_tensor_product # noqa: F401 from ...fluid.layers import case # noqa: F401 from ...fluid.layers import cond # noqa: F401 from ...fluid.layers import conv2d # noqa: F401 @@ -61,8 +61,8 @@ __all__ = [ # noqa 'fc', 'batch_norm', - 'embedding', 'bilinear_tensor_product', + 'embedding', 'case', 'cond', 'conv2d', diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index a8dec018ff14ab..420a00ddbdc51e 100755 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -2088,6 +2088,184 @@ def deform_conv2d( ) +def bilinear_tensor_product( + x, y, size, act=None, name=None, param_attr=None, bias_attr=None +): + r""" + This layer performs bilinear tensor product on two inputs. + + .. math:: + + out_{i} = x * W_{i} * {y^\mathrm{T}}, i=0,1,...,size-1 + + In this formula: + - :math:`x`: the first input contains M elements, shape is [batch_size, M]. + - :math:`y`: the second input contains N elements, shape is [batch_size, N]. + - :math:`W_{i}`: the i-th learned weight, shape is [M, N]. + - :math:`out_{i}`: the i-th element of out, shape is [batch_size, size]. + - :math:`y^\mathrm{T}`: the transpose of :math:`y_{2}`. + + Args: + x (Variable): 2-D input tensor with shape [batch_size, M]. Data type + is float32 or float64. + y (Variable): 2-D input tensor with shape [batch_size, N]. Data type + should be same as **x**. + size (int): The dimension of this layer. + act (str|None): Activation to be applied to the output of this layer. Default None. + name(str|None): For detailed information, please refer to + :ref:`api_guide_Name` . Usually name is no need to set and None by default. + param_attr (ParamAttr|None): To specify the weight parameter attribute. + Default: None, which means the default weight parameter property is + used. See usage for details in :ref:`api_fluid_ParamAttr` . + bias_attr (ParamAttr|None): To specify the bias parameter attribute. + Default: None, which means the default bias parameter property is + used. See usage for details in :ref:`api_fluid_ParamAttr` . + + Returns: + Tensor, A 2-D Tensor of shape [batch_size, size]. Data type is the same as input **x**. + + Examples: + .. code-block:: python + + import paddle + paddle.enable_static() + + x = paddle.static.data("t1", shape=[-1, 5], dtype="float32") + y = paddle.static.data("t2", shape=[-1, 4], dtype="float32") + tensor = paddle.static.nn.bilinear_tensor_product(x, y, size=1000) + + """ + helper = LayerHelper('bilinear_tensor_product', **locals()) + dtype = helper.input_dtype('x') + + param_shape = [size, x.shape[1], y.shape[1]] + + w = helper.create_parameter( + attr=helper.param_attr, shape=param_shape, dtype=dtype, is_bias=False + ) + out = helper.create_variable_for_type_inference(dtype=dtype) + + inputs = {"X": x, "Y": y, "Weight": w} + if helper.bias_attr: + bias_size = [1, size] + bias = helper.create_parameter( + attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True + ) + inputs["Bias"] = bias + helper.append_op( + type="bilinear_tensor_product", inputs=inputs, outputs={"Out": out} + ) + + # add activation + return helper.append_activation(out) + + +@static_only +def prelu(x, mode, param_attr=None, data_format="NCHW", name=None): + r""" + + prelu activation. + + .. 
math:: + prelu(x) = max(0, x) + \alpha * min(0, x) + + There are three modes for the activation: + + .. code-block:: text + + all: All elements share same alpha. + channel: Elements in same channel share same alpha. + element: All elements do not share alpha. Each element has its own alpha. + + Parameters: + x (Tensor): The input Tensor or LoDTensor with data type float32. + mode (str): The mode for weight sharing. + param_attr (ParamAttr|None, optional): The parameter attribute for the learnable \ + weight (alpha), it can be create by ParamAttr. None by default. \ + For detailed information, please refer to :ref:`api_paddle_ParamAttr`. + data_format(str, optional): Data format that specifies the layout of input. + It may be "NC", "NCL", "NCHW", "NCDHW", "NLC", "NHWC" or "NDHWC". Default: "NCHW". + name (str, optional): Name for the operation (optional, default is None). \ + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + Tensor: A tensor with the same shape and data type as x. + + Examples: + + .. code-block:: python + + import paddle + paddle.enable_static() + + x = paddle.static.data(name="x", shape=[None,5,10,10], dtype="float32") + mode = 'channel' + output = paddle.static.nn.prelu( + x,mode,param_attr=paddle.ParamAttr(name='alpha')) + + """ + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'prelu') + + helper = LayerHelper('prelu', **locals()) + if mode not in ['all', 'channel', 'element']: + raise ValueError('mode should be one of all, channel, element.') + + alpha_shape = [1] + if mode == 'channel': + + true_data_format = [ + 'NC', + 'NCL', + 'NCHW', + 'NCDHW', + 'NLC', + 'NHWC', + 'NDHWC', + ] + if data_format not in true_data_format: + raise ValueError( + "data_format must be one of 'NC', 'NCL', 'NCHW', 'NCDHW', " + "'NLC', 'NHWC', 'NDHWC' but receive {}".format(data_format) + ) + + data_format = 'NCHW' if data_format[1] == 'C' else 'NHWC' + + assert ( + len(x.shape) >= 2 + ), "The size of input shape should be equal or larger than 2 in prelu() when mode is 'channel'" + # NOTE(zhiqiu): The alpha_shape should be [1, channel] + [1] * len(x.shape[2:]). + # To be consistent with Prelu, it is simplified. + # NOTE(zhiqiu): Revert shape to [1, channel, 1, 1] for compatibility with saved model of old version. + # NOTE(GuoxiaWang): support NHWC data format + if data_format == 'NHWC': + alpha_shape = [1, 1, 1, x.shape[-1]] + else: + alpha_shape = [1, x.shape[1], 1, 1] + + elif mode == 'element': + assert ( + len(x.shape) >= 1 + ), "The size of input shape should be equal or larger than 1 in prelu() when mode is 'element'" + alpha_shape = [1] + list(x.shape)[1:] + dtype = helper.input_dtype(input_param_name='x') + alpha = helper.create_parameter( + attr=helper.param_attr, + shape=alpha_shape, + dtype=dtype, + is_bias=False, + default_initializer=paddle.nn.initializer.Constant(0.25), + ) + + out = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type="prelu", + inputs={"X": x, 'Alpha': alpha}, + attrs={"mode": mode, "data_format": data_format}, + outputs={"Out": out}, + ) + return out + + class PyFuncRegistry: _register_funcs = [] @@ -2106,12 +2284,10 @@ def __init__(self, func): self._id = core._append_python_callable_object_and_return_id(self) ''' Why record self here? - 1. For debug usage. Users can call :code:`py_func.registered_func(idx)` method to find the registered function corresponding to :code:`idx`. - 2. For increasing reference count of self. 
It seems that to release Python object whose reference count is 1 would cause @@ -2169,25 +2345,20 @@ def py_func(func, x, out, backward_func=None, skip_vars_in_backward_input=None): This is used to register customized Python OP to Paddle. The design principe of py_func is that Tensor and numpy array can be converted to each other easily. So you can use Python and numpy API to register a python OP. - The forward function of the registered OP is ``func`` and the backward function of that is ``backward_func``. Paddle will call ``func`` at forward runtime and call ``backward_func`` at backward runtime(if ``backward_func`` is not None). ``x`` is the input of ``func``, whose type must be Tensor; ``out`` is the output of ``func``, whose type can be either Tensor or numpy array. - The input of the backward function ``backward_func`` is ``x``, ``out`` and the gradient of ``out``. If ``out`` have no gradient, the relevant input of ``backward_func`` is None. If ``x`` do not have a gradient, the user should return None in ``backward_func``. - The data type and shape of ``out`` should also be set correctly before this API is called, and the data type and shape of the gradient of ``out`` and ``x`` will be inferred automatically. - This API can also be used to debug the neural network by setting the ``func`` as a function that only print variables. - Args: func (callable): The forward function of the registered OP. When the network is running, the forward output ``out`` will be calculated according to this @@ -2211,61 +2382,47 @@ def py_func(func, x, out, backward_func=None, skip_vars_in_backward_input=None): that no tensors need to be removed from ``x`` and ``out``. If it is not None, these tensors will not be the input of ``backward_func``. This parameter is only useful when ``backward_func`` is not None. - Returns: Tensor|tuple(Tensor)|list[Tensor]: The output ``out`` of the forward function ``func``. - Examples: .. code-block:: python - # example 1: import paddle import numpy as np - paddle.enable_static() - # Creates a forward function, Tensor can be input directly without # being converted into numpy array. def tanh(x): return np.tanh(x) - # Skip x in backward function and return the gradient of x # Tensor must be actively converted to numpy array, otherwise, # operations such as +/- can't be used. 
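Looking back at the bilinear_tensor_product formula added earlier in this file (out_i = x * W_i * y^T), a minimal numpy sketch of what each output element computes; shapes and values here are invented purely for illustration:

    import numpy as np

    batch, M, N, size = 2, 5, 4, 3
    x = np.random.rand(batch, M).astype("float32")
    y = np.random.rand(batch, N).astype("float32")
    W = np.random.rand(size, M, N).astype("float32")  # one [M, N] weight matrix per output element

    # out[b, i] = x[b] @ W[i] @ y[b]^T, giving an output of shape [batch, size]
    out = np.stack([(x @ W[i] * y).sum(axis=1) for i in range(size)], axis=1)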
def tanh_grad(y, dy): return np.array(dy) * (1 - np.square(np.array(y))) - # Creates a forward function for debugging running networks(print value) def debug_func(x): print(x) - def create_tmp_var(name, dtype, shape): return paddle.static.default_main_program().current_block().create_var( name=name, dtype=dtype, shape=shape) - def simple_net(img, label): hidden = img for idx in range(4): hidden = paddle.static.nn.fc(hidden, size=200) new_hidden = create_tmp_var(name='hidden_{}'.format(idx), dtype=hidden.dtype, shape=hidden.shape) - # User-defined forward and backward hidden = paddle.static.py_func(func=tanh, x=hidden, out=new_hidden, backward_func=tanh_grad, skip_vars_in_backward_input=hidden) - # User-defined debug functions that print out the input Tensor paddle.static.py_func(func=debug_func, x=hidden, out=None) - prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax') ce_loss = paddle.nn.loss.CrossEntropyLoss() return ce_loss(prediction, label) - x = paddle.static.data(name='x', shape=[1,4], dtype='float32') y = paddle.static.data(name='y', shape=[1], dtype='int64') res = simple_net(x, y) - exe = paddle.static.Executor(paddle.CPUPlace()) exe.run(paddle.static.default_startup_program()) input1 = np.random.random(size=[1,4]).astype('float32') @@ -2274,54 +2431,40 @@ def simple_net(img, label): feed={'x':input1, 'y':input2}, fetch_list=[res.name]) print(out) - .. code-block:: python - # example 2: # This example shows how to turn Tensor into numpy array and # use numpy API to register an Python OP import paddle import numpy as np - paddle.enable_static() - def element_wise_add(x, y): # Tensor must be actively converted to numpy array, otherwise, # numpy.shape can't be used. x = np.array(x) y = np.array(y) - if x.shape != y.shape: raise AssertionError("the shape of inputs must be the same!") - result = np.zeros(x.shape, dtype='int32') for i in range(len(x)): for j in range(len(x[0])): result[i][j] = x[i][j] + y[i][j] - return result - def create_tmp_var(name, dtype, shape): return paddle.static.default_main_program().current_block().create_var( name=name, dtype=dtype, shape=shape) - def py_func_demo(): start_program = paddle.static.default_startup_program() main_program = paddle.static.default_main_program() - # Input of the forward function x = paddle.static.data(name='x', shape=[2,3], dtype='int32') y = paddle.static.data(name='y', shape=[2,3], dtype='int32') - # Output of the forward function, name/dtype/shape must be specified output = create_tmp_var('output','int32', [3,1]) - # Multiple Variable should be passed in the form of tuple(Variale) or list[Variale] paddle.static.py_func(func=element_wise_add, x=[x,y], out=output) - exe=paddle.static.Executor(paddle.CPUPlace()) exe.run(start_program) - # Feed numpy array to main_program input1 = np.random.randint(1, 10, size=[2,3], dtype='int32') input2 = np.random.randint(1, 10, size=[2,3], dtype='int32') @@ -2329,9 +2472,7 @@ def py_func_demo(): feed={'x':input1, 'y':input2}, fetch_list=[output.name]) print("{0} + {1} = {2}".format(input1, input2, out)) - py_func_demo() - # Reference output: # [[5, 9, 9] + [[7, 8, 4] = [array([[12, 17, 13] # [7, 5, 2]] [1, 3, 3]] [8, 8, 5]], dtype=int32)] @@ -2405,109 +2546,3 @@ def py_func_demo(): # For debug usage py_func.registered_func = PyFuncRegistry.registered_func py_func.registered_func_num = PyFuncRegistry.registered_func_num - - -@static_only -def prelu(x, mode, param_attr=None, data_format="NCHW", name=None): - r""" - - prelu activation. - - .. 
math:: - prelu(x) = max(0, x) + \alpha * min(0, x) - - There are three modes for the activation: - - .. code-block:: text - - all: All elements share same alpha. - channel: Elements in same channel share same alpha. - element: All elements do not share alpha. Each element has its own alpha. - - Parameters: - x (Tensor): The input Tensor or LoDTensor with data type float32. - mode (str): The mode for weight sharing. - param_attr (ParamAttr|None, optional): The parameter attribute for the learnable \ - weight (alpha), it can be create by ParamAttr. None by default. \ - For detailed information, please refer to :ref:`api_paddle_ParamAttr`. - data_format(str, optional): Data format that specifies the layout of input. - It may be "NC", "NCL", "NCHW", "NCDHW", "NLC", "NHWC" or "NDHWC". Default: "NCHW". - name (str, optional): Name for the operation (optional, default is None). \ - For more information, please refer to :ref:`api_guide_Name`. - - Returns: - Tensor: A tensor with the same shape and data type as x. - - Examples: - - .. code-block:: python - - import paddle - paddle.enable_static() - - x = paddle.static.data(name="x", shape=[None,5,10,10], dtype="float32") - mode = 'channel' - output = paddle.static.nn.prelu( - x,mode,param_attr=paddle.ParamAttr(name='alpha')) - - """ - check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'prelu') - - helper = LayerHelper('prelu', **locals()) - if mode not in ['all', 'channel', 'element']: - raise ValueError('mode should be one of all, channel, element.') - - alpha_shape = [1] - if mode == 'channel': - - true_data_format = [ - 'NC', - 'NCL', - 'NCHW', - 'NCDHW', - 'NLC', - 'NHWC', - 'NDHWC', - ] - if data_format not in true_data_format: - raise ValueError( - "data_format must be one of 'NC', 'NCL', 'NCHW', 'NCDHW', " - "'NLC', 'NHWC', 'NDHWC' but receive {}".format(data_format) - ) - - data_format = 'NCHW' if data_format[1] == 'C' else 'NHWC' - - assert ( - len(x.shape) >= 2 - ), "The size of input shape should be equal or larger than 2 in prelu() when mode is 'channel'" - # NOTE(zhiqiu): The alpha_shape should be [1, channel] + [1] * len(x.shape[2:]). - # To be consistent with Prelu, it is simplified. - # NOTE(zhiqiu): Revert shape to [1, channel, 1, 1] for compatibility with saved model of old version. 
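To make the alpha-shape notes above concrete, a small sketch of the 'channel' mode in static graph mode; it mirrors the example already shown in the prelu docstring, and the variable names are illustrative only:

    import paddle
    paddle.enable_static()

    # NCHW input with 3 channels -> one learnable alpha per channel,
    # stored with shape [1, 3, 1, 1] for compatibility with saved models
    img = paddle.static.data(name="img", shape=[None, 3, 8, 8], dtype="float32")
    out = paddle.static.nn.prelu(img, mode="channel")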
- # NOTE(GuoxiaWang): support NHWC data format - if data_format == 'NHWC': - alpha_shape = [1, 1, 1, x.shape[-1]] - else: - alpha_shape = [1, x.shape[1], 1, 1] - - elif mode == 'element': - assert ( - len(x.shape) >= 1 - ), "The size of input shape should be equal or larger than 1 in prelu() when mode is 'element'" - alpha_shape = [1] + list(x.shape)[1:] - dtype = helper.input_dtype(input_param_name='x') - alpha = helper.create_parameter( - attr=helper.param_attr, - shape=alpha_shape, - dtype=dtype, - is_bias=False, - default_initializer=paddle.nn.initializer.Constant(0.25), - ) - - out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="prelu", - inputs={"X": x, 'Alpha': alpha}, - attrs={"mode": mode, "data_format": data_format}, - outputs={"Out": out}, - ) - return out From 61a1f68845f65a12723ac2a667d083a1ab27399e Mon Sep 17 00:00:00 2001 From: Guanghua Yu <742925032@qq.com> Date: Mon, 5 Dec 2022 15:14:18 +0800 Subject: [PATCH 147/154] Support matmul in QAT and loading quantized models in PTQ (#47892) --- .../slim/quantization/imperative/utils.py | 1 + .../slim/quantization/quantization_pass.py | 18 ++++++++++++++++++ python/paddle/nn/quant/__init__.py | 1 + python/paddle/nn/quant/functional_layers.py | 10 +++++++++- 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index d771b51e09d11f..e5ed14cb9f1e17 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -63,6 +63,7 @@ paddle.nn.quant.subtract, paddle.nn.quant.multiply, paddle.nn.quant.divide, + paddle.nn.quant.matmul, ] fake_quant_leaf_layers = [ diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 6d99f0949d4a7c..705b0e5e69ee6d 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -1939,6 +1939,15 @@ def apply(self, graph): op_node.op()._set_attr("activation_bits", self._quant_bits) op_node.op()._set_attr("with_quant_attr", True) arg_names = utils._get_op_input_var_names(op_node) + # If already quanted, skip it. + skip_quant = False + for arg_name in arg_names: + if "quantized.dequantized" in arg_name: + skip_quant = True + break + if skip_quant: + continue + for arg_name in arg_names: in_node = graph._find_node_by_name( op_node.inputs, arg_name @@ -2797,6 +2806,15 @@ def apply(self, graph): continue arg_names = utils._get_op_input_var_names(op_node) + # If already quanted, skip it. 
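For context on the matmul support this patch adds, a minimal sketch of how the functional wrapper registered in utils.py above might be used inside a dygraph model so the QAT pass can wrap it; the surrounding training and quantization setup is omitted and assumed:

    import paddle

    class TinyNet(paddle.nn.Layer):
        def __init__(self):
            super().__init__()
            # layer-style wrapper around matmul, so the QAT pass can insert
            # fake quant/dequant around this op
            self.matmul = paddle.nn.quant.matmul()

        def forward(self, x, y):
            return self.matmul(x, y, transpose_x=False, transpose_y=False)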
+ skip_quant = False + for arg_name in arg_names: + if "quantized.dequantized" in arg_name: + skip_quant = True + break + if skip_quant: + continue + for arg_name in arg_names: in_node = graph._find_node_by_name( op_node.inputs, arg_name diff --git a/python/paddle/nn/quant/__init__.py b/python/paddle/nn/quant/__init__.py index 8973761ab69443..f96558bfbed15a 100644 --- a/python/paddle/nn/quant/__init__.py +++ b/python/paddle/nn/quant/__init__.py @@ -21,6 +21,7 @@ from .functional_layers import transpose # noqa: F401 from .functional_layers import concat # noqa: F401 from .functional_layers import flatten # noqa: F401 +from .functional_layers import matmul # noqa: F401 from .quant_layers import QuantStub # noqa: F401 __all__ = [] diff --git a/python/paddle/nn/quant/functional_layers.py b/python/paddle/nn/quant/functional_layers.py index 2986e3e0500f96..3a0fafe6b6ad18 100644 --- a/python/paddle/nn/quant/functional_layers.py +++ b/python/paddle/nn/quant/functional_layers.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...tensor import manipulation, math +from ...tensor import linalg, manipulation, math from .. import Layer __all__ = [] @@ -85,3 +85,11 @@ def __init__(self): def forward(self, x, start_axis=0, stop_axis=-1, name=None): return manipulation.flatten(x, start_axis, stop_axis, name) + + +class matmul(FloatFunctionalLayer): + def __init__(self): + super().__init__() + + def forward(self, x, y, transpose_x=False, transpose_y=False, name=None): + return linalg.matmul(x, y, transpose_x, transpose_y, name) From 97aa938f2bed793e17e67cb6cf08e52307532288 Mon Sep 17 00:00:00 2001 From: lzydev <1528794076@qq.com> Date: Mon, 5 Dec 2022 15:39:51 +0800 Subject: [PATCH 148/154] Generate static graph code of some ops by yaml (#48698) * generate static graph code of some ops by yaml, test = develop * generate static graph code of some ops by yaml, test = develop --- paddle/fluid/operators/lu_unpack_op.cc | 142 ---------------------- paddle/fluid/operators/mode_op.cc | 123 ------------------- paddle/fluid/operators/nll_loss_op.cc | 148 ----------------------- paddle/fluid/operators/qr_op.cc | 120 ------------------ paddle/fluid/operators/renorm_op.cc | 89 -------------- paddle/phi/api/yaml/backward.yaml | 50 ++++++++ paddle/phi/api/yaml/legacy_backward.yaml | 50 -------- paddle/phi/api/yaml/legacy_ops.yaml | 49 -------- paddle/phi/api/yaml/op_compat.yaml | 32 +++++ paddle/phi/api/yaml/ops.yaml | 49 ++++++++ paddle/phi/ops/compat/lu_unpack_sig.cc | 37 ------ paddle/phi/ops/compat/mode_sig.cc | 34 ------ paddle/phi/ops/compat/nll_loss_sig.cc | 38 ------ paddle/phi/ops/compat/qr_sig.cc | 31 ----- paddle/phi/ops/compat/renorm_sig.cc | 34 ------ 15 files changed, 131 insertions(+), 895 deletions(-) delete mode 100644 paddle/fluid/operators/lu_unpack_op.cc delete mode 100644 paddle/fluid/operators/mode_op.cc delete mode 100644 paddle/fluid/operators/nll_loss_op.cc delete mode 100644 paddle/fluid/operators/qr_op.cc delete mode 100644 paddle/fluid/operators/renorm_op.cc delete mode 100644 paddle/phi/ops/compat/lu_unpack_sig.cc delete mode 100644 paddle/phi/ops/compat/mode_sig.cc delete mode 100644 paddle/phi/ops/compat/nll_loss_sig.cc delete mode 100644 paddle/phi/ops/compat/qr_sig.cc delete mode 100644 paddle/phi/ops/compat/renorm_sig.cc diff --git a/paddle/fluid/operators/lu_unpack_op.cc b/paddle/fluid/operators/lu_unpack_op.cc deleted file mode 100644 index 9f631a60c15565..00000000000000 --- 
a/paddle/fluid/operators/lu_unpack_op.cc +++ /dev/null @@ -1,142 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" - -#include "paddle/phi/infermeta/backward.h" -#include "paddle/phi/infermeta/binary.h" - -namespace paddle { -namespace operators { - -class LU_UnpackOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddComment(R"DOC(Unpack L U and P to single matrix tensor, - unpack L and U matrix from LU, unpack permutation matrix Pmat from Pivtos . - )DOC"); - AddInput("X", "(Tensor) The input LU tensor, shape of (*,m,n)"); - AddInput("Pivots", - "(Tensor) The input Pivots tensor, shape of (*,min(m,n))"); - AddOutput( - "Pmat", - "(Tensor) The output permutation matrix tensor, shape of (*, m, m)"); - AddOutput("L", "(Tensor) The output lower triangular matrix tensor"); - AddOutput("U", "(Tensor) The output upper triangular matrix tensor"); - AddAttr("unpack_ludata", "Whether to unpack L and U") - .SetDefault(true); - AddAttr("unpack_pivots", "Whether to unpack permutation matrix") - .SetDefault(true); - } -}; - -class LU_UnpackOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - return framework::OpKernelType( - OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); - } -}; - -class LU_UnpackOpVarTypeInference : public framework::VarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto var_type = ctx->GetInputType("X", 0); - auto data_type = ctx->GetInputDataType("X", 0); - - ctx->SetOutputType("L", var_type, framework::ALL_ELEMENTS); - ctx->SetOutputDataType("L", data_type, framework::ALL_ELEMENTS); - - ctx->SetOutputType("U", var_type, framework::ALL_ELEMENTS); - ctx->SetOutputDataType("U", data_type, framework::ALL_ELEMENTS); - - ctx->SetOutputType("Pmat", var_type, framework::ALL_ELEMENTS); - ctx->SetOutputDataType("Pmat", data_type, framework::ALL_ELEMENTS); - } -}; - -template -class LU_UnpackOpGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr retv) const override { - retv->SetType("lu_unpack_grad"); - retv->SetInput("X", this->Input("X")); - retv->SetInput("Pivots", this->Input("Pivots")); - retv->SetInput("L", this->Output("L")); - retv->SetInput("U", this->Output("U")); - retv->SetInput("Pmat", this->Output("Pmat")); - - retv->SetInput(framework::GradVarName("L"), this->OutputGrad("L")); - retv->SetInput(framework::GradVarName("U"), this->OutputGrad("U")); - retv->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - retv->SetAttrMap(this->Attrs()); - } -}; - -class LU_UnpackGradOpVarTypeInference : public 
framework::VarTypeInference { - public: - void operator()(framework::InferVarTypeContext *ctx) const override { - auto var_type = ctx->GetInputType("X", 0); - auto data_type = ctx->GetInputDataType("X", 0); - - ctx->SetOutputType( - framework::GradVarName("X"), var_type, framework::ALL_ELEMENTS); - ctx->SetOutputDataType( - framework::GradVarName("X"), data_type, framework::ALL_ELEMENTS); - } -}; - -class LU_UnpackGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - auto dtype = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - return framework::OpKernelType(dtype, ctx.GetPlace()); - } -}; - -} // namespace operators -} // namespace paddle -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -DECLARE_INFER_SHAPE_FUNCTOR(lu_unpack, - LUUnpackInferMetaFunctor, - PD_INFER_META(phi::LUUnpackInferMeta)); -DECLARE_INFER_SHAPE_FUNCTOR(lu_unpack_grad, - LUUnpackGradInferMetaFunctor, - PD_INFER_META(phi::LUUnpackGradInferMeta)); - -REGISTER_OPERATOR(lu_unpack, - ops::LU_UnpackOp, - ops::LU_UnpackOpMaker, - ops::LU_UnpackOpVarTypeInference, - ops::LU_UnpackOpGradMaker, - ops::LU_UnpackOpGradMaker, - LUUnpackInferMetaFunctor); -REGISTER_OPERATOR(lu_unpack_grad, - ops::LU_UnpackGradOp, - ops::LU_UnpackGradOpVarTypeInference, - LUUnpackGradInferMetaFunctor); diff --git a/paddle/fluid/operators/mode_op.cc b/paddle/fluid/operators/mode_op.cc deleted file mode 100644 index 472526623511ec..00000000000000 --- a/paddle/fluid/operators/mode_op.cc +++ /dev/null @@ -1,123 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/generator.h" -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/infermeta/unary.h" - -namespace paddle { -namespace operators { - -class ModeOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - framework::LibraryType library_{framework::LibraryType::kPlain}; - phi::DataLayout layout_ = phi::DataLayout::kAnyLayout; - return framework::OpKernelType( - OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.device_context(), - layout_, - library_); - } -}; - -class ModeOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor) The input of Mode op"); - AddOutput("Out", "(Tensor) The output tensor of Topk op"); - AddOutput("Indices", "(Tensor) The indices of Topk elements of input"); - AddAttr("axis", - "the axis to calculate mode values." 
- "if not set, will calculate on last axis.") - .SetDefault(-1); - AddAttr("keepdim", "Keep the dim that to reduce.").SetDefault(false); - AddComment(R"DOC( -This operator finds the mode of input Tensor. And outputs their values and indices as vectors. -)DOC"); - } -}; - -class ModeOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("X"), - true, - platform::errors::InvalidArgument("Input(X) should be not null")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("Indices"), - true, - platform::errors::InvalidArgument("Input(Indices) should be not null")); - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::InvalidArgument( - "Grad Input(Out) should be not null")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput(framework::GradVarName("X")), - true, - platform::errors::InvalidArgument("Grad Output(X) should be not null")); - - auto x_dims = ctx->GetInputDim("X"); - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - } - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")); - return framework::OpKernelType(data_type, ctx.device_context()); - } -}; - -template -class ModeGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("mode_grad"); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetInput("X", this->Input("X")); - op->SetInput("Indices", this->Output("Indices")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetAttrMap(this->Attrs()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -DECLARE_INFER_SHAPE_FUNCTOR(mode, - ModeInferShapeFunctor, - PD_INFER_META(phi::ModeInferMeta)); -REGISTER_OPERATOR(mode, - ops::ModeOp, - ops::ModeOpMaker, - ops::ModeGradOpMaker, - ops::ModeGradOpMaker, - ModeInferShapeFunctor); -REGISTER_OPERATOR(mode_grad, ops::ModeOpGrad); diff --git a/paddle/fluid/operators/nll_loss_op.cc b/paddle/fluid/operators/nll_loss_op.cc deleted file mode 100644 index 782b67d90e81f7..00000000000000 --- a/paddle/fluid/operators/nll_loss_op.cc +++ /dev/null @@ -1,148 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/infermeta/backward.h" -#include "paddle/phi/infermeta/ternary.h" - -namespace paddle { -namespace operators { - -class NLLLossOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType( - OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.device_context()); - } -}; - -class NLLLossOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(Tensor, default Tensor) A tensor whose last dimension " - "size is equal to the number of classes. It is expected to " - "contain log-probabilities of each class. " - "The X tensor's shape has to be either [batch_size, C] or" - "[batch_size, C, dim1, ..., dimK] in with K >= 1 in the case " - " K-dimensional loss."); - AddInput("Label", - "(Tensor, default Tensor) A tensor which represents the " - "the ground truth. It contains the class index in the range " - "[0, C-1] where C = number of classes. The Lable tensor's " - "shape has to be (batch_size), or " - "(batch_size, dim1, ..., dimK) " - "with K >= 1 in the case K-dimensional loss."); - AddInput("Weight", - "(Tensor, optional) A tensor should be a 1D tensor assigning " - "weight to each of the classes. It's shape must be [C], where " - "C is the class number.") - .AsDispensable(); - AddOutput("Out", - "(Tensor, default Tensor) A tensor that represents the " - "NLL loss."); - AddOutput("Total_weight", - "(Tensor, default Tensor) A tensor saves the total" - "weight value in the forward process."); - AddAttr("ignore_index", - "(int64_t, default -100), Specifies a target value that is" - "ignored and does not contribute to the input gradient.") - .SetDefault(-100); - AddAttr( - "reduction", - "(string, default mean), Specifies the reduction to apply" - "to the output. The options include \"none\", \"mean\"," - "\"sum\".") - .SetDefault("mean"); - AddComment(R"DOC( -NLL(Negative Log Likelihood) Loss Operator. - -This operator computes the NLL loss according to the inputs. -The loss can be described as: - -$Out[i] = -X[Label[i]]*Weight[Label[i]]$ - -It can also be used for higher dimension inputs, such as 2D images, by -providing an input of shape (batch_size, C, d1, d2, ..., dK), with -K >= 1, where K is the number of dimensions, and a Label of -appropriate shape. In the case of images, it computes NLL loss -per-pixel. 
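To make the formula above concrete (Out[i] = -X[Label[i]] * Weight[Label[i]]), a small numpy sketch with made-up values:

    import numpy as np

    log_probs = np.log(np.array([[0.2, 0.5, 0.3],
                                 [0.1, 0.1, 0.8]]))  # [batch=2, C=3] log-probabilities
    labels = np.array([1, 2])
    weight = np.ones(3)

    per_sample = -log_probs[np.arange(2), labels] * weight[labels]
    loss = per_sample.mean()  # "mean" reduction; "sum" and "none" are the other options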
- -)DOC"); - } -}; - -class NLLLossGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType( - OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.device_context()); - } -}; - -template -class NLLLossGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("nll_loss_grad"); - op->SetInput("X", this->Input("X")); - op->SetInput("Label", this->Input("Label")); - op->SetInput("Total_weight", this->Output("Total_weight")); - - if (this->HasInput("Weight")) { - op->SetInput("Weight", this->Input("Weight")); - } - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - - op->SetAttrMap(this->Attrs()); - } -}; - -} // namespace operators -} // namespace paddle - -DECLARE_INFER_SHAPE_FUNCTOR(nll_loss, - NllLossRawInferShapeFunctor, - PD_INFER_META(phi::NllLossRawInferMeta)); -DECLARE_INFER_SHAPE_FUNCTOR(nll_loss_grad, - NllLossGradInferShapeFunctor, - PD_INFER_META(phi::NllLossGradInferMeta)); -namespace ops = paddle::operators; -REGISTER_OPERATOR(nll_loss, - ops::NLLLossOp, - ops::NLLLossOpMaker, - ops::NLLLossGradMaker, - ops::NLLLossGradMaker, - NllLossRawInferShapeFunctor); -REGISTER_OPERATOR(nll_loss_grad, - ops::NLLLossGradOp, - NllLossGradInferShapeFunctor); diff --git a/paddle/fluid/operators/qr_op.cc b/paddle/fluid/operators/qr_op.cc deleted file mode 100644 index 3eac56d1604b9a..00000000000000 --- a/paddle/fluid/operators/qr_op.cc +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/ddim.h" -#include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/infermeta/unary.h" - -namespace paddle { -namespace operators { -using DDim = framework::DDim; - -class QrOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; -}; - -class QrOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of qr op."); - AddOutput("Q", "(Tensor), The output Q tensor of qr op."); - AddOutput("R", "(Tensor), The output R tensor of qr op."); - AddAttr( - "mode", - "(string, default \"reduced\"). " - "If mode is \"reduced\", Qr op will return reduced Q and R matrices. " - "If mode is \"complete\", Qr op will return complete Q and R matrices. " - "If mode is \"r\", Qr op will only return reduced R matrix.") - .SetDefault("reduced"); - AddComment(R"DOC( -Qr Operator. 
-This operator is used to perform QR operation for batched matrics $X$. -$$Q, R = qr(X)$$ -)DOC"); - } -}; - -class QrGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Q")), - "Input", - "Q@Grad", - "QrGrad"); - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("R")), - "Input", - "R@Grad", - "QrGrad"); - OP_INOUT_CHECK(ctx->HasInput("Q"), "Input", "Q", "QrGrad"); - OP_INOUT_CHECK(ctx->HasInput("R"), "Input", "R", "QrGrad"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), - "Output", - "X@Grad", - "QrGrad"); - - auto x_dims = ctx->GetInputDim(("X")); - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - } - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto dtype = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - return framework::OpKernelType(dtype, ctx.GetPlace()); - } -}; - -template -class QrGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - void Apply(GradOpPtr retv) const override { - retv->SetType("qr_grad"); - retv->SetInput(framework::GradVarName("Q"), this->OutputGrad("Q")); - retv->SetInput(framework::GradVarName("R"), this->OutputGrad("R")); - retv->SetInput("Q", this->Output("Q")); - retv->SetInput("R", this->Output("R")); - retv->SetInput("X", this->Input("X")); - retv->SetAttrMap(this->Attrs()); - retv->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -DECLARE_INFER_SHAPE_FUNCTOR(qr, - QrInferShapeFunctor, - PD_INFER_META(phi::QrInferMeta)); - -REGISTER_OPERATOR(qr, - ops::QrOp, - ops::QrOpMaker, - ops::QrGradMaker, - ops::QrGradMaker, - QrInferShapeFunctor); - -REGISTER_OPERATOR(qr_grad, ops::QrGradOp); diff --git a/paddle/fluid/operators/renorm_op.cc b/paddle/fluid/operators/renorm_op.cc deleted file mode 100644 index 1dc333460b6ed1..00000000000000 --- a/paddle/fluid/operators/renorm_op.cc +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
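As a quick illustration of the factorization described above, a hedged sketch using what is assumed to be the corresponding Python entry point, paddle.linalg.qr:

    import paddle

    x = paddle.randn([4, 3])
    q, r = paddle.linalg.qr(x, mode="reduced")
    # q has orthonormal columns and q @ r reconstructs x
    print(paddle.allclose(x, q @ r))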
- -#include -#include -#include -#include -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/infermeta/unary.h" - -namespace paddle { -namespace operators { - -class RenormOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - using DDim = paddle::framework::DDim; -}; - -class RenormOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor), The input tensor of renorm op."); - AddOutput("Out", "(Tensor), The output tensor of renorm op."); - AddAttr("p", "(float, norm's power"); - AddAttr("axis", - "int,the dimension to slice over to get the sub-tensors"); - AddAttr("max_norm", "(float, the norm upper-bound"); - AddComment(R"DOC( -Renorm Operator. - -This operator is used to scale tensor sliced by axis if its p-norm execeeds maxnorm - -)DOC"); - } -}; - -class RenormGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; -}; - -template -class RenormGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - void Apply(GradOpPtr retv) const override { - retv->SetType("renorm_grad"); - retv->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - retv->SetInput("X", this->Input("X")); - retv->SetAttrMap(this->Attrs()); - retv->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -DECLARE_INFER_SHAPE_FUNCTOR(renorm, - RenormInferShapeFunctor, - PD_INFER_META(phi::UnchangedInferMeta)); - -DECLARE_INFER_SHAPE_FUNCTOR(renorm_grad, - RenormGradInferShapeFunctor, - PD_INFER_META(phi::UnchangedInferMeta)); - -REGISTER_OPERATOR(renorm, - ops::RenormOp, - ops::RenormOpMaker, - ops::RenormGradMaker, - ops::RenormGradMaker, - RenormInferShapeFunctor) - -REGISTER_OPERATOR(renorm_grad, ops::RenormGradOp, RenormGradInferShapeFunctor); diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml index 2d333805b5aa02..3706935624dd57 100644 --- a/paddle/phi/api/yaml/backward.yaml +++ b/paddle/phi/api/yaml/backward.yaml @@ -687,6 +687,15 @@ func : logsigmoid_grad inplace : (out_grad -> x_grad) +- backward_op : lu_unpack_grad + forward : lu_unpack (Tensor x, Tensor y, bool unpack_ludata = true, bool unpack_pivots = true) -> Tensor(pmat), Tensor(l), Tensor(u) + args : (Tensor x, Tensor y, Tensor l, Tensor u, Tensor pmat, Tensor l_grad, Tensor u_grad, bool unpack_ludata, bool unpack_pivots) + output : Tensor(x_grad) + infer_meta : + func : LUUnpackGradInferMeta + kernel : + func : lu_unpack_grad + - backward_op : masked_select_grad forward : masked_select (Tensor x, Tensor mask) -> Tensor(out) args : (Tensor x, Tensor mask, Tensor out_grad) @@ -719,6 +728,16 @@ kernel : func : maxout_grad +- backward_op : mode_grad + forward : mode(Tensor x, int axis = -1, bool keepdim = false) -> Tensor(out), Tensor(indices) + args : (Tensor x, Tensor indices, Tensor out_grad, int axis, bool keepdim) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param: [x] + kernel : + func : mode_grad + - backward_op : mv_grad forward : mv (Tensor x, Tensor vec) -> Tensor(out) args : (Tensor x, Tensor vec, Tensor out_grad) @@ -729,6 +748,17 @@ kernel : func : mv_grad +- backward_op : nll_loss_grad + forward : nll_loss (Tensor 
input, Tensor label, Tensor weight, int64_t ignore_index = -100, str reduction = "mean") -> Tensor(out), Tensor(total_weight) + args : (Tensor input, Tensor label, Tensor weight, Tensor total_weight, Tensor out_grad, int64_t ignore_index, str reduction) + output : Tensor(input_grad) + infer_meta : + func : NllLossGradInferMeta + kernel : + func : nll_loss_grad + data_type : input + optional : weight + - backward_op : poisson_grad forward : poisson (Tensor x) -> Tensor(out) args : (Tensor out_grad) @@ -739,6 +769,16 @@ kernel : func : poisson_grad +- backward_op : qr_grad + forward : qr (Tensor x, str mode = "reduced") -> Tensor(q), Tensor(r) + args : (Tensor x, Tensor q, Tensor r, Tensor q_grad, Tensor r_grad, str mode) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : qr_grad + - backward_op : reciprocal_grad forward : reciprocal (Tensor x) -> Tensor(out) args : (Tensor out, Tensor out_grad) @@ -773,6 +813,16 @@ backward: relu_double_grad inplace : (out_grad -> x_grad) +- backward_op : renorm_grad + forward : renorm (Tensor x, float p, int axis, float max_norm) -> Tensor(out) + args : (Tensor x, Tensor out_grad, float p, int axis, float max_norm) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [out_grad] + kernel : + func : renorm_grad + - backward_op : round_grad forward : round(Tensor x) -> Tensor(out) args : (Tensor out_grad) diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index b0ce57461685ef..76e5720a8e5cfe 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -804,15 +804,6 @@ kernel : func : lu_grad -- backward_op : lu_unpack_grad - forward : lu_unpack (Tensor x, Tensor y, bool unpack_ludata, bool unpack_pivots) -> Tensor(pmat), Tensor(l), Tensor(u) - args : (Tensor x, Tensor y, Tensor l, Tensor u, Tensor pmat, Tensor l_grad, Tensor u_grad, bool unpack_ludata, bool unpack_pivots) - output : Tensor(x_grad) - infer_meta : - func : LUUnpackGradInferMeta - kernel : - func : lu_unpack_grad - - backward_op : margin_cross_entropy_grad forward : margin_cross_entropy (Tensor logits, Tensor label, bool return_softmax, int ring_id, int rank, int nranks, float margin1, float margin2, float margin3, float scale) -> Tensor(softmax), Tensor(loss) args : (Tensor logits, Tensor label, Tensor softmax, Tensor loss_grad, bool return_softmax, int ring_id, int rank, int nranks, float margin1, float margin2, float margin3, float scale) @@ -964,16 +955,6 @@ func : mish_grad inplace : (out_grad -> x_grad) -- backward_op : mode_grad - forward : mode(Tensor x, int axis, bool keepdim) -> Tensor(out), Tensor(indices) - args : (Tensor x, Tensor indices, Tensor out_grad, int axis, bool keepdim) - output : Tensor(x_grad) - infer_meta : - func : UnchangedInferMeta - param: [x] - kernel : - func : mode_grad - - backward_op : multi_dot_grad forward : multi_dot (Tensor[] x) -> Tensor(out) args : (Tensor[] x, Tensor out_grad) @@ -1041,17 +1022,6 @@ func : nearest_interp_grad data_type : output_grad -- backward_op : nll_loss_grad - forward : nll_loss (Tensor input, Tensor label, Tensor weight, int64_t ignore_index, str reduction) -> Tensor(out), Tensor(total_weight) - args : (Tensor input, Tensor label, Tensor weight, Tensor total_weight, Tensor out_grad, int64_t ignore_index, str reduction) - output : Tensor(input_grad) - infer_meta : - func : NllLossGradInferMeta - kernel : - func : nll_loss_grad - data_type : input - optional : weight - 
- backward_op : norm_grad forward : norm (Tensor x, int axis, float epsilon, bool is_test) -> Tensor(out), Tensor(norm) args : (Tensor x, Tensor norm, Tensor out_grad, int axis, float epsilon, bool is_test) @@ -1246,16 +1216,6 @@ kernel : func : put_along_axis_grad -- backward_op : qr_grad - forward : qr (Tensor x, str mode) -> Tensor(q), Tensor(r) - args : (Tensor x, Tensor q, Tensor r, Tensor q_grad, Tensor r_grad, str mode) - output : Tensor(x_grad) - infer_meta : - func : UnchangedInferMeta - param : [x] - kernel : - func : qr_grad - - backward_op : real_grad forward : real (Tensor x) -> Tensor(out) args : (Tensor out_grad) @@ -1273,16 +1233,6 @@ func : relu6_grad inplace : (out_grad -> x_grad) -- backward_op : renorm_grad - forward : renorm (Tensor x, float p, int axis, float max_norm) -> Tensor(out) - args : (Tensor x, Tensor out_grad, float p, int axis, float max_norm) - output : Tensor(x_grad) - infer_meta : - func : UnchangedInferMeta - param : [out_grad] - kernel : - func : renorm_grad - - backward_op : repeat_interleave_grad forward : repeat_interleave(Tensor x, int repeats, int axis) -> Tensor(out) args : (Tensor x, Tensor out_grad, int repeats, int axis) diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index d32a853b8c0940..1b2ee0f7e0aacd 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -1164,16 +1164,6 @@ func : lu backward : lu_grad -- op : lu_unpack - args : (Tensor x, Tensor y, bool unpack_ludata, bool unpack_pivots) - output : Tensor(pmat), Tensor(l), Tensor(u) - infer_meta : - func : LUUnpackInferMeta - kernel : - func : lu_unpack - data_type : x - backward : lu_unpack_grad - - op : margin_cross_entropy args : (Tensor logits, Tensor label, bool return_softmax, int ring_id, int rank, int nranks, float margin1, float margin2, float margin3, float scale) output : Tensor(softmax), Tensor(loss) @@ -1339,15 +1329,6 @@ func : mish backward : mish_grad -- op : mode - args : (Tensor x, int axis, bool keepdim) - output : Tensor(out), Tensor(indices) - infer_meta : - func : ModeInferMeta - kernel : - func : mode - backward : mode_grad - - op : momentum_ args : (Tensor param, Tensor grad, Tensor velocity, Tensor learning_rate, Tensor master_param, float mu, bool use_nesterov = false, str regularization_method = "", float regularization_coeff = 0.0, bool multi_precision = false, float rescale_grad = 1.0f) output : Tensor(param_out), Tensor(velocity_out), Tensor(master_param_out) @@ -1416,17 +1397,6 @@ data_type : x backward : nearest_interp_grad -- op : nll_loss - args : (Tensor input, Tensor label, Tensor weight, int64_t ignore_index, str reduction) - output : Tensor(out), Tensor(total_weight) - infer_meta : - func : NllLossRawInferMeta - kernel : - func : nll_loss - data_type : input - optional : weight - backward : nll_loss_grad - - op : nms args : (Tensor x, float threshold) output : Tensor(out) @@ -1615,15 +1585,6 @@ inplace : (arr -> out) backward : put_along_axis_grad -- op : qr - args : (Tensor x, str mode) - output : Tensor(q), Tensor(r) - infer_meta : - func : QrInferMeta - kernel : - func : qr - backward : qr_grad - - op : randint args : (int low, int high, IntArray shape, DataType dtype=DataType::INT64, Place place={}) output : Tensor(out) @@ -1676,16 +1637,6 @@ func : remainder inplace : (x -> out) -- op : renorm - args : (Tensor x, float p, int axis, float max_norm) - output : Tensor - infer_meta : - func : UnchangedInferMeta - param : [x] - kernel : - func : renorm - backward : 
renorm_grad - - op : repeat_interleave args : (Tensor x, int repeats, int axis) output : Tensor(out) diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index 5640ca7eb8b0ff..843ff811f91142 100644 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -761,6 +761,13 @@ extra : attrs : [bool use_mkldnn = false, bool is_test = false] +- op : lu_unpack + backward : lu_unpack_grad + inputs : + {x : X, y : Pivots} + outputs : + {pmat : Pmat, l : L, u : U} + - op : masked_select inputs : {x : X, mask : Mask} @@ -809,6 +816,13 @@ extra : attrs : [bool use_mkldnn = false] +- op : mode + backward : mode_grad + inputs : + x : X + outputs : + {out : Out, indices : Indices} + - op : multiply (elementwise_mul) backward : multiply_grad (elementwise_mul_grad) extra : @@ -832,6 +846,13 @@ extra : attrs : [bool use_mkldnn = false] +- op : nll_loss + backward : nll_loss_grad + inputs : + {input : X, label : Label, weight : Weight} + outputs : + {out : Out, total_weight : Total_weight} + - op : pad2d backward : pad2d_grad extra : @@ -869,6 +890,13 @@ extra : attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", bool is_test = false] +- op : qr + backward : qr_grad + inputs : + x : X + outputs : + {q : Q, r : R} + - op : quantize_linear extra : attrs : [float moving_rate = 0.9] @@ -946,6 +974,10 @@ - op : renorm backward : renorm_grad + inputs : + x : X + outputs : + out : Out extra : attrs : [bool use_mkldnn = false, bool use_cudnn = false] diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index 10b6645c61667b..2445a2650d8425 100644 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -637,6 +637,16 @@ func : logsigmoid backward : logsigmoid_grad +- op : lu_unpack + args : (Tensor x, Tensor y, bool unpack_ludata = true, bool unpack_pivots = true) + output : Tensor(pmat), Tensor(l), Tensor(u) + infer_meta : + func : LUUnpackInferMeta + kernel : + func : lu_unpack + data_type : x + backward : lu_unpack_grad + - op : masked_select args : (Tensor x, Tensor mask) output : Tensor (out) @@ -665,6 +675,15 @@ func : maxout backward : maxout_grad +- op : mode + args : (Tensor x, int axis = -1, bool keepdim = false) + output : Tensor(out), Tensor(indices) + infer_meta : + func : ModeInferMeta + kernel : + func : mode + backward : mode_grad + - op : mv args : (Tensor x, Tensor vec) output : Tensor @@ -674,6 +693,17 @@ func : mv backward : mv_grad +- op : nll_loss + args : (Tensor input, Tensor label, Tensor weight, int64_t ignore_index = -100, str reduction = "mean") + output : Tensor(out), Tensor(total_weight) + infer_meta : + func : NllLossRawInferMeta + kernel : + func : nll_loss + data_type : input + optional : weight + backward : nll_loss_grad + - op : npu_identity args : (Tensor x, int format = -1) output : Tensor @@ -692,6 +722,15 @@ func : poisson backward : poisson_grad +- op : qr + args : (Tensor x, str mode = "reduced") + output : Tensor(q), Tensor(r) + infer_meta : + func : QrInferMeta + kernel : + func : qr + backward : qr_grad + - op : reciprocal args : (Tensor x) output : Tensor(out) @@ -712,6 +751,16 @@ inplace : (x -> out) backward : relu_grad +- op : renorm + args : (Tensor x, float p, int axis, float max_norm) + output : Tensor + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : renorm + backward : renorm_grad + - op : round args : (Tensor x) output : Tensor(out) diff --git a/paddle/phi/ops/compat/lu_unpack_sig.cc b/paddle/phi/ops/compat/lu_unpack_sig.cc 
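For the lu_unpack entry moved into ops.yaml above, a hedged usage sketch; the Python-side wrappers are assumed to be paddle.linalg.lu and paddle.linalg.lu_unpack, with defaults mirroring the unpack_ludata/unpack_pivots attributes:

    import paddle

    a = paddle.to_tensor([[4.0, 3.0], [6.0, 3.0]])
    lu, pivots = paddle.linalg.lu(a)               # packed LU factors plus pivot indices
    p, l, u = paddle.linalg.lu_unpack(lu, pivots)  # permutation, lower- and upper-triangular factors
    # up to numerical error, a == p @ l @ u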
deleted file mode 100644 index 8baafe4fcb23ac..00000000000000 --- a/paddle/phi/ops/compat/lu_unpack_sig.cc +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature LUUnpackOpArgumentMapping(const ArgumentMappingContext& ctx) { - return KernelSignature("lu_unpack", - {"X", "Pivots"}, - {"unpack_ludata", "unpack_pivots"}, - {"Pmat", "L", "U"}); -} - -KernelSignature LUUnpackGradOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature("lu_unpack_grad", - {"X", "Pivots", "L", "U", "Pmat", "L@GRAD", "U@GRAD"}, - {"unpack_ludata", "unpack_pivots"}, - {"X@GRAD"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(lu_unpack, phi::LUUnpackOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(lu_unpack_grad, phi::LUUnpackGradOpArgumentMapping); diff --git a/paddle/phi/ops/compat/mode_sig.cc b/paddle/phi/ops/compat/mode_sig.cc deleted file mode 100644 index e21cd69bf60a15..00000000000000 --- a/paddle/phi/ops/compat/mode_sig.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature ModeOpArgumentMapping(const ArgumentMappingContext& ctx) { - return KernelSignature( - "mode", {"X"}, {"axis", "keepdim"}, {"Out", "Indices"}); -} - -KernelSignature ModeGradOpArgumentMapping(const ArgumentMappingContext& ctx) { - return KernelSignature("mode_grad", - {"X", "Indices", "Out@GRAD"}, - {"axis", "keepdim"}, - {"X@GRAD"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(mode, phi::ModeOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(mode_grad, phi::ModeGradOpArgumentMapping); diff --git a/paddle/phi/ops/compat/nll_loss_sig.cc b/paddle/phi/ops/compat/nll_loss_sig.cc deleted file mode 100644 index f3f9c531781923..00000000000000 --- a/paddle/phi/ops/compat/nll_loss_sig.cc +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature NllLossOpArgumentMapping(const ArgumentMappingContext& ctx) { - // TODO(xiongkun): can't remove the forward mapping, because the Weight is - // optional - return KernelSignature("nll_loss", - {"X", "Label", "Weight"}, - {"ignore_index", "reduction"}, - {"Out", "Total_weight"}); -} - -KernelSignature NllLossGradOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature("nll_loss_grad", - {"X", "Label", "Weight", "Total_weight", "Out@GRAD"}, - {"ignore_index", "reduction"}, - {"X@GRAD"}); -} - -} // namespace phi -PD_REGISTER_ARG_MAPPING_FN(nll_loss_grad, phi::NllLossGradOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(nll_loss, phi::NllLossOpArgumentMapping); diff --git a/paddle/phi/ops/compat/qr_sig.cc b/paddle/phi/ops/compat/qr_sig.cc deleted file mode 100644 index dbe1cd86434f5d..00000000000000 --- a/paddle/phi/ops/compat/qr_sig.cc +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature QrOpArgumentMapping(const ArgumentMappingContext& ctx) { - return KernelSignature("qr", {"X"}, {"mode"}, {"Q", "R"}); -} - -KernelSignature QrGradOpArgumentMapping(const ArgumentMappingContext& ctx) { - return KernelSignature( - "qr_grad", {"X", "Q", "R", "Q@GRAD", "R@GRAD"}, {"mode"}, {"X@GRAD"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(qr, phi::QrOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(qr_grad, phi::QrGradOpArgumentMapping); diff --git a/paddle/phi/ops/compat/renorm_sig.cc b/paddle/phi/ops/compat/renorm_sig.cc deleted file mode 100644 index 0c5198dff37b04..00000000000000 --- a/paddle/phi/ops/compat/renorm_sig.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature RenormOpArgumentMapping(const ArgumentMappingContext& ctx) { - VLOG(3) << "in renrom arguments mapping"; - return KernelSignature("renorm", {"X"}, {"p", "axis", "max_norm"}, {"Out"}); -} - -KernelSignature RenormGradOpArgumentMapping(const ArgumentMappingContext& ctx) { - VLOG(3) << "in renrom grad arguments mapping"; - return KernelSignature( - "renorm_grad", {"X", "Out@GRAD"}, {"p", "axis", "max_norm"}, {"X@GRAD"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(renorm, phi::RenormOpArgumentMapping); - -PD_REGISTER_ARG_MAPPING_FN(renorm_grad, phi::RenormGradOpArgumentMapping); From 5c9bbe8980c8bdf63f3ecffd1a6a1f303cde6af4 Mon Sep 17 00:00:00 2001 From: YuanRisheng Date: Mon, 5 Dec 2022 15:58:31 +0800 Subject: [PATCH 149/154] [Add Approve Check]Check standardized API and Yaml Op (#48565) * check standard api * test approve,test=document_fix * test approve * perfect approve msg * test_approve * test_approve * test_approve * approve test * test approve,test=document_fix * test approve,test=document_fix * fix bugs,test=document_fix * test approve,test=document_fix * test approve,test=document_fix * test approve,test=document_fix * test approve,test=document_fix * test approve,test=document_fix * test approve,test=document_fix * test approve,test=document_fix * test approve,test=document_fix * test approve,test=document_fix * test approve,test=document_fix * test approve,test=document_fix --- tools/check_api_approvals.sh | 9 ++ tools/check_api_yaml_same.py | 232 +++++++++++++++++++++++++++++++++++ 2 files changed, 241 insertions(+) create mode 100644 tools/check_api_yaml_same.py diff --git a/tools/check_api_approvals.sh b/tools/check_api_approvals.sh index 861c050b44987f..b64c0fe7d22be3 100644 --- a/tools/check_api_approvals.sh +++ b/tools/check_api_approvals.sh @@ -61,6 +61,15 @@ if [ "$api_doc_spec_diff" != "" ]; then check_approval 1 29231 79295425 23093488 39876205 65896652 54695910 fi +api_yaml_diff=`python ${PADDLE_ROOT}/tools/check_api_yaml_same.py ${PADDLE_ROOT}/paddle/fluid/API_DEV.spec ${PADDLE_ROOT}/paddle/fluid/API_PR.spec ${BRANCH} ${PADDLE_ROOT}` +if [ "$api_yaml_diff" != "" ]; then + echo_line="API's name and params should be consistent with op's name and params in yaml. + The API or Yaml file you changed may cause inconsistent.\n" + echo_line="${echo_line} please request one of the RD (YuanRisheng, zyfncg, chenwhql, phlrain) review and approve.\n" + echo_line="${echo_line}\r\n ${api_yaml_diff}\n" + check_approval 1 YuanRisheng zyfncg chenwhql phlrain +fi + api_src_spec_diff=`python ${PADDLE_ROOT}/tools/check_api_source_without_core_ops.py ${PADDLE_ROOT}/paddle/fluid/API_DEV.source.md5 ${PADDLE_ROOT}/paddle/fluid/API_PR.source.md5` if [ "$api_src_spec_diff" != "" ]; then echo_line="APIs without core.ops: \n${api_src_spec_diff}\n" diff --git a/tools/check_api_yaml_same.py b/tools/check_api_yaml_same.py new file mode 100644 index 00000000000000..9cf56519546c5f --- /dev/null +++ b/tools/check_api_yaml_same.py @@ -0,0 +1,232 @@ +#!/usr/bin/env python + +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import difflib +import os +import re +import sys + +import yaml + +root_path = sys.argv[4] + + +def read_yaml_ops(): + ops_list = [] + yaml_path = root_path + "/paddle/phi/api/yaml/ops.yaml" + legacy_yaml_path = root_path + "/paddle/phi/api/yaml/legacy_ops.yaml" + + with open(yaml_path, 'r') as f: + ops_list = yaml.load(f, Loader=yaml.FullLoader) + with open(legacy_yaml_path, 'r') as f: + ops_list.extend(yaml.load(f, Loader=yaml.FullLoader)) + + return ops_list + + +def read_api(api_file): + with open(api_file, 'r') as f: + pr_apis = f.read() + pr_apis = pr_apis.splitlines() + result = [] + for api in pr_apis: + # Delete all non-function api + if api.find('args') == -1: + continue + result.append(api) + return result + + +def get_api_args(api_item): + result = re.search(r"args=\[(?P[^\]]*)\]", api_item) + result = [ + param.strip().replace('\'', '') + for param in result.group('args').split(',') + ] + if result[-1] == 'name': + result = result[:-1] + return result + + +def get_api_name(api_item): + if api_item[0] == '+' or api_item[0] == '-' or api_item[0] == ' ': + return api_item.split(" ")[1].split(".")[-1] + else: + return api_item.split(" ")[0].split(".")[-1] + + +def get_yaml_op_args(op_args): + args_list = op_args[1:-1].split(',') + args_list = [args.split('=')[0].strip() for args in args_list] + return [param.split(' ')[-1].strip() for param in args_list] + + +def get_api_diff(dev_api_file, pr_api_file): + develop_apis = read_api(dev_api_file) + pr_apis = read_api(pr_api_file) + + differ = difflib.Differ() + diff_obj = differ.compare(develop_apis, pr_apis) + result = [] + for each_diff in diff_obj: + result.append(each_diff) + return result + + +def get_yaml_diff(branch): + ops_yaml_path = root_path + "/paddle/phi/api/yaml/ops.yaml" + legacy_yaml_path = root_path + "/paddle/phi/api/yaml/legacy_ops.yaml" + git_cmd = ( + "git diff -U0 upstream/" + + branch + + " " + + ops_yaml_path + + " " + + legacy_yaml_path + ) + yaml_diff = os.popen(git_cmd).readlines() + result = [] + for line in yaml_diff: + result.append(line.strip('\r\n')) + return result + + +api_diffs = get_api_diff(sys.argv[1], sys.argv[2]) +yaml_diffs = get_yaml_diff(sys.argv[3]) +yaml_ops = read_yaml_ops() # The current PR yaml's ops +approve_api_msg = [] +approve_yaml_msg = [] + +api_add = [] +api_delete = [] + +for each_diff in api_diffs: + if each_diff[0] == '+': + api_add.append(each_diff) + if each_diff[0] == '-': + api_delete.append(each_diff) + +yaml_add = [] +yaml_delete = [] + +for each_diff in yaml_diffs: + if each_diff[0] == '+': + yaml_add.append(each_diff) + if each_diff[0] == '-': + yaml_delete.append(each_diff) + +# API add or modified +for each_add in api_add: + add = True + modify = False + need_approve = True + yaml_name_found = False + api_name = get_api_name(each_add) + api_args = get_api_args(each_add) + + for each_delete_api in api_delete: + if get_api_name(each_delete_api) == api_name: + modify = True + add = False + + # If we find yaml name in yaml_delete, it shows that + # yaml op's name is modified. 
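# Illustrative aside (hypothetical values, minimal sketch): read_yaml_ops()
# returns plain dicts produced by yaml.load, so for the `mode` entry added to
# ops.yaml earlier in this series the relevant fields look roughly like
#   {'op': 'mode',
#    'args': '(Tensor x, int axis = -1, bool keepdim = false)',
#    'output': 'Tensor(out), Tensor(indices)', ...}
# and the name/args matching in the loops that follow works directly on those
# strings.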
+ for each_delete_yaml in yaml_delete: + if each_delete_yaml.find(api_name) != -1: + yaml_name_found = True + + for op in yaml_ops: + if op['op'] == api_name: + yaml_name_found = True + if api_args == get_yaml_op_args(op['args']): + need_approve = False + break + + # If API is modified and doesn't have a corresponding yaml's op + # We needn't approve it + if modify and not yaml_name_found: + need_approve = False + + # If API is added and yaml's op is not added, + # it shows that new api doesn't have a corresponding yaml's op. + # We needn't approve it + if add and len(yaml_add) == 0: + need_approve = False + + # In others, the changes need to be approved. + # eg: 1, The args in api is inconsistent with yaml's op + # 2, New Api is add, but the yaml op's name may not be inconsistent + # with api's name. + # 3, Api's name is modified, but the yaml op's name is not modified. + if need_approve: + approve_api_msg.append(each_add) + +# API delete +for each_delete in api_delete: + api_name = get_api_name(each_delete) + api_args = get_api_args(each_delete) + + need_approve = False + for op in yaml_ops: + # When api is deleted, it is unusual to find the same name's + # op in yaml. So, we need review code. + if op['op'] == api_name and api_args == get_yaml_op_args(op['args']): + need_approve = True + break + if need_approve: + approve_api_msg.append(each_delete) + +# For yaml, we don't have to consider its add or delete. +# Because if it is related with api, code above has dealt with it. +# But we need consider below situation: +# The op in yaml is modified and its corresponding api is not modified. + +if len(api_add) == 0 and len(api_delete) == 0: + pr_apis = read_api(sys.argv[2]) # Get all api of this PR + for each_diff in yaml_delete: + # Note: The condition is relaxed, because symbol '-' can present delete and modification. + # So if op is deleted in yaml, code below is also triggered. + # But we mainly deal with modification here. + + # If op name in yaml is modified and this name is in API, + # this PR need to be reviewed. + if each_diff.startswith('-- op'): + for api in pr_apis: + if each_diff.find(get_api_name(api)) != -1: + approve_yaml_msg.append(each_diff) + break + + # if op args in yaml is modified and this args is in API, + # this PR need to be reviewed. 
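# Illustrative aside (hypothetical spec/yaml strings, minimal sketch of the
# normalization that the equality checks in this file rely on):
#   get_api_name("+ paddle.mode (ArgSpec(args=['x', 'axis', 'keepdim', 'name'], ...")
#       -> 'mode'
#   get_api_args("+ paddle.mode (ArgSpec(args=['x', 'axis', 'keepdim', 'name'], ...")
#       -> ['x', 'axis', 'keepdim']        # the trailing 'name' is dropped
#   get_yaml_op_args("(Tensor x, int axis = -1, bool keepdim = false)")
#       -> ['x', 'axis', 'keepdim']
# Equal lists mean the API signature still matches the yaml op, so no extra
# approval is required.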
+ if each_diff.startswith('- args'): + yaml_op_args_str = each_diff.strip('- args : ') + yaml_op_args = get_yaml_op_args(yaml_op_args_str) + for api in pr_apis: + if get_api_args(api) == yaml_op_args: + approve_yaml_msg.append(each_diff) + break + +# collect all msg +approve_msg = [] +if len(approve_api_msg) != 0: + approve_msg = ['The APIs you changed are as follows:'] + approve_msg.extend(approve_api_msg) + +if len(approve_yaml_msg) != 0: + approve_msg = ['The Yaml File you changed are as follows:'] + approve_msg.extend(approve_yaml_msg) + +print('\r\n'.join(approve_msg)) From d1e2ba8a3176cc634887cc2c10b56cab13d3cbd5 Mon Sep 17 00:00:00 2001 From: sneaxiy <32832641+sneaxiy@users.noreply.github.com> Date: Mon, 5 Dec 2022 16:33:41 +0800 Subject: [PATCH 150/154] Register exp/expm1/logit bf16 activation op kernels (#48702) * register more bf16 ops * update to register coresponding backward ops --- paddle/phi/kernels/gpu/activation_grad_kernel.cu | 9 ++++++--- paddle/phi/kernels/gpu/activation_kernel.cu | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/paddle/phi/kernels/gpu/activation_grad_kernel.cu b/paddle/phi/kernels/gpu/activation_grad_kernel.cu index 0c8c8b43a0bac9..441790aab3ae21 100644 --- a/paddle/phi/kernels/gpu/activation_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/activation_grad_kernel.cu @@ -371,7 +371,8 @@ PD_REGISTER_KERNEL(exp_grad, double, int, int64_t, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_ACTIVATION_GRAD_KERNEL(softshrink_grad, SoftShrinkGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel) @@ -386,7 +387,8 @@ PD_REGISTER_KERNEL(expm1_grad, phi::Expm1GradKernel, float, double, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(logit_grad, GPU, @@ -394,7 +396,8 @@ PD_REGISTER_KERNEL(logit_grad, phi::LogitGradKernel, float, double, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(square_grad, GPU, diff --git a/paddle/phi/kernels/gpu/activation_kernel.cu b/paddle/phi/kernels/gpu/activation_kernel.cu index 271ad6107bce40..0b396b17f5cb82 100644 --- a/paddle/phi/kernels/gpu/activation_kernel.cu +++ b/paddle/phi/kernels/gpu/activation_kernel.cu @@ -215,21 +215,24 @@ PD_REGISTER_KERNEL(exp, double, int, int64_t, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(expm1, GPU, ALL_LAYOUT, phi::Expm1Kernel, float, double, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(logit, GPU, ALL_LAYOUT, phi::LogitKernel, float, double, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(square, GPU, ALL_LAYOUT, From 34a957e3e362f5045af325c079b78d207e09fb19 Mon Sep 17 00:00:00 2001 From: Ruibiao Chen Date: Mon, 5 Dec 2022 16:46:08 +0800 Subject: [PATCH 151/154] Replace mutable_data with DeviceContext.Alloc in phi kernels (#48500) * Replace mutable_data with DeviceContext.Alloc in phi kernels * Fix CI errors * Fix CI errors * Fix CI errors, test=kunlun * Fix CI errors, test=kunlun * Handle rnn_functor * Update approvals --- paddle/fluid/operators/math/CMakeLists.txt | 2 +- .../math/selected_rows_functor_test.cc | 25 ++++++++++++ paddle/fluid/platform/device_context.cc | 2 +- paddle/phi/core/device_context.cc | 2 +- paddle/phi/kernels/cpu/concat_kernel.cc | 2 +- paddle/phi/kernels/cpu/elementwise_grad.h | 6 +-- paddle/phi/kernels/cpu/histogram_kernel.cc | 2 +- 
.../kernels/cpu/masked_select_grad_kernel.cc | 3 +- .../phi/kernels/cpu/masked_select_kernel.cc | 3 +- .../kernels/cpu/put_along_axis_grad_kernel.cc | 2 +- .../kernels/cpu/temporal_shift_grad_kernel.cc | 5 ++- .../phi/kernels/cpu/temporal_shift_kernel.cc | 3 +- paddle/phi/kernels/cpu/yolo_box_kernel.cc | 13 ++++--- paddle/phi/kernels/funcs/broadcast_function.h | 2 +- paddle/phi/kernels/funcs/diagonal.h | 3 +- paddle/phi/kernels/funcs/elementwise_base.h | 2 +- .../phi/kernels/funcs/elementwise_grad_base.h | 6 ++- paddle/phi/kernels/funcs/fc_functor.cc | 7 +++- paddle/phi/kernels/funcs/math_function.cu | 8 +++- paddle/phi/kernels/funcs/math_function_impl.h | 6 +-- paddle/phi/kernels/funcs/select_impl.cu.h | 2 +- .../kernels/funcs/selected_rows_functor.cc | 37 +++++++++--------- .../kernels/funcs/selected_rows_functor.cu | 15 ++++--- .../phi/kernels/funcs/top_k_function_cuda.h | 11 +++--- paddle/phi/kernels/gpu/concat_kernel.cu | 2 +- paddle/phi/kernels/gpu/depthwise_conv.h | 12 +++--- .../kernels/gpu/depthwise_conv_grad_kernel.cu | 4 +- .../phi/kernels/gpu/depthwise_conv_kernel.cu | 2 +- paddle/phi/kernels/gpu/dropout_grad_kernel.cu | 2 +- paddle/phi/kernels/gpu/elementwise_grad.h | 39 +++++++++---------- .../phi/kernels/gpu/graph_reindex_kernel.cu | 4 +- paddle/phi/kernels/gpu/histogram_kernel.cu | 8 ++-- paddle/phi/kernels/gpu/kthvalue_kernel.cu | 11 ++++-- .../kernels/gpu/put_along_axis_grad_kernel.cu | 2 +- paddle/phi/kernels/gpu/rnn_functor.h | 10 ++--- paddle/phi/kernels/gpu/rnn_grad_kernel.cu.cc | 2 +- paddle/phi/kernels/gpu/rnn_kernel.cu.cc | 2 +- paddle/phi/kernels/gpu/sgd_kernel.cu | 12 +++--- .../kernels/gpu/temporal_shift_grad_kernel.cu | 4 +- .../phi/kernels/gpu/temporal_shift_kernel.cu | 3 +- paddle/phi/kernels/gpu/yolo_box_kernel.cu | 7 ++-- .../kernels/impl/digamma_grad_kernel_impl.h | 2 +- paddle/phi/kernels/impl/digamma_kernel_impl.h | 2 +- .../impl/elementwise_grad_kernel_impl.h | 4 +- paddle/phi/kernels/memcpy_kernel.cc | 34 ---------------- .../xpu/elementwise_add_grad_kernel.cc | 2 +- paddle/phi/kernels/xpu/full_kernel.cc | 2 +- tools/check_file_diff_approvals.sh | 18 ++++----- 48 files changed, 184 insertions(+), 175 deletions(-) diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index e2a62273d03282..3b06722ddfbe01 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -50,7 +50,7 @@ math_library(tree2col DEPS math_function) cc_test( selected_rows_functor_test SRCS selected_rows_functor_test.cc - DEPS selected_rows_functor) + DEPS allocator selected_rows_functor) cc_test( im2col_test SRCS im2col_test.cc diff --git a/paddle/fluid/operators/math/selected_rows_functor_test.cc b/paddle/fluid/operators/math/selected_rows_functor_test.cc index 49c6942531defd..a2c88c723fefa6 100644 --- a/paddle/fluid/operators/math/selected_rows_functor_test.cc +++ b/paddle/fluid/operators/math/selected_rows_functor_test.cc @@ -15,11 +15,15 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/funcs/selected_rows_functor.h" #include "gtest/gtest.h" +#include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/phi/kernels/funcs/math_function.h" TEST(selected_rows_functor, cpu_add) { paddle::platform::CPUPlace cpu_place; phi::CPUContext ctx(cpu_place); + ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() + .GetAllocator(cpu_place) + .get()); phi::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -109,6 +113,9 @@ TEST(selected_rows_functor, cpu_add) { TEST(selected_rows_functor, cpu_add_to) { paddle::platform::CPUPlace cpu_place; phi::CPUContext ctx(cpu_place); + ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() + .GetAllocator(cpu_place) + .get()); phi::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -198,6 +205,9 @@ TEST(selected_rows_functor, cpu_add_to) { TEST(selected_rows_functor, cpu_merge_average_float) { paddle::platform::CPUPlace cpu_place; phi::CPUContext ctx(cpu_place); + ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() + .GetAllocator(cpu_place) + .get()); phi::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -233,6 +243,9 @@ TEST(selected_rows_functor, cpu_merge_average_float) { TEST(selected_rows_functor, cpu_merge_add_float) { paddle::platform::CPUPlace cpu_place; phi::CPUContext ctx(cpu_place); + ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() + .GetAllocator(cpu_place) + .get()); phi::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -269,6 +282,9 @@ TEST(selected_rows_functor, cpu_merge_add_float) { TEST(selected_rows_functor, cpu_merge_add_int) { paddle::platform::CPUPlace cpu_place; phi::CPUContext ctx(cpu_place); + ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() + .GetAllocator(cpu_place) + .get()); phi::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -305,6 +321,9 @@ TEST(selected_rows_functor, cpu_merge_add_int) { TEST(selected_rows_functor, cpu_merge_add_multi) { paddle::platform::CPUPlace cpu_place; phi::CPUContext ctx(cpu_place); + ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() + .GetAllocator(cpu_place) + .get()); phi::funcs::SetConstant set_const; int64_t height = 10; @@ -354,6 +373,9 @@ TEST(selected_rows_functor, cpu_merge_add_multi) { TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) { paddle::platform::CPUPlace cpu_place; phi::CPUContext ctx(cpu_place); + ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() + .GetAllocator(cpu_place) + .get()); phi::funcs::SetConstant set_const; int64_t height = 10; @@ -409,6 +431,9 @@ TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) { TEST(selected_rows_functor, cpu_sum_to) { paddle::platform::CPUPlace cpu_place; phi::CPUContext ctx(cpu_place); + ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() + .GetAllocator(cpu_place) + .get()); phi::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index a09f438c505e80..539bbfb87d0aa0 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -163,7 +163,7 @@ std::unique_ptr CreateDeviceContext( if (!disable_setting_default_stream_for_allocator) { instance.SetDefaultStream(CUDAPlace(p.GetDeviceId()), cuda_ctx->stream()); } - 
dev_ctx->SetAllocator(instance.GetAllocator(p).get()); + dev_ctx->SetAllocator(instance.GetAllocator(p, cuda_ctx->stream()).get()); dev_ctx->SetPinnedAllocator( instance.GetAllocator(paddle::platform::CUDAPinnedPlace()).get()); diff --git a/paddle/phi/core/device_context.cc b/paddle/phi/core/device_context.cc index d46f9250eeb4c9..a18e695cce4d8d 100644 --- a/paddle/phi/core/device_context.cc +++ b/paddle/phi/core/device_context.cc @@ -148,7 +148,7 @@ struct DeviceContext::Impl { if (tensor->initialized() && tensor->place() != place) { ClearHolder(tensor); } - auto* allocator = tensor->numel() == 0 + auto* allocator = tensor->numel() == 0 && requested_size == 0 ? zero_allocator_ : (pinned ? pinned_allocator_ : device_allocator_); #ifdef PADDLE_WITH_CUDA diff --git a/paddle/phi/kernels/cpu/concat_kernel.cc b/paddle/phi/kernels/cpu/concat_kernel.cc index 96e02f4c42046a..1075cb9f777c38 100644 --- a/paddle/phi/kernels/cpu/concat_kernel.cc +++ b/paddle/phi/kernels/cpu/concat_kernel.cc @@ -44,7 +44,7 @@ void ConcatKernel(const Context& dev_ctx, phi::DDim out_dims = phi::funcs::ComputeAndCheckShape(true, x_dims, axis); out->Resize(out_dims); - out->mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(out); // If axis is 0, the lod of the output is not the same as inputs. if (axis == 0 && x[0]->lod().size() > 0) { diff --git a/paddle/phi/kernels/cpu/elementwise_grad.h b/paddle/phi/kernels/cpu/elementwise_grad.h index 92587566eb8759..05c02f167b6a2d 100644 --- a/paddle/phi/kernels/cpu/elementwise_grad.h +++ b/paddle/phi/kernels/cpu/elementwise_grad.h @@ -90,13 +90,11 @@ ElementwiseAddGrad(const CPUContext& ctx, int axis = -1) { auto blas = phi::funcs::GetBlas(ctx); if (dx) { - blas.VCOPY( - dout.numel(), dout.data(), dx->mutable_data(ctx.GetPlace())); + blas.VCOPY(dout.numel(), dout.data(), ctx.template Alloc(dx)); } if (dy) { - blas.VCOPY( - dout.numel(), dout.data(), dy->mutable_data(ctx.GetPlace())); + blas.VCOPY(dout.numel(), dout.data(), ctx.template Alloc(dy)); } } diff --git a/paddle/phi/kernels/cpu/histogram_kernel.cc b/paddle/phi/kernels/cpu/histogram_kernel.cc index d9c41508efde08..4c04566b8b0b07 100644 --- a/paddle/phi/kernels/cpu/histogram_kernel.cc +++ b/paddle/phi/kernels/cpu/histogram_kernel.cc @@ -34,7 +34,7 @@ void HistogramKernel(const Context& dev_ctx, const T* input_data = input.data(); auto input_numel = input.numel(); - int64_t* out_data = output->mutable_data(dev_ctx.GetPlace()); + int64_t* out_data = dev_ctx.template Alloc(output); phi::funcs::SetConstant()( dev_ctx, output, static_cast(0)); diff --git a/paddle/phi/kernels/cpu/masked_select_grad_kernel.cc b/paddle/phi/kernels/cpu/masked_select_grad_kernel.cc index bbb08f06167769..f615fb2e0bc3fd 100644 --- a/paddle/phi/kernels/cpu/masked_select_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/masked_select_grad_kernel.cc @@ -27,7 +27,8 @@ void MaskedSelectGradKernel(const Context& dev_ctx, DenseTensor* x_grad) { auto* mask_data = mask.data(); auto* input_data = out_grad.data(); - auto* out_data = x_grad->mutable_data(dev_ctx.GetPlace()); + + auto* out_data = dev_ctx.template Alloc(x_grad); int mask_size = mask.numel(); int index = 0; diff --git a/paddle/phi/kernels/cpu/masked_select_kernel.cc b/paddle/phi/kernels/cpu/masked_select_kernel.cc index f377658d507f60..33311c26cfeb64 100644 --- a/paddle/phi/kernels/cpu/masked_select_kernel.cc +++ b/paddle/phi/kernels/cpu/masked_select_kernel.cc @@ -48,7 +48,8 @@ void MaskedSelectKernel(const Context& dev_ctx, DDim out_dim{out_size}; out->Resize(out_dim); - auto out_data = 
out->mutable_data(phi::CPUPlace()); + + auto out_data = dev_ctx.template HostAlloc(out); int index = 0; for (int i = 0; i < mask_size; i++) { diff --git a/paddle/phi/kernels/cpu/put_along_axis_grad_kernel.cc b/paddle/phi/kernels/cpu/put_along_axis_grad_kernel.cc index ca57c223beb4be..969c5b9fe33064 100644 --- a/paddle/phi/kernels/cpu/put_along_axis_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/put_along_axis_grad_kernel.cc @@ -58,7 +58,7 @@ void PutAlongAxisGradKernel(const Context& dev_ctx, if (value_grad) { value_grad->Resize(index.dims()); - value_grad->mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(value_grad); if (index_type == DataType::INT32) { paddle::operators::cpu_gather_kernel( out_grad, axis, index, *value_grad, dev_ctx); diff --git a/paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc b/paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc index 9e6a0e441223f4..3dcd3c9eb49fb7 100644 --- a/paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc @@ -114,8 +114,9 @@ void TemporalShiftGradKernel(const Context& dev_ctx, (data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w}) : phi::make_ddim({nt, h, w, c})); const T* output_grad_data = output_grad->data(); - T* input_grad_data = - input_grad->mutable_data(in_grad_dims, dev_ctx.GetPlace()); + input_grad->Resize(in_grad_dims); + + T* input_grad_data = dev_ctx.template Alloc(input_grad); if (data_layout == DataLayout::kNCHW) { TemporalShiftBwNCHW( diff --git a/paddle/phi/kernels/cpu/temporal_shift_kernel.cc b/paddle/phi/kernels/cpu/temporal_shift_kernel.cc index 3d10520ec84f5a..3edd3aa301f985 100644 --- a/paddle/phi/kernels/cpu/temporal_shift_kernel.cc +++ b/paddle/phi/kernels/cpu/temporal_shift_kernel.cc @@ -114,7 +114,8 @@ void TemporalShiftKernel(const Context& dev_ctx, (data_layout == DataLayout::kNCHW ? 
phi::make_ddim({nt, c, h, w}) : phi::make_ddim({nt, h, w, c})); const T* input_data = input->data(); - T* output_data = output->mutable_data(out_dims, dev_ctx.GetPlace()); + output->Resize(out_dims); + T* output_data = dev_ctx.template Alloc(output); if (data_layout == DataLayout::kNCHW) { TemporalShiftFwNCHW( diff --git a/paddle/phi/kernels/cpu/yolo_box_kernel.cc b/paddle/phi/kernels/cpu/yolo_box_kernel.cc index 6b882ad2895129..0c04c78214a352 100644 --- a/paddle/phi/kernels/cpu/yolo_box_kernel.cc +++ b/paddle/phi/kernels/cpu/yolo_box_kernel.cc @@ -51,16 +51,19 @@ void YoloBoxKernel(const Context& dev_ctx, const int an_stride = (class_num + 5) * stride; DenseTensor anchors_; - auto anchors_data = - anchors_.mutable_data({an_num * 2}, dev_ctx.GetPlace()); + anchors_.Resize({an_num * 2}); + auto anchors_data = dev_ctx.template Alloc(&anchors_); std::copy(anchors.begin(), anchors.end(), anchors_data); const T* input_data = input->data(); const int* imgsize_data = imgsize->data(); - T* boxes_data = boxes->mutable_data({n, box_num, 4}, dev_ctx.GetPlace()); + boxes->Resize({n, box_num, 4}); + T* boxes_data = dev_ctx.template Alloc(boxes); memset(boxes_data, 0, boxes->numel() * sizeof(T)); - T* scores_data = - scores->mutable_data({n, box_num, class_num}, dev_ctx.GetPlace()); + + scores->Resize({n, box_num, class_num}); + T* scores_data = dev_ctx.template Alloc(scores); + memset(scores_data, 0, scores->numel() * sizeof(T)); T box[4]; diff --git a/paddle/phi/kernels/funcs/broadcast_function.h b/paddle/phi/kernels/funcs/broadcast_function.h index e19735e6c13550..d2c30c8fa36114 100644 --- a/paddle/phi/kernels/funcs/broadcast_function.h +++ b/paddle/phi/kernels/funcs/broadcast_function.h @@ -996,7 +996,7 @@ void ElementwiseCompute(const GPUContext &dev_ctx, DenseTensor *z) { std::vector ins = {&x, &y}; std::vector outs = {z}; - z->mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(z); BroadcastKernel( dev_ctx, ins, &outs, axis, func); } diff --git a/paddle/phi/kernels/funcs/diagonal.h b/paddle/phi/kernels/funcs/diagonal.h index 81525cb25449ee..92f970aed32795 100644 --- a/paddle/phi/kernels/funcs/diagonal.h +++ b/paddle/phi/kernels/funcs/diagonal.h @@ -104,7 +104,8 @@ DenseTensor Diagonal(const DeviceContext& context, DenseTensor diag; DDim diag_dims = phi::make_ddim(ret_dims); auto dig_stride = phi::stride(diag_dims); - auto diag_data = diag.mutable_data(diag_dims, context.GetPlace()); + diag.Resize(diag_dims); + auto diag_data = context.template Alloc(&diag); int64_t pos = std::abs(offset) * offset_stride; int64_t dim_size = ret_strides.size(); diff --git a/paddle/phi/kernels/funcs/elementwise_base.h b/paddle/phi/kernels/funcs/elementwise_base.h index 17b0a653cc8a85..ffb3ff4ae334aa 100644 --- a/paddle/phi/kernels/funcs/elementwise_base.h +++ b/paddle/phi/kernels/funcs/elementwise_base.h @@ -474,7 +474,7 @@ static inline void GetDoubleGradSafeTensor(const DeviceContext &dev_ctx, } else { auto meta = phi::DenseTensorMeta(x.dtype(), x.dims(), x.layout()); *ddx_safe = phi::Empty(dev_ctx, std::move(meta)); - ddx_safe->mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(ddx_safe); SetConstant set_zero; set_zero(dev_ctx, ddx_safe, static_cast(0)); } diff --git a/paddle/phi/kernels/funcs/elementwise_grad_base.h b/paddle/phi/kernels/funcs/elementwise_grad_base.h index 65f21e5b7f196c..b9ffb4e3f12378 100644 --- a/paddle/phi/kernels/funcs/elementwise_grad_base.h +++ b/paddle/phi/kernels/funcs/elementwise_grad_base.h @@ -237,7 +237,8 @@ void CommonElementwiseBroadcastBackward(const CPUContext 
&ctx, // result. if (dx && dx->IsSharedBufferWith(dout)) { dx->clear(); - dx->mutable_data(x_dims, ctx.GetPlace()); + dx->Resize(x_dims); + ctx.template Alloc(dx); } VLOG(3) << "CommonElementwiseBroadcastBackward xdims:" @@ -1680,7 +1681,8 @@ void CommonElementwiseBroadcastBackward(const GPUContext &ctx, // result. if (dx && dx->IsSharedBufferWith(dout)) { dx->clear(); - dx->mutable_data(x_dims, ctx.GetPlace()); + dx->Resize(x_dims); + ctx.template Alloc(dx); } VLOG(3) << "CommonElementwiseBroadcastBackward xdims:" diff --git a/paddle/phi/kernels/funcs/fc_functor.cc b/paddle/phi/kernels/funcs/fc_functor.cc index f428746bc524d7..31212a687fa73a 100644 --- a/paddle/phi/kernels/funcs/fc_functor.cc +++ b/paddle/phi/kernels/funcs/fc_functor.cc @@ -39,8 +39,11 @@ void FCFunctor::operator()(const DeviceContext& context, const int NN = N + 4; const int KK = K + 4; phi::DenseTensor X1; - T* X1_data = X1.mutable_data({M * KK}, paddle::platform::CPUPlace()); - Y1_data = Y1.mutable_data({M * (N + 4)}, paddle::platform::CPUPlace()); + X1.Resize({M * KK}); + T* X1_data = context.template HostAlloc(&X1); + + Y1.Resize({M * (N + 4)}); + Y1_data = context.template HostAlloc(&Y1); #ifdef PADDLE_WITH_MKLML #pragma omp parallel for #endif diff --git a/paddle/phi/kernels/funcs/math_function.cu b/paddle/phi/kernels/funcs/math_function.cu index db4cdc57e2f04d..a0e59f8f3fe23c 100644 --- a/paddle/phi/kernels/funcs/math_function.cu +++ b/paddle/phi/kernels/funcs/math_function.cu @@ -319,7 +319,9 @@ void ColwiseSum::operator()( size, vector->numel())); phi::DenseTensor one; - one.mutable_data({in_dims[0]}, context.GetPlace()); + one.Resize({in_dims[0]}); + context.template Alloc(&one); + SetConstant set; set(context, &one, static_cast(1.0)); phi::funcs::GetBlas(context).GEMV( @@ -355,7 +357,9 @@ void RowwiseSum::operator()( in_dims[0], vector->numel())); phi::DenseTensor one; - one.mutable_data({size}, context.GetPlace()); + one.Resize({size}); + context.template Alloc(&one); + SetConstant set; set(context, &one, static_cast(1.0)); phi::funcs::GetBlas(context).GEMV( diff --git a/paddle/phi/kernels/funcs/math_function_impl.h b/paddle/phi/kernels/funcs/math_function_impl.h index b59a249bbbf046..2011523a0153d7 100644 --- a/paddle/phi/kernels/funcs/math_function_impl.h +++ b/paddle/phi/kernels/funcs/math_function_impl.h @@ -117,7 +117,7 @@ class ColwiseSum { size, out->numel())); - T* out_buf = out->mutable_data(out->place()); + T* out_buf = context.template Alloc(out); const T* in_buf = input.data(); for (size_t i = 0; i < static_cast(height); ++i) { @@ -185,7 +185,7 @@ class RowwiseMean { height, out->numel())); auto inv_size = 1.0 / size; - T* out_buf = out->mutable_data(out->place()); + T* out_buf = context.template Alloc(out); const T* in_buf = input.data(); for (size_t i = 0; i < static_cast(height); ++i) { @@ -251,7 +251,7 @@ class RowwiseSum { height, out->numel())); - T* out_buf = out->mutable_data(out->place()); + T* out_buf = context.template Alloc(out); const T* in_buf = input.data(); for (size_t i = 0; i < static_cast(height); ++i) { diff --git a/paddle/phi/kernels/funcs/select_impl.cu.h b/paddle/phi/kernels/funcs/select_impl.cu.h index 4fb1bc13ae7f82..c5ddce68e7e2d6 100644 --- a/paddle/phi/kernels/funcs/select_impl.cu.h +++ b/paddle/phi/kernels/funcs/select_impl.cu.h @@ -451,7 +451,7 @@ void SelectKernel(const KPDevice &dev_ctx, out_dim.push_back(static_cast(rank)); out->Resize(phi::make_ddim(out_dim)); } - auto out_data = out->mutable_data(cuda_place); + auto out_data = dev_ctx.template Alloc(out); // 3.2 
get true data's index according to cond_data and cumsum_data if (total_true_num <= 0) return; SelectKernel diff --git a/paddle/phi/kernels/funcs/selected_rows_functor.cc b/paddle/phi/kernels/funcs/selected_rows_functor.cc index de362d45a8ba74..fb087660612ec5 100644 --- a/paddle/phi/kernels/funcs/selected_rows_functor.cc +++ b/paddle/phi/kernels/funcs/selected_rows_functor.cc @@ -542,11 +542,10 @@ struct MergeAddImpl { } out.set_height(input_height); - out.mutable_value()->mutable_data( - phi::make_ddim( - {static_cast(merged_row_set.size()), input_width}), - context.GetPlace()); - auto* out_data = out.mutable_value()->data(); + DenseTensor* out_tensor = out.mutable_value(); + out_tensor->Resize(phi::make_ddim( + {static_cast(merged_row_set.size()), input_width})); + auto* out_data = context.template Alloc(out_tensor); if (merged_row_set.size() == row_num && !sorted_result) { // no duplicated ids, just concat the result together @@ -659,9 +658,10 @@ struct MergeAdd { out.set_rows(merge_rows); out.set_height(input.height()); - out.mutable_value()->mutable_data( - phi::make_ddim({static_cast(merge_rows.size()), input_width}), - context.GetPlace()); + DenseTensor* out_tensor = out.mutable_value(); + out_tensor->Resize( + phi::make_ddim({static_cast(merge_rows.size()), input_width})); + context.template Alloc(out_tensor); std::unordered_map rows_to_id; for (size_t i = 0; i < merge_rows.size(); ++i) { @@ -748,12 +748,13 @@ struct MergeAdd { out.set_rows(merge_rows); out.set_height(input_height); - out.mutable_value()->mutable_data( - phi::make_ddim( - {static_cast(merged_row_set.size()), input_width}), - context.GetPlace()); - float* y_data = reinterpret_cast(out.mutable_value()->data()); + DenseTensor* out_tensor = out.mutable_value(); + out_tensor->Resize(phi::make_ddim( + {static_cast(merged_row_set.size()), input_width})); + context.template Alloc(out_tensor); + + float* y_data = reinterpret_cast(out_tensor->data()); std::unordered_map rows_to_id; for (size_t i = 0; i < merge_rows.size(); ++i) { @@ -856,11 +857,11 @@ struct MergeAverage { } out.set_height(input_height); - out.mutable_value()->mutable_data( - phi::make_ddim( - {static_cast(merged_row_set.size()), input_width}), - context.GetPlace()); - auto* out_data = out.mutable_value()->data(); + + DenseTensor* out_tensor = out.mutable_value(); + out_tensor->Resize(phi::make_ddim( + {static_cast(merged_row_set.size()), input_width})); + auto* out_data = context.template Alloc(out_tensor); std::vector merge_rows(merged_row_set.begin(), merged_row_set.end()); diff --git a/paddle/phi/kernels/funcs/selected_rows_functor.cu b/paddle/phi/kernels/funcs/selected_rows_functor.cu index e08fea2b353176..8f409466e19b19 100644 --- a/paddle/phi/kernels/funcs/selected_rows_functor.cu +++ b/paddle/phi/kernels/funcs/selected_rows_functor.cu @@ -392,9 +392,10 @@ struct MergeAddImpl { out.set_rows(merge_rows); out.set_height(input.height()); - out.mutable_value()->mutable_data( - phi::make_ddim({static_cast(merge_rows.size()), input_width}), - context.GetPlace()); + DenseTensor* out_tensor = out.mutable_value(); + out_tensor->Resize( + phi::make_ddim({static_cast(merge_rows.size()), input_width})); + context.template Alloc(out_tensor); phi::funcs::SetConstant constant_functor; constant_functor(context, out.mutable_value(), static_cast(0)); @@ -462,9 +463,11 @@ struct MergeAddImpl { out.set_rows(merge_rows); out.set_height(input_height); - out.mutable_value()->mutable_data( - phi::make_ddim({static_cast(merge_rows.size()), input_width}), - context.GetPlace()); 
+ + DenseTensor* out_tensor = out.mutable_value(); + out_tensor->Resize( + phi::make_ddim({static_cast(merge_rows.size()), input_width})); + context.template Alloc(out_tensor); phi::funcs::SetConstant constant_functor; constant_functor(context, out.mutable_value(), static_cast(0)); diff --git a/paddle/phi/kernels/funcs/top_k_function_cuda.h b/paddle/phi/kernels/funcs/top_k_function_cuda.h index 6c48e05c76cf79..f04c7a8da8be1e 100644 --- a/paddle/phi/kernels/funcs/top_k_function_cuda.h +++ b/paddle/phi/kernels/funcs/top_k_function_cuda.h @@ -941,8 +941,7 @@ bool SortTopk(const phi::GPUContext& ctx, const std::vector dims = {num_rows, num_cols}; auto dim = phi::make_ddim(dims); input_indices.Resize(dim); - // input_indices.Resize(num_rows*num_cols); - input_indices.mutable_data(ctx.GetPlace()); + ctx.template Alloc(&input_indices); size_t temp_storage_bytes = -1; auto ComputeBlockSize = [](int col) { @@ -984,7 +983,7 @@ bool SortTopk(const phi::GPUContext& ctx, const T* input = input_tensor->data(); T* values = out_tensor->data(); - int64_t* indices = indices_tensor->mutable_data(ctx.GetPlace()); + int64_t* indices = ctx.template Alloc(indices_tensor); if (k == num_cols) { // Doing a full sort. @@ -993,8 +992,8 @@ bool SortTopk(const phi::GPUContext& ctx, } else { temp_values.Resize(dim); temp_indices.Resize(dim); - sorted_values_ptr = temp_values.mutable_data(ctx.GetPlace()); - sorted_indices_ptr = temp_indices.mutable_data(ctx.GetPlace()); + sorted_values_ptr = ctx.template Alloc(&temp_values); + sorted_indices_ptr = ctx.template Alloc(&temp_indices); } // Get temp storage buffer size, maybe can allocate a fixed buffer to save @@ -1067,7 +1066,7 @@ bool SortTopk(const phi::GPUContext& ctx, #endif } Tensor temp_storage; - temp_storage.mutable_data(ctx.GetPlace(), temp_storage_bytes); + ctx.template Alloc(&temp_storage, temp_storage_bytes); if (largest) { auto err = cub::DeviceSegmentedRadixSort::SortPairsDescending( diff --git a/paddle/phi/kernels/gpu/concat_kernel.cu b/paddle/phi/kernels/gpu/concat_kernel.cu index 0666c60a8d0c12..80ff71b2158241 100644 --- a/paddle/phi/kernels/gpu/concat_kernel.cu +++ b/paddle/phi/kernels/gpu/concat_kernel.cu @@ -43,7 +43,7 @@ void ConcatKernel(const Context& dev_ctx, phi::DDim out_dims = phi::funcs::ComputeAndCheckShape(true, x_dims, axis); out->Resize(out_dims); - out->mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(out); // If axis is 0, the lod of the output is not the same as inputs. 
if (axis == 0 && x[0]->lod().size() > 0) { diff --git a/paddle/phi/kernels/gpu/depthwise_conv.h b/paddle/phi/kernels/gpu/depthwise_conv.h index 9ed88135041506..879056d67a7dad 100644 --- a/paddle/phi/kernels/gpu/depthwise_conv.h +++ b/paddle/phi/kernels/gpu/depthwise_conv.h @@ -1231,7 +1231,7 @@ class DepthwiseConvFunctor { const T* input_data = input.data(); const T* filter_data = filter.data(); - T* output_data = output->mutable_data(context.GetPlace()); + T* output_data = context.template Alloc(output); phi::DenseTensor filter_hwc; if (data_layout == DataLayout::kNHWC) { @@ -1240,7 +1240,7 @@ class DepthwiseConvFunctor { filter.dims()[0], filter.dims()[1]}); filter_hwc.Resize(filter_hwc_dims); - filter_hwc.mutable_data(context.GetPlace()); + context.template Alloc(&filter_hwc); std::vector perm_axis({2, 3, 0, 1}); phi::funcs::TransposeNormal trans; trans(context, filter, &filter_hwc, perm_axis); @@ -1409,7 +1409,7 @@ class DepthwiseConvInputGradFunctor { const T* input_data = input.data(); const T* filter_data = filter.data(); const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + T* input_grad_data = context.template Alloc(input_grad); phi::DenseTensor filter_hwc; if (data_layout == DataLayout::kNHWC) { @@ -1418,7 +1418,7 @@ class DepthwiseConvInputGradFunctor { filter.dims()[0], filter.dims()[1]}); filter_hwc.Resize(filter_hwc_dims); - filter_hwc.mutable_data(context.GetPlace()); + context.template Alloc(&filter_hwc); std::vector perm_axis({2, 3, 0, 1}); phi::funcs::TransposeNormal trans; trans(context, filter, &filter_hwc, perm_axis); @@ -1584,7 +1584,7 @@ class DepthwiseConvFilterGradFunctor(); const T* output_grad_data = output_grad.data(); - T* filter_grad_data = filter_grad->mutable_data(context.GetPlace()); + T* filter_grad_data = context.template Alloc(filter_grad); int block_size = 512; int blocks; @@ -1654,7 +1654,7 @@ class DepthwiseConvFilterGradFunctordims()[0], \ filter_grad->dims()[1]}); \ filter_grad_hwc.Resize(filter_grad_hwc_dims); \ - filter_grad_hwc.mutable_data(context.GetPlace()); \ + context.template Alloc(&filter_grad_hwc); \ phi::funcs::SetConstant set_zero; \ set_zero(context, &filter_grad_hwc, static_cast(0)); \ filter_grad_data = filter_grad_hwc.data(); \ diff --git a/paddle/phi/kernels/gpu/depthwise_conv_grad_kernel.cu b/paddle/phi/kernels/gpu/depthwise_conv_grad_kernel.cu index 2e815b3e455d57..5bb0a4946f17bf 100644 --- a/paddle/phi/kernels/gpu/depthwise_conv_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/depthwise_conv_grad_kernel.cu @@ -75,7 +75,7 @@ void DepthwiseConvGradKernel(const Context& dev_ctx, phi::funcs::SetConstant set_zero; if (input_grad) { - input_grad->mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(input_grad); set_zero(dev_ctx, input_grad, static_cast(0)); if (fuse_relu) { @@ -106,7 +106,7 @@ void DepthwiseConvGradKernel(const Context& dev_ctx, } if (filter_grad) { - filter_grad->mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(filter_grad); set_zero(dev_ctx, filter_grad, static_cast(0)); if (fuse_relu) { paddle::operators::math::DepthwiseConvFilterGradFunctor diff --git a/paddle/phi/kernels/gpu/depthwise_conv_kernel.cu b/paddle/phi/kernels/gpu/depthwise_conv_kernel.cu index 8617a42e4e544a..1cb6301dc99acc 100644 --- a/paddle/phi/kernels/gpu/depthwise_conv_kernel.cu +++ b/paddle/phi/kernels/gpu/depthwise_conv_kernel.cu @@ -32,7 +32,7 @@ void DepthwiseConvKernel(const Context& dev_ctx, const std::string& data_format, DenseTensor* out) { DenseTensor* output = 
out; - output->mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(output); const std::vector strides = strides_t; std::vector dilations = dilations_t; diff --git a/paddle/phi/kernels/gpu/dropout_grad_kernel.cu b/paddle/phi/kernels/gpu/dropout_grad_kernel.cu index 4aa59cded8f379..cdb8d0bd277622 100644 --- a/paddle/phi/kernels/gpu/dropout_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/dropout_grad_kernel.cu @@ -29,7 +29,7 @@ void DropoutGradRawKernel(const Context& dev_ctx, const std::string& mode, DenseTensor* x_grad) { bool upscale_in_train = (mode == "upscale_in_train"); - x_grad->mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(x_grad); paddle::operators::DropoutGradGPUKernelDriver(dev_ctx, is_test, p.to(), diff --git a/paddle/phi/kernels/gpu/elementwise_grad.h b/paddle/phi/kernels/gpu/elementwise_grad.h index e8f01be8973704..84047f14739b55 100644 --- a/paddle/phi/kernels/gpu/elementwise_grad.h +++ b/paddle/phi/kernels/gpu/elementwise_grad.h @@ -153,7 +153,7 @@ void DefaultElementwiseAddGrad(const GPUContext &ctx, // dx if (dx != nullptr) { - auto *dx_data = dx->mutable_data(ctx.GetPlace()); + auto *dx_data = ctx.template Alloc(dx); if (dx->dims() == dout.dims()) { if (dx_data != dout_data) { phi::Copy(ctx, dout, ctx.GetPlace(), false, dx); @@ -163,7 +163,8 @@ void DefaultElementwiseAddGrad(const GPUContext &ctx, // the result of dy wrong. if (dx->IsSharedBufferWith(dout)) { dx->clear(); - dx->mutable_data(x.dims(), ctx.GetPlace()); + dx->Resize(x.dims()); + ctx.template Alloc(dx); } std::vector reduce_dims = funcs::GetReduceDim(x.dims(), out.dims(), axis); @@ -173,7 +174,7 @@ void DefaultElementwiseAddGrad(const GPUContext &ctx, } // dy if (dy != nullptr) { - auto *dy_data = dy->mutable_data(ctx.GetPlace()); + auto *dy_data = ctx.template Alloc(dy); if (dy->dims() == dout.dims()) { if (dy_data != dout_data) { phi::Copy(ctx, dout, ctx.GetPlace(), false, dy); @@ -217,12 +218,11 @@ void ElementwiseAddGrad(const GPUContext &ctx, PREDEFINED_BLOCK_SIZE, 1); SimpleElemwiseAddGradCUDAKernel - <<>>( - dout.data(), - size, - vec_size, - dx->mutable_data(ctx.GetPlace()), - dy->mutable_data(ctx.GetPlace())); + <<>>(dout.data(), + size, + vec_size, + ctx.template Alloc(dx), + ctx.template Alloc(dy)); } else { VLOG(4) << "Special case when dy_data is the same as dout_data, " "and dx_data is the same as dout_data, do not need " @@ -264,7 +264,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx, auto *dout_data = dout.data(); // dx if (dx != nullptr) { - auto *dx_data = dx->mutable_data(ctx.GetPlace()); + auto *dx_data = ctx.template Alloc(dx); if (dx->dims() == dout.dims()) { if (dx_data != dout_data) { phi::Copy(ctx, dout, ctx.GetPlace(), false, dx); @@ -274,7 +274,8 @@ void default_elementwise_sub_grad(const GPUContext &ctx, // the result of dy wrong. 
if (dx->IsSharedBufferWith(dout)) { dx->clear(); - dx->mutable_data(x.dims(), ctx.GetPlace()); + dx->Resize(x.dims()); + ctx.template Alloc(dx); } std::vector reduce_dims = funcs::GetReduceDim(x.dims(), out.dims(), axis); @@ -284,7 +285,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx, } // dy if (dy != nullptr) { - auto *dy_data = dy->mutable_data(ctx.GetPlace()); + auto *dy_data = ctx.template Alloc(dy); if (dy->dims() == dout.dims()) { if (dy_data != dout_data) { dim3 block_size = dim3(PREDEFINED_BLOCK_SIZE, 1); @@ -293,10 +294,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx, dim3((size + PREDEFINED_BLOCK_SIZE - 1) / PREDEFINED_BLOCK_SIZE, 1); SimpleElemwiseSubGradCUDAKernel <<>>( - dout.data(), - size, - nullptr, - dy->mutable_data(ctx.GetPlace())); + dout.data(), size, nullptr, ctx.template Alloc(dy)); } } else { std::vector reduce_dims = @@ -320,11 +318,10 @@ void elementwise_sub_grad(const GPUContext &ctx, dim3 grid_size = dim3((size + PREDEFINED_BLOCK_SIZE - 1) / PREDEFINED_BLOCK_SIZE, 1); SimpleElemwiseSubGradCUDAKernel - <<>>( - dout.data(), - size, - dx->mutable_data(ctx.GetPlace()), - dy->mutable_data(ctx.GetPlace())); + <<>>(dout.data(), + size, + ctx.template Alloc(dx), + ctx.template Alloc(dy)); } /* ****************************** diff --git a/paddle/phi/kernels/gpu/graph_reindex_kernel.cu b/paddle/phi/kernels/gpu/graph_reindex_kernel.cu index 046c210e186013..10a5eec5b1ecf6 100644 --- a/paddle/phi/kernels/gpu/graph_reindex_kernel.cu +++ b/paddle/phi/kernels/gpu/graph_reindex_kernel.cu @@ -314,9 +314,9 @@ void GraphReindexKernel(const Context& dev_ctx, const auto* ph_index = hashtable_index.get_ptr(); hashtable_index_out.ShareDataWith(*ph_index); int* hashtable_value_data = - hashtable_value_out.mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(&hashtable_value_out); int* hashtable_index_data = - hashtable_index_out.mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(&hashtable_index_out); BufferReindex(dev_ctx, x_data, src_outputs, diff --git a/paddle/phi/kernels/gpu/histogram_kernel.cu b/paddle/phi/kernels/gpu/histogram_kernel.cu index 02f5bbb530a6c5..47929e640d57a2 100644 --- a/paddle/phi/kernels/gpu/histogram_kernel.cu +++ b/paddle/phi/kernels/gpu/histogram_kernel.cu @@ -85,7 +85,7 @@ void HistogramKernel(const Context& dev_ctx, const T* input_data = input.data(); const int input_numel = input.numel(); - int64_t* out_data = output->mutable_data(dev_ctx.GetPlace()); + int64_t* out_data = dev_ctx.template Alloc(output); phi::funcs::SetConstant()( dev_ctx, output, static_cast(0)); @@ -98,8 +98,10 @@ void HistogramKernel(const Context& dev_ctx, auto input_x = phi::EigenVector::Flatten(input); DenseTensor input_min_t, input_max_t; - auto* input_min_data = input_min_t.mutable_data({1}, dev_ctx.GetPlace()); - auto* input_max_data = input_max_t.mutable_data({1}, dev_ctx.GetPlace()); + input_min_t.Resize({1}); + input_max_t.Resize({1}); + auto* input_min_data = dev_ctx.template Alloc(&input_min_t); + auto* input_max_data = dev_ctx.template Alloc(&input_max_t); auto input_min_scala = phi::EigenScalar::From(input_min_t); auto input_max_scala = phi::EigenScalar::From(input_max_t); diff --git a/paddle/phi/kernels/gpu/kthvalue_kernel.cu b/paddle/phi/kernels/gpu/kthvalue_kernel.cu index 1f6dc489690535..b04cea2ceb55ea 100644 --- a/paddle/phi/kernels/gpu/kthvalue_kernel.cu +++ b/paddle/phi/kernels/gpu/kthvalue_kernel.cu @@ -67,7 +67,7 @@ bool SortKthvalue(const phi::GPUContext& dev_ctx, DenseTensor temp_values, temp_indices; const T* input = 
input_tensor->data(); T* values = out_tensor->data(); - int64_t* indices = indices_tensor->mutable_data(dev_ctx.GetPlace()); + int64_t* indices = dev_ctx.template Alloc(indices_tensor); temp_values.Resize(dim); temp_indices.Resize(dim); sorted_values_ptr = dev_ctx.template Alloc(&temp_values); @@ -208,13 +208,16 @@ void KthvalueKernel(const Context& dev_ctx, } trans_out_dims[in_dims.size() - 1] = 1; DenseTensor trans_input; - trans_input.mutable_data(trans_dims, dev_ctx.GetPlace()); + trans_input.Resize(trans_dims); + dev_ctx.template Alloc(&trans_input); int ndims = trans.size(); funcs::TransCompute( ndims, dev_ctx, x, &trans_input, trans); DenseTensor trans_ind, trans_out; - trans_ind.mutable_data(trans_out_dims, dev_ctx.GetPlace()); - trans_out.mutable_data(trans_out_dims, dev_ctx.GetPlace()); + trans_ind.Resize(trans_out_dims); + trans_out.Resize(trans_out_dims); + dev_ctx.template Alloc(&trans_ind); + dev_ctx.template Alloc(&trans_out); const int64_t input_height = phi::product(phi::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); const int64_t input_width = trans_dims[trans_dims.size() - 1]; diff --git a/paddle/phi/kernels/gpu/put_along_axis_grad_kernel.cu b/paddle/phi/kernels/gpu/put_along_axis_grad_kernel.cu index 16c32886e235a1..fcf43f9f42718d 100644 --- a/paddle/phi/kernels/gpu/put_along_axis_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/put_along_axis_grad_kernel.cu @@ -50,7 +50,7 @@ void PutAlongAxisGradKernel(const Context& dev_ctx, } if (value_grad) { value_grad->Resize(index.dims()); - value_grad->mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(value_grad); if (index_type == DataType::INT32) { paddle::operators::gpu_gather_kernel( out_grad, diff --git a/paddle/phi/kernels/gpu/rnn_functor.h b/paddle/phi/kernels/gpu/rnn_functor.h index 59c59889863608..3c82726662de70 100644 --- a/paddle/phi/kernels/gpu/rnn_functor.h +++ b/paddle/phi/kernels/gpu/rnn_functor.h @@ -58,7 +58,7 @@ class RNNDescriptors { template void Create(const gpuDnnHandle_t &handle, - const Place &place, + const DeviceContext &dev_ctx, const std::vector &sequence_length, size_t *workspace_size, size_t *reserve_size, @@ -103,17 +103,15 @@ class RNNDescriptors { #ifdef PADDLE_WITH_HIP PADDLE_ENFORCE_GPU_SUCCESS( phi::dynload::miopenDropoutGetStatesSize(handle, &state_size)); - dropout_state->mutable_data({static_cast(state_size)}, - place); #else PADDLE_ENFORCE_GPU_SUCCESS( phi::dynload::cudnnDropoutGetStatesSize(handle, &state_size)); - dropout_state->mutable_data({static_cast(state_size)}, - place); #endif + dropout_state->Resize({static_cast(state_size)}); + dev_ctx.template Alloc(dropout_state); } dropout_desc_.descriptor(handle, - place, + dev_ctx.GetPlace(), is_initialized, dropout_prob_, is_test_ ? 
nullptr : dropout_state, diff --git a/paddle/phi/kernels/gpu/rnn_grad_kernel.cu.cc b/paddle/phi/kernels/gpu/rnn_grad_kernel.cu.cc index fe0446323739f8..ff1d295b11e681 100644 --- a/paddle/phi/kernels/gpu/rnn_grad_kernel.cu.cc +++ b/paddle/phi/kernels/gpu/rnn_grad_kernel.cu.cc @@ -248,7 +248,7 @@ void RnnGradKernel(const Context &dev_ctx, is_test); rnn.Create(handle, - dev_ctx.GetPlace(), + dev_ctx, SequenceLength, &workspace_size, &reserve_size, diff --git a/paddle/phi/kernels/gpu/rnn_kernel.cu.cc b/paddle/phi/kernels/gpu/rnn_kernel.cu.cc index 079a159ee81e9f..44fab87d910d76 100644 --- a/paddle/phi/kernels/gpu/rnn_kernel.cu.cc +++ b/paddle/phi/kernels/gpu/rnn_kernel.cu.cc @@ -280,7 +280,7 @@ void RnnKernel(const Context &dev_ctx, is_bidirec, is_test); rnn.Create(handle, - dev_ctx.GetPlace(), + dev_ctx, SequenceLength, &workspace_size, &reserve_size, diff --git a/paddle/phi/kernels/gpu/sgd_kernel.cu b/paddle/phi/kernels/gpu/sgd_kernel.cu index e3f0bf968c82cc..b7cf9e5badce0c 100644 --- a/paddle/phi/kernels/gpu/sgd_kernel.cu +++ b/paddle/phi/kernels/gpu/sgd_kernel.cu @@ -82,9 +82,8 @@ void SGDDenseKernel(const Context& dev_ctx, const MPDType* master_in_data = multi_precision ? master_param->data() : nullptr; MPDType* master_out_data = - multi_precision - ? master_param_out->mutable_data(dev_ctx.GetPlace()) - : nullptr; + multi_precision ? dev_ctx.template Alloc(master_param_out) + : nullptr; int block = 512; int grid = (param.numel() + block - 1) / block; @@ -94,7 +93,7 @@ void SGDDenseKernel(const Context& dev_ctx, grad.data(), learning_rate.data(), param.numel(), - param_out->mutable_data(dev_ctx.GetPlace()), + dev_ctx.template Alloc(param_out), master_in_data, master_out_data); } @@ -119,9 +118,8 @@ void SGDDenseParamSparseGradKernel( const MPDType* master_in_data = multi_precision ? master_param->data() : nullptr; MPDType* master_out_data = - multi_precision - ? master_param_out->mutable_data(dev_ctx.GetPlace()) - : nullptr; + multi_precision ? dev_ctx.template Alloc(master_param_out) + : nullptr; PADDLE_ENFORCE_EQ( ¶m, diff --git a/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu b/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu index b4a1574ee84e84..cc5d95a12f7a3c 100644 --- a/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu @@ -120,8 +120,8 @@ void TemporalShiftGradKernel(const Context& dev_ctx, (data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w}) : phi::make_ddim({nt, h, w, c})); const T* output_grad_data = output_grad->data(); - T* input_grad_data = - input_grad->mutable_data(in_grad_dims, dev_ctx.GetPlace()); + input_grad->Resize(in_grad_dims); + T* input_grad_data = dev_ctx.template Alloc(input_grad); int pixelNum = nt * chw; int threads = 1024; diff --git a/paddle/phi/kernels/gpu/temporal_shift_kernel.cu b/paddle/phi/kernels/gpu/temporal_shift_kernel.cu index c69a8aa2882169..b321fad07ac1fd 100644 --- a/paddle/phi/kernels/gpu/temporal_shift_kernel.cu +++ b/paddle/phi/kernels/gpu/temporal_shift_kernel.cu @@ -120,7 +120,8 @@ void TemporalShiftKernel(const Context& dev_ctx, (data_layout == DataLayout::kNCHW ? 
phi::make_ddim({nt, c, h, w}) : phi::make_ddim({nt, h, w, c})); const T* input_data = input->data(); - T* output_data = output->mutable_data(out_dims, dev_ctx.GetPlace()); + output->Resize(out_dims); + T* output_data = dev_ctx.template Alloc(output); int pixelNum = nt * chw; int threads = 1024; diff --git a/paddle/phi/kernels/gpu/yolo_box_kernel.cu b/paddle/phi/kernels/gpu/yolo_box_kernel.cu index 8baf339f0c6d72..a55834c6ae7aa5 100644 --- a/paddle/phi/kernels/gpu/yolo_box_kernel.cu +++ b/paddle/phi/kernels/gpu/yolo_box_kernel.cu @@ -139,9 +139,10 @@ void YoloBoxKernel(const Context& dev_ctx, const T* input_data = input->data(); const int* imgsize_data = img_size.data(); - T* boxes_data = boxes->mutable_data({n, box_num, 4}, dev_ctx.GetPlace()); - T* scores_data = - scores->mutable_data({n, box_num, class_num}, dev_ctx.GetPlace()); + boxes->Resize({n, box_num, 4}); + T* boxes_data = dev_ctx.template Alloc(boxes); + scores->Resize({n, box_num, class_num}); + T* scores_data = dev_ctx.template Alloc(scores); phi::funcs::SetConstant set_zero; set_zero(dev_ctx, boxes, static_cast(0)); set_zero(dev_ctx, scores, static_cast(0)); diff --git a/paddle/phi/kernels/impl/digamma_grad_kernel_impl.h b/paddle/phi/kernels/impl/digamma_grad_kernel_impl.h index 49046dfa4d20dd..160e100f2b449d 100644 --- a/paddle/phi/kernels/impl/digamma_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/digamma_grad_kernel_impl.h @@ -42,7 +42,7 @@ void DigammaGradKernel(const Context& ctx, const DenseTensor& x, const DenseTensor& out_grad, DenseTensor* x_grad) { - x_grad->mutable_data(ctx.GetPlace()); + ctx.template Alloc(x_grad); auto* dout_data = out_grad.data(); auto* x_data = x.data(); diff --git a/paddle/phi/kernels/impl/digamma_kernel_impl.h b/paddle/phi/kernels/impl/digamma_kernel_impl.h index 4547806a38ddba..ded77ca5a8f314 100644 --- a/paddle/phi/kernels/impl/digamma_kernel_impl.h +++ b/paddle/phi/kernels/impl/digamma_kernel_impl.h @@ -38,7 +38,7 @@ struct DigammaFunctor { template void DigammaKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) { - out->mutable_data(ctx.GetPlace()); + ctx.template Alloc(out); auto* x_data = x.data(); auto* out_data = out->data(); auto numel = x.numel(); diff --git a/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h b/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h index 28387975e6e998..396f1e9548648a 100644 --- a/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h @@ -68,7 +68,7 @@ void AddDoubleGradImpl(const Context& dev_ctx, funcs::GetDoubleGradSafeTensor( dev_ctx, y, ddy.get_ptr(), &ddy_safe); - ddout->mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(ddout); auto ddx_dims = ddx_safe.dims(); auto ddy_dims = ddy_safe.dims(); if (ddx_dims.size() >= ddy_dims.size()) { @@ -102,7 +102,7 @@ void SubtractDoubleGradImpl(const Context& dev_ctx, funcs::GetDoubleGradSafeTensor( dev_ctx, y, ddy.get_ptr(), &ddy_safe); - ddout->mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(ddout); funcs::ElementwiseCompute, T>( dev_ctx, ddx_safe, ddy_safe, axis, funcs::SubtractFunctor(), ddout); } diff --git a/paddle/phi/kernels/memcpy_kernel.cc b/paddle/phi/kernels/memcpy_kernel.cc index acc87dc9960d1c..521edc26af320c 100644 --- a/paddle/phi/kernels/memcpy_kernel.cc +++ b/paddle/phi/kernels/memcpy_kernel.cc @@ -25,32 +25,6 @@ namespace phi { static constexpr size_t WAIT_THRESHOLD = 64 * 1024; -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -template <> -void MemcpyH2DKernel(const GPUContext& 
dev_ctx, - const DenseTensor& x, - int dst_place_type, - DenseTensor* out) { - PADDLE_ENFORCE_GE( - dst_place_type, - 0, - errors::OutOfRange("dst_place_type only support 0-3, but got: %d", - dst_place_type)); - PADDLE_ENFORCE_LE( - dst_place_type, - 3, - errors::OutOfRange("dst_place_type only support 0-3, but got: %d", - dst_place_type)); - - auto stream = dev_ctx.stream(); - out->mutable_data(dev_ctx.GetPlace(), - x.dtype(), - phi::Stream(reinterpret_cast(stream))); - - Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out); -} -#endif - template void MemcpyH2DKernel(const Context& dev_ctx, const DenseTensor& x, @@ -77,10 +51,6 @@ void MemcpyD2HKernel(const Context& dev_ctx, DenseTensor* out) { switch (dst_place_type) { case 0: - // NOTE(lvyongkang): phi::Copy will use DeviceContext.zero_allocator to - // alloc and assign DeviceContext.place to out, which causes place check - // fails. So we specify out's place here. - out->mutable_data(CPUPlace()); Copy(dev_ctx, x, CPUPlace(), false, out); // NOTE(copy from Aurelius84): host <-> device memory copies of a memory // block of 64 KB or less are asynchronous. See @@ -91,10 +61,6 @@ void MemcpyD2HKernel(const Context& dev_ctx, break; case 1: - // NOTE(lvyongkang): phi::Copy will use DeviceContext.zero_allocator to - // alloc and assign DeviceContext.place to out, which causes place check - // fails. So we specify out's place here. - out->mutable_data(GPUPinnedPlace()); Copy(dev_ctx, x, GPUPinnedPlace(), false, out); // paddle::memory::Copy use async copy for GPUPinnedPlace dev_ctx.Wait(); diff --git a/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc index a25cd0cd61303f..9dd8f7df08ccc3 100644 --- a/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc @@ -74,7 +74,7 @@ void AddGradKernel(const Context& dev_ctx, } if (dy != nullptr) { - T* dy_data = dy->mutable_data(dev_ctx.GetPlace()); + T* dy_data = dev_ctx.template Alloc(dy); if (dy->dims() == dz_dims) { if (dy_data != dz_data) { int ret = xpu::copy(dev_ctx.x_context(), diff --git a/paddle/phi/kernels/xpu/full_kernel.cc b/paddle/phi/kernels/xpu/full_kernel.cc index c5fca8881e221e..44c5842210b71b 100644 --- a/paddle/phi/kernels/xpu/full_kernel.cc +++ b/paddle/phi/kernels/xpu/full_kernel.cc @@ -45,7 +45,7 @@ void TensorSetConstantXPU(phi::DenseTensor* tensor, template void FullValueXPU(const Context& dev_ctx, DenseTensor* tensor, VType val) { - tensor->mutable_data(dev_ctx.GetPlace()); + dev_ctx.template Alloc(tensor); PD_VISIT_ALL_TYPES(tensor->dtype(), "FullValueXPU", ([&] { TensorSetConstantXPU( diff --git a/tools/check_file_diff_approvals.sh b/tools/check_file_diff_approvals.sh index f1aa530e870705..63c204af017586 100644 --- a/tools/check_file_diff_approvals.sh +++ b/tools/check_file_diff_approvals.sh @@ -342,17 +342,17 @@ if [ "${PHI_INCLUDE_FLUID_FILES}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then check_approval 1 chenwhql YuanRisheng zyfncg fi -HAS_MODIFIED_PHI_KERNEL_FILES=`git diff --name-only upstream/$BRANCH | grep "paddle/phi/kernels" || true` -PHI_USE_MUTABLE_DATA_FILES="" -for CHANGE_FILE in ${HAS_MODIFIED_PHI_KERNEL_FILES}; do - PHI_DIR_ADDED_LINES=`git diff -U0 upstream/$BRANCH -- ${PADDLE_ROOT}/${CHANGE_FILE} | grep "^+" | grep -w "mutable_data" || true` - if [ "${PHI_DIR_ADDED_LINES}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then - PHI_USE_MUTABLE_DATA_FILES="${PHI_USE_MUTABLE_DATA_FILES} ${CHANGE_FILE}" +HAS_MODIFIED_PHI_OR_FLUID_FILES=`git diff --name-only 
upstream/$BRANCH | grep -E "paddle/phi|paddle/fluid" || true` +USE_MUTABLE_DATA_FILES="" +for CHANGE_FILE in ${HAS_MODIFIED_PHI_OR_FLUID_FILES}; do + ADDED_LINES=`git diff -U0 upstream/$BRANCH -- ${PADDLE_ROOT}/${CHANGE_FILE} | grep "^+" | grep -w "mutable_data" || true` + if [ "${ADDED_LINES}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then + USE_MUTABLE_DATA_FILES="${USE_MUTABLE_DATA_FILES} ${CHANGE_FILE}" fi done -if [ "${PHI_USE_MUTABLE_DATA_FILES}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then - echo_line="You can not use the DenseTensor::mutable_data() method in paddle/phi/kernels files(${PHI_USE_MUTABLE_DATA_FILES}). If you want to alloc memory, use phi::DeviceContext::Alloc() or phi::DeviceContext::HostAlloc() instead and if you want to get mutable data, use DenseTensor::data(). If you have any questions, you can have one RD (chenwhql, Shixiaowei02, YuanRisheng or zyfncg) review and approve.\n" - check_approval 1 chenwhql Shixiaowei02 YuanRisheng zyfncg +if [ "${USE_MUTABLE_DATA_FILES}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then + echo_line="You can not use the DenseTensor::mutable_data() method in files(${USE_MUTABLE_DATA_FILES}). If you want to alloc memory, use phi::DeviceContext::Alloc() or phi::DeviceContext::HostAlloc() instead and if you want to get mutable data, use DenseTensor::data(). If you have any questions, you can have one RD (chenwhql, Shixiaowei02, YuanRisheng, zyfncg or From00) review and approve.\n" + check_approval 1 chenwhql Shixiaowei02 YuanRisheng zyfncg From00 fi ALL_CHANGE_FILES=`git diff --numstat upstream/$BRANCH | awk '{print $3}' | grep ".py"` From 790e7c382eb55c850fdd8f75635a0649bed25f1b Mon Sep 17 00:00:00 2001 From: yunyaoXYY <109218879+yunyaoXYY@users.noreply.github.com> Date: Mon, 5 Dec 2022 16:46:17 +0800 Subject: [PATCH 152/154] [Fluid Clean] Clean image_resize, resize_bilinear, resize_trilinear and resize_nearest. (#48691) * clean fluild resize_trilinear * clean fluild resize_bilinear * clean fluild resize_nearest * clean fluid image_resize * fix test_trt_nearest_interp_op.py * fix yolov3.py * fix yolov3.py --- python/paddle/fluid/layers/nn.py | 1047 ----------------- .../unittests/dygraph_to_static/yolov3.py | 5 +- .../inference/test_trt_nearest_interp_op.py | 11 +- .../mlu/test_bilinear_interp_v2_op_mlu.py | 51 - .../mlu/test_nearest_interp_v2_op_mlu.py | 96 -- .../npu/test_nearest_interp_op_npu.py | 82 -- .../unittests/test_bilinear_interp_op.py | 52 - .../unittests/test_bilinear_interp_v2_op.py | 51 - .../fluid/tests/unittests/test_layers.py | 88 -- .../tests/unittests/test_nearest_interp_op.py | 80 -- .../unittests/test_nearest_interp_v2_op.py | 93 -- .../unittests/test_trilinear_interp_op.py | 82 -- .../unittests/test_trilinear_interp_v2_op.py | 95 -- 13 files changed, 8 insertions(+), 1825 deletions(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 34c6387a1643cb..860a5375bf818d 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -81,10 +81,6 @@ 'autoincreased_step_counter', 'unsqueeze', 'lod_reset', - 'image_resize', - 'resize_bilinear', - 'resize_trilinear', - 'resize_nearest', 'relu', 'elementwise_add', 'elementwise_div', @@ -3149,1049 +3145,6 @@ def lod_reset(x, y=None, target_lod=None): return out -def image_resize( - input, - out_shape=None, - scale=None, - name=None, - resample='BILINEAR', - actual_shape=None, - align_corners=True, - align_mode=1, - data_format='NCHW', -): - """ - - This op resizes a batch of images. 
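The fluid resize entry points removed below (image_resize, resize_bilinear, resize_trilinear, resize_nearest) are superseded by paddle.nn.functional.interpolate, which is what the yolov3.py and test_trt_nearest_interp_op.py hunks later in this patch switch to. A minimal sketch of the replacement calls, assuming an illustrative NCHW input (shapes and modes here are examples, not taken from the patch):

    import paddle
    import paddle.nn.functional as F

    x = paddle.rand([2, 3, 6, 10])  # NCHW, the default layout of the removed layers

    # out_shape=[12, 12] in the old layers maps to size=[12, 12]
    y_nearest = F.interpolate(x, size=[12, 12], mode='nearest')

    # scale=2.0 maps to scale_factor=2.0; resample='BILINEAR' maps to mode='bilinear'
    y_bilinear = F.interpolate(x, scale_factor=2.0, mode='bilinear', align_corners=True)

Note that interpolate defaults to align_corners=False, so callers relying on the removed layers' align_corners=True default pass it explicitly, as in the bilinear line above.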
- - The input must be a 3-D Tensor of the shape (num_batches, channels, in_w) - or a 4-D Tensor of the shape (num_batches, channels, in_h, in_w) - or (num_batches, in_h, in_w, channels), or a 5-D Tensor of the shape - (num_batches, channels, in_d, in_h, in_w) or (num_batches, in_d, in_h, in_w, channels), - and the resizing only applies on the three dimensions(depth, height and width). - - **Warning:** the parameter :attr:`actual_shape` will be deprecated in the - future and only use :attr:`out_shape` instead. - - Supporting resample methods: - 'LINEAR' : Linear interpolation - - 'BILINEAR' : Bilinear interpolation - - 'TRILINEAR' : Trilinear interpolation - - 'NEAREST' : Nearest neighbor interpolation - - 'BICUBIC' : Bicubic interpolation - - Linear interpolation is the method of using a line connecting two known quantities - to determine the value of an unknown quantity between the two known quantities. - - Nearest neighbor interpolation is to perform nearest neighbor interpolation - in both the 3rd dimension(in height direction) and the 4th dimension(in width - direction) on input tensor. - - Bilinear interpolation is an extension of linear interpolation for - interpolating functions of two variables (e.g. H-direction and - W-direction in this op) on a rectilinear 2D grid. The key idea is - to perform linear interpolation first in one direction, and then - again in the other direction. - - Trilinear interpolation is an extension of linear interpolation for - interpolating functions of three variables (e.g. D-direction, - H-direction and W-direction in this op) on a rectilinear 3D grid. - The linear interpolation is performed on three directions. - - Bicubic interpolation is an extension of cubic interpolation for interpolating - data points on a two-dimensional regular grid. The interpolated surface is - smoother than corresponding surfaces obtained by bilinear interpolation or - nearest-neighbor interpolation. - - Align_corners and align_mode are optional parameters,the calculation method - of interpolation can be selected by them. - - Example: - - .. 
code-block:: text - - For scale: - - if align_corners = True && out_size > 1 : - - scale_factor = (in_size-1.0)/(out_size-1.0) - - else: - - scale_factor = float(in_size/out_size) - - - Nearest neighbor interpolation: - - if: - align_corners = False - - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: - - H_out = floor (H_{in} * scale_{factor}) - W_out = floor (W_{in} * scale_{factor}) - - else: - align_corners = True - - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: - - H_out = round(H_{in} * scale_{factor}) - W_out = round(W_{in} * scale_{factor}) - - linear interpolation: - - if: - align_corners = False , align_mode = 0 - - input : (N,C,W_in) - output: (N,C,W_out) where: - - W_out = (W_{in}+0.5) * scale_{factor} - 0.5 - - else: - - input : (N,C,W_in) - output: (N,C,H_out,W_out) where: - - W_out = W_{in} * scale_{factor} - - Bilinear interpolation: - - if: - align_corners = False , align_mode = 0 - - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: - - H_out = (H_{in}+0.5) * scale_{factor} - 0.5 - W_out = (W_{in}+0.5) * scale_{factor} - 0.5 - - else: - - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: - - H_out = H_{in} * scale_{factor} - W_out = W_{in} * scale_{factor} - - Trilinear interpolation: - - if: - align_corners = False , align_mode = 0 - - input : (N,C,D_in,H_in,W_in) - output: (N,C,D_out,H_out,W_out) where: - - D_out = (D_{in}+0.5) * scale_{factor} - 0.5 - H_out = (H_{in}+0.5) * scale_{factor} - 0.5 - W_out = (W_{in}+0.5) * scale_{factor} - 0.5 - - - else: - - input : (N,C,D_in,H_in,W_in) - output: (N,C,D_out,H_out,W_out) where: - - D_out = D_{in} * scale_{factor} - - Trilinear interpolation: - if: - align_corners = False , align_mode = 0 - input : (N,C,D_in,H_in,W_in) - output: (N,C,D_out,H_out,W_out) where: - D_out = (D_{in}+0.5) * scale_{factor} - 0.5 - H_out = (H_{in}+0.5) * scale_{factor} - 0.5 - W_out = (W_{in}+0.5) * scale_{factor} - 0.5 - else: - input : (N,C,D_in,H_in,W_in) - output: (N,C,D_out,H_out,W_out) where: - D_out = D_{in} * scale_{factor} - H_out = H_{in} * scale_{factor} - W_out = W_{in} * scale_{factor} - - - For details of linear interpolation, please refer to Wikipedia: - https://en.wikipedia.org/wiki/Linear_interpolation. - - For details of nearest neighbor interpolation, please refer to Wikipedia: - https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation. - - For details of bilinear interpolation, please refer to Wikipedia: - https://en.wikipedia.org/wiki/Bilinear_interpolation. - - For details of trilinear interpolation, please refer to Wikipedia: - https://en.wikipedia.org/wiki/Trilinear_interpolation. - - For details of bicubic interpolation, please refer to Wikipedia: - https://en.wikipedia.org/wiki/Bicubic_interpolation - - Parameters: - input (Variable): 3-D, 4-D or 5-D Tensor, its data type is float32, float64, or uint8, - its data format is specified by :attr:`data_format`. - out_shape (list|tuple|Variable|None): Output shape of image resize - layer, the shape is (out_w, ) when input is a 3-D Tensor, the shape is (out_h, out_w) - when input is a 4-D Tensor and is (out_d, out_h, out_w) when input is a 5-D Tensor. - Default: None. If a list, each element can be an integer or a Tensor Variable of shape: [1]. - If a Tensor Variable, its dimensions size should be a 1. - scale(float|Variable|None): The multiplier for the input height or width. At - least one of :attr:`out_shape` or :attr:`scale` must be set. - And :attr:`out_shape` has a higher priority than :attr:`scale`. - Default: None. 
- name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. - resample(str): The resample method. It supports 'LINEAR', 'BICUBIC', 'BILINEAR', 'TRILINEAR' - and 'NEAREST' currently. Default: 'BILINEAR' - actual_shape(Variable): An optional input to specify output shape - dynamically. If provided, image resize - according to this given shape rather than - :attr:`out_shape` and :attr:`scale` specifying - shape. That is to say actual_shape has the - highest priority. It is recommended to use - :attr:`out_shape` if you want to specify output - shape dynamically, because :attr:`actual_shape` - will be deprecated. When using actual_shape to - specify output shape, one of :attr:`out_shape` - and :attr:`scale` should also be set, otherwise - errors would be occurred in graph constructing stage. - Default: None - align_corners(bool) : An optional bool, If True, the centers of the 4 corner pixels of the - input and output tensors are aligned, preserving the values at the - corner pixels. - Default: True - align_mode(int) : An optional for linear/bilinear/trilinear interpolation. Refer to the fomula in the - the example code above, it can be \'0\' for src_idx = scale*(dst_indx+0.5)-0.5 , - can be \'1\' for src_idx = scale*dst_index. - data_format (str, optional): Specify the data format of the input, and the data format of the output - will be consistent with that of the input. An optional string from:`NCW`, `NWC`, `"NCHW"`, `"NHWC"`, `"NCDHW"`, - `"NDHWC"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_height, input_width]`. When it is `"NCHW"`, the data is stored - in the order of: `[batch_size, input_channels, input_depth, input_height, input_width]`. - - Returns: - A 3-D Tensor of the shape (num_batches, channels, out_w) or (num_batches, out_w, channels), - A 4-D Tensor of the shape (num_batches, channels, out_h, out_w) or (num_batches, out_h, out_w, channels), - or 5-D Tensor of the shape (num_batches, channels, out_d, out_h, out_w) or (num_batches, out_d, out_h, out_w, channels). - - Raises: - TypeError: out_shape should be a list or tuple or Variable. - TypeError: actual_shape should either be Variable or None. - ValueError: The 'resample' of image_resize can only be 'LINEAR', 'BILINEAR', - 'TRILINEAR', 'BICUBIC' or 'NEAREST' currently. - ValueError: 'LINEAR' only support 3-D tensor. - ValueError: 'BICUBIC', 'BILINEAR' and 'NEAREST' only support 4-D tensor. - ValueError: 'TRILINEAR' only support 5-D tensor. - ValueError: One of out_shape and scale must not be None. - ValueError: out_shape length should be 1 for input 3-D tensor. - ValueError: out_shape length should be 2 for input 4-D tensor. - ValueError: out_shape length should be 3 for input 5-D tensor. - ValueError: scale should be greater than zero. - TypeError: align_corners should be a bool value - ValueError: align_mode can only be '0' or '1' - ValueError: data_format can only be 'NCW', 'NWC', 'NCHW', 'NHWC', 'NCDHW' or 'NDHWC'. - - Examples: - .. 
code-block:: python - - #declarative mode - import paddle - import paddle.fluid as fluid - import numpy as np - paddle.enable_static() - input = fluid.data(name="input", shape=[None,3,6,10]) - - #1 - output = fluid.layers.image_resize(input=input,out_shape=[12,12]) - - #2 - #x = np.array([2]).astype("int32") - #dim1 = fluid.data(name="dim1", shape=[1], dtype="int32") - #fluid.layers.assign(input=x, output=dim1) - #output = fluid.layers.image_resize(input=input,out_shape=[12,dim1]) - - #3 - #x = np.array([3,12]).astype("int32") - #shape_tensor = fluid.data(name="shape_tensor", shape=[2], dtype="int32") - #fluid.layers.assign(input=x, output=shape_tensor) - #output = fluid.layers.image_resize(input=input,out_shape=shape_tensor) - - #4 - #x = np.array([0.5]).astype("float32") - #scale_tensor = fluid.data(name="scale", shape=[1], dtype="float32") - #fluid.layers.assign(x,scale_tensor) - #output = fluid.layers.image_resize(input=input,scale=scale_tensor) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - input_data = np.random.rand(2,3,6,10).astype("float32") - - output_data = exe.run(fluid.default_main_program(), - feed={"input":input_data}, - fetch_list=[output], - return_numpy=True) - - print(output_data[0].shape) - - #1 - # (2, 3, 12, 12) - #2 - # (2, 3, 12, 2) - #3 - # (2, 3, 3, 12) - #4 - # (2, 3, 3, 5) - - #imperative mode - import paddle.fluid.dygraph as dg - - with dg.guard(place) as g: - input = dg.to_variable(input_data) - output = fluid.layers.image_resize(input=input, out_shape=[12,12]) - print(output.shape) - - # [2L, 3L, 12L, 12L] - - """ - resample_methods = { - 'LINEAR': 'linear', - 'BILINEAR': 'bilinear', - 'TRILINEAR': 'trilinear', - 'NEAREST': 'nearest', - 'LINEAR': 'linear', - } - resample = resample.upper() - if resample not in resample_methods: - raise ValueError( - "The 'resample' of image_resize can only be 'LINEAR', 'BILINEAR', 'TRILINEAR' " - "or 'NEAREST' currently." - ) - resample_type = resample_methods[resample] - - if resample == 'LINEAR' and len(input.shape) != 3: - raise ValueError("'LINER only support 3-D tensor.") - elif resample in ['BILINEAR', 'NEAREST'] and len(input.shape) != 4: - raise ValueError("'BILINEAR' and 'NEAREST' only support 4-D tensor.") - elif resample == 'TRILINEAR' and len(input.shape) != 5: - raise ValueError("'TRILINEAR'only support 5-D tensor.") - - if not isinstance(align_corners, bool): - raise TypeError("Attr align_corners should be a bool value") - if align_mode != 0 and align_mode != 1: - raise ValueError("align_mode can only be 0 or 1") - - if out_shape is None and scale is None: - raise ValueError("One of out_shape and scale must not be None.") - helper = LayerHelper('{}_interp'.format(resample_type), **locals()) - dtype = helper.input_dtype() - - if len(input.shape) == 3 and data_format not in ['NCW', 'NWC']: - raise ValueError( - "Got wrong value for param `data_format`: " - + data_format - + " received but only `NCW` or `NWC` supported for 3-D input." - ) - elif len(input.shape) == 4 and data_format not in ['NCHW', 'NHWC']: - raise ValueError( - "Got wrong value for param `data_format`: " - + data_format - + " received but only `NCHW` or `NHWC` supported for 4-D input." - ) - elif len(input.shape) == 5 and data_format not in ['NCDHW', 'NDHWC']: - raise ValueError( - "Got wrong value for param `data_format`: " - + data_format - + " received but only `NCDHW` or `NDHWC` supported for 5-D input." 
- ) - - def _is_list_or_turple_(data): - return isinstance(data, list) or isinstance(data, tuple) - - if data_format == 'NCHW' or data_format == 'NCDHW' or data_format == 'NCW': - data_layout = 'NCHW' - if data_format == 'NHWC' or data_format == 'NDHWC' or data_format == 'NWC': - data_layout = 'NHWC' - - inputs = {"X": input} - attrs = { - "out_d": -1, - "out_h": -1, - "out_w": -1, - "interp_method": resample_type, - "align_corners": align_corners, - "align_mode": align_mode, - "data_layout": data_layout, - } - - if out_shape is not None: - if isinstance(out_shape, Variable) and not _non_static_mode(): - out_shape.stop_gradient = True - inputs['OutSize'] = out_shape - else: - if _non_static_mode(): - if isinstance(out_shape, Variable): - out_shape = list(out_shape.numpy()) - else: - out_shape = list(out_shape) - for i, dim in enumerate(out_shape): - if isinstance(dim, Variable): - out_shape[i] = dim.numpy()[0] - if not (_is_list_or_turple_(out_shape)): - raise TypeError( - "out_shape should be a list or tuple or Variable." - ) - # Validate the shape - contain_var = False - for dim_idx, dim_size in enumerate(out_shape): - if isinstance(dim_size, Variable): - contain_var = True - continue - assert ( - dim_size > 0 - ), "Each dimension size given in out_shape must be greater than 0." - - if contain_var: - new_size_tensor = [] - size_list = [] - for dim in out_shape: - if isinstance(dim, Variable): - dim.stop_gradient = True - new_size_tensor.append(dim) - size_list.append(-1) - else: - assert isinstance(dim, int) - temp_out = helper.create_variable_for_type_inference( - 'int32' - ) - fill_constant( - [1], 'int32', dim, force_cpu=True, out=temp_out - ) - new_size_tensor.append(temp_out) - size_list.append(dim) - inputs['SizeTensor'] = new_size_tensor - - if len(input.shape) == 3: - if len(out_shape) != 1: - raise ValueError( - "out_shape length should be 1 for " "input 3-D tensor." - ) - if contain_var: - attrs['out_w'] = size_list[0] - else: - out_shape = list(map(int, out_shape)) - attrs['out_w'] = out_shape[0] - elif len(input.shape) == 4: - if len(out_shape) != 2: - raise ValueError( - "out_shape length should be 2 for " "input 4-D tensor." - ) - if contain_var: - attrs['out_h'] = size_list[0] - attrs['out_w'] = size_list[1] - else: - out_shape = list(map(int, out_shape)) - attrs['out_h'] = out_shape[0] - attrs['out_w'] = out_shape[1] - if len(input.shape) == 5: - if len(out_shape) != 3: - raise ValueError( - "out_shape length should be 3 for " "input 5-D tensor." - ) - if contain_var: - attrs['out_d'] = size_list[0] - attrs['out_h'] = size_list[1] - attrs['out_w'] = size_list[2] - else: - out_shape = list(map(int, out_shape)) - attrs['out_d'] = out_shape[0] - attrs['out_h'] = out_shape[1] - attrs['out_w'] = out_shape[2] - - else: - if _non_static_mode() and isinstance(scale, Variable): - scale = scale.numpy() - elif isinstance(scale, Variable): - scale.stop_gradient = True - inputs["Scale"] = scale - elif isinstance(scale, float) or isinstance(scale, int): - if scale <= 0: - raise ValueError("Attr(scale) should be greater than zero.") - attrs['scale'] = float(scale) - else: - raise TypeError( - "Attr(scale)'s type should be float, int or Variable." - ) - - if isinstance(actual_shape, Variable): - warnings.warn( - "actual_shape will be deprecated, it is recommended to use " - "out_shape instead of actual_shape to specify output shape dynamically." 
- ) - actual_shape.stop_gradient = True - inputs["OutSize"] = actual_shape - elif actual_shape is not None: - raise TypeError("actual_shape should either be Variable or None.") - - if _non_static_mode(): - attr_list = [] - for k, v in attrs.items(): - attr_list.append(k) - attr_list.append(v) - dy_attr = tuple(attr_list) - - if resample_type == "linear": - out = _legacy_C_ops.linear_interp(input, actual_shape, *dy_attr) - elif resample_type == "bilinear": - out = _legacy_C_ops.bilinear_interp(input, actual_shape, *dy_attr) - elif resample_type == "trilinear": - out = _legacy_C_ops.trilinear_interp(input, actual_shape, *dy_attr) - elif resample_type == "nearest": - out = _legacy_C_ops.nearest_interp(input, actual_shape, *dy_attr) - elif resample_type == "bicubic": - out = _legacy_C_ops.bicubic_interp(input, actual_shape, *dy_attr) - return out - - out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='{}_interp'.format(resample_type), - inputs=inputs, - outputs={"Out": out}, - attrs=attrs, - ) - return out - - -@templatedoc(op_type="bilinear_interp") -def resize_bilinear( - input, - out_shape=None, - scale=None, - name=None, - actual_shape=None, - align_corners=True, - align_mode=1, - data_format='NCHW', -): - """ - - This op resizes the input by performing bilinear interpolation based on given - output shape which specified by actual_shape, out_shape and scale - in priority order. - - **Warning:** the parameter :attr:`actual_shape` will be deprecated in - the future and only use :attr:`out_shape` instead. - - Bilinear interpolation is an extension of linear interpolation for - interpolating functions of two variables (e.g. H-direction and - W-direction in this op) on a rectilinear 2D grid. The key idea is - to perform linear interpolation first in one direction, and then - again in the other direction. - - For details of bilinear interpolation, please refer to Wikipedia: - https://en.wikipedia.org/wiki/Bilinear_interpolation - - Align_corners and align_mode are optional parameters,the calculation - method of interpolation can be selected by them. - - Example: - - .. code-block:: text - - For scale: - - if align_corners = True && out_size > 1 : - - scale_factor = (in_size-1.0)/(out_size-1.0) - - else: - - scale_factor = float(in_size/out_size) - - Bilinear interpolation: - - if: - align_corners = False , align_mode = 0 - - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: - - H_out = (H_{in}+0.5) * scale_{factor} - 0.5 - W_out = (W_{in}+0.5) * scale_{factor} - 0.5 - - else: - - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: - H_out = H_{in} * scale_{factor} - W_out = W_{in} * scale_{factor} - - Parameters: - input(Variable): 4-D Tensor(NCHW), its data type is float32, float64, or uint8, - its data format is specified by :attr:`data_format`. - out_shape(list|tuple|Variable|None): Output shape of resize bilinear - layer, the shape is (out_h, out_w).Default: None. If a list, each - element can be an integer or a Tensor Variable with shape: [1]. If a - Tensor Variable, its dimension size should be 1. - scale(float|Variable|None): The multiplier for the input height or width. At - least one of :attr:`out_shape` or :attr:`scale` must be set. - And :attr:`out_shape` has a higher priority than :attr:`scale`. - Default: None. - actual_shape(Variable): An optional input to specify output shape - dynamically. If provided, image resize - according to this given shape rather than - :attr:`out_shape` and :attr:`scale` specifying - shape. 
That is to say actual_shape has the - highest priority. It is recommended to use - :attr:`out_shape` if you want to specify output - shape dynamically, because :attr:`actual_shape` - will be deprecated. When using actual_shape to - specify output shape, one of :attr:`out_shape` - and :attr:`scale` should also be set, otherwise - errors would be occurred in graph constructing stage. - Default: None - align_corners(bool): ${align_corners_comment} - align_mode(bool): ${align_mode_comment} - data_format (str, optional): Specify the data format of the input, and the data format of the output - will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. - The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_height, input_width]`. - name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` - - Returns: - Variable: 4-D tensor(NCHW or NHWC). - - Examples: - .. code-block:: python - - #declarative mode - import paddle.fluid as fluid - import numpy as np - import paddle - paddle.enable_static() - input = fluid.data(name="input", shape=[None,3,6,10]) - - #1 - output = fluid.layers.resize_bilinear(input=input,out_shape=[12,12]) - - #2 - #x = np.array([2]).astype("int32") - #dim1 = fluid.data(name="dim1", shape=[1], dtype="int32") - #fluid.layers.assign(input=x, output=dim1) - #output = fluid.layers.resize_bilinear(input=input,out_shape=[12,dim1]) - - #3 - #x = np.array([3,12]).astype("int32") - #shape_tensor = fluid.data(name="shape_tensor", shape=[2], dtype="int32") - #fluid.layers.assign(input=x, output=shape_tensor) - #output = fluid.layers.resize_bilinear(input=input,out_shape=shape_tensor) - - #4 - #x = np.array([0.5]).astype("float32") - #scale_tensor = fluid.data(name="scale", shape=[1], dtype="float32") - #fluid.layers.assign(x,scale_tensor) - #output = fluid.layers.resize_bilinear(input=input,scale=scale_tensor) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - input_data = np.random.rand(2,3,6,10).astype("float32") - - output_data = exe.run(fluid.default_main_program(), - feed={"input":input_data}, - fetch_list=[output], - return_numpy=True) - - print(output_data[0].shape) - - #1 - # (2, 3, 12, 12) - #2 - # (2, 3, 12, 2) - #3 - # (2, 3, 3, 12) - #4 - # (2, 3, 3, 5) - - #imperative mode - import paddle.fluid.dygraph as dg - - with dg.guard(place) as g: - input = dg.to_variable(input_data) - output = fluid.layers.resize_bilinear(input=input, out_shape=[12,12]) - print(output.shape) - - # [2L, 3L, 12L, 12L] - - """ - - return image_resize( - input, - out_shape, - scale, - name, - 'BILINEAR', - actual_shape, - align_corners, - align_mode, - data_format, - ) - - -@templatedoc(op_type="trilinear_interp") -def resize_trilinear( - input, - out_shape=None, - scale=None, - name=None, - actual_shape=None, - align_corners=True, - align_mode=1, - data_format='NCDHW', -): - """ - - This op resizes the input by performing trilinear interpolation based on given - output shape which specified by actual_shape, out_shape and scale - in priority order. - - **Warning:** the parameter :attr:`actual_shape` will be deprecated - in the future and only use :attr:`out_shape` instead. - - Trilinear interpolation is an extension of linear interpolation for - interpolating functions of three variables (e.g. 
D-direction, - H-direction and W-direction in this op) on a rectilinear 3D grid. - The linear interpolation is performed on three directions. - - For details of trilinear interpolation, please refer to Wikipedia: - https://en.wikipedia.org/wiki/Trilinear_interpolation - - Align_corners and align_mode are optional parameters,the calculation - method of interpolation can be selected by them. - - Example: - - .. code-block:: text - - For scale: - - if align_corners = True && out_size > 1 : - - scale_factor = (in_size-1.0)/(out_size-1.0) - - else: - - scale_factor = float(in_size/out_size) - - Bilinear interpolation: - - if: - - align_corners = False , align_mode = 0 - - input : (N,C,D_in,H_in,W_in) - output: (N,C,D_out,H_out,W_out) where: - - D_out = (D_{in}+0.5) * scale_{factor} - 0.5 - H_out = (H_{in}+0.5) * scale_{factor} - 0.5 - W_out = (W_{in}+0.5) * scale_{factor} - 0.5 - - else: - - input : (N,C,D_in,H_in,W_in) - output: (N,C,D_out,H_out,W_out) where: - - D_out = D_{in} * scale_{factor} - H_out = H_{in} * scale_{factor} - W_out = W_{in} * scale_{factor} - - Parameters: - input(${x_type}): 5-D Tensor, its data type is float32, float64, or uint8, - its data format is specified by :attr:`data_format`. - out_shape(list|tuple|Variable|None): The output shape of resized tensor, the shape is (out_d, out_h, out_w). Default: None. Every element should be an integer or a Tensor Variable with shape: [1] if it is a list. If it is a Tensor Variable, its dimension size should be 1. - scale(float|Variable|None): The multiplier for the input depth, height or width. - At least one of :attr:`out_shape` or :attr:`scale` must be set. - And :attr:`out_shape` has a higher priority than :attr:`scale`. - Default: None. - name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` - actual_shape(Variable): An optional input to specify output shape - dynamically. If provided, image resize - according to this given shape rather than - :attr:`out_shape` and :attr:`scale` specifying - shape. That is to say actual_shape has the - highest priority. It is recommended to use - :attr:`out_shape` if you want to specify output - shape dynamically, because :attr:`actual_shape` - will be deprecated. When using actual_shape to - specify output shape, one of :attr:`out_shape` - and :attr:`scale` should also be set, otherwise - errors would be occurred in graph constructing stage. - Default: None - align_corners(bool): ${align_corners_comment} - align_mode(bool): ${align_mode_comment} - data_format (str, optional): Specify the data format of the input, and the data format of the output - will be consistent with that of the input. An optional string from: `"NCDHW"`, `"NDHWC"`. - The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_depth, input_height, input_width]`. - - Returns: - Variable: A 5-D Tensor(NCDHW or NDHWC) - - Examples: - .. 
code-block:: python - - #declarative mode - import paddle.fluid as fluid - import paddle - import numpy as np - paddle.enable_static() - input = fluid.data(name="input", shape=[None,3,6,8,10]) - - #1 - output = fluid.layers.resize_trilinear(input=input,out_shape=[12,12,12]) - - #2 - #x = np.array([2]).astype("int32") - #dim1 = fluid.data(name="dim1", shape=[1], dtype="int32") - #fluid.layers.assign(input=x, output=dim1) - #output = fluid.layers.resize_trilinear(input=input,out_shape=[12,dim1,4]) - - #3 - #x = np.array([3,12,12]).astype("int32") - #shape_tensor = fluid.data(name="shape_tensor", shape=[3], dtype="int32") - #fluid.layers.assign(input=x, output=shape_tensor) - #output = fluid.layers.resize_trilinear(input=input,out_shape=shape_tensor) - - #4 - #x = np.array([0.5]).astype("float32") - #scale_tensor = fluid.data(name="scale", shape=[1], dtype="float32") - #fluid.layers.assign(x,scale_tensor) - #output = fluid.layers.resize_trilinear(input=input,scale=scale_tensor) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - input_data = np.random.rand(2,3,6,8,10).astype("float32") - - output_data = exe.run(fluid.default_main_program(), - feed={"input":input_data}, - fetch_list=[output], - return_numpy=True) - - print(output_data[0].shape) - - #1 - # (2, 3, 12, 12, 12) - #2 - # (2, 3, 12, 2, 4) - #3 - # (2, 3, 3, 12, 12) - #4 - # (2, 3, 3, 4, 5) - - #imperative mode - import paddle.fluid.dygraph as dg - - with dg.guard(place) as g: - input = dg.to_variable(input_data) - output = fluid.layers.resize_trilinear(input=input, out_shape=[12,12,12]) - print(output.shape) - - # [2L, 3L, 12L, 12L, 12L] - - - - """ - - return image_resize( - input, - out_shape, - scale, - name, - 'TRILINEAR', - actual_shape, - align_corners, - align_mode, - data_format, - ) - - -@templatedoc(op_type="nearest_interp") -def resize_nearest( - input, - out_shape=None, - scale=None, - name=None, - actual_shape=None, - align_corners=True, - data_format='NCHW', -): - """ - - This op resizes the input by performing nearest neighbor interpolation in both the - height direction and the width direction based on given output shape - which is specified by actual_shape, out_shape and scale in priority order. - - **Warning:** the parameter :attr:`actual_shape` will be deprecated in the - future and only use :attr:`out_shape` instead. - - Example: - - .. code-block:: text - - For scale: - - if align_corners = True && out_size > 1 : - scale_factor = (in_size-1.0)/(out_size-1.0) - - else: - - scale_factor = float(in_size/out_size) - - Nearest neighbor interpolation: - - if: - align_corners = False - - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: - - H_out = floor(H_{in} * scale_{factor}) - W_out = floor(W_{in} * scale_{factor}) - - else: - align_corners = True - - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: - - H_out = round(H_{in} * scale_{factor}) - W_out = round(W_{in} * scale_{factor}) - - - For details of nearest neighbor interpolation, please refer to Wikipedia: - https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation - - Parameters: - input(${x_type}): 4-D Tensor, its data type is float32, float64, or uint8, - its data format is specified by :attr:`data_format`. - out_shape(list|tuple|Variable|None): The output shape of resized tensor, the shape is (out_h, out_w). Default: None. Every element should be an integer or a tensor Variable with shape: [1] if it is a list. If it is a tensor Variable, its dimension size should be 1. 
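As with the bilinear case, out_shape and scale map onto interpolate's size and scale_factor for nearest resizing too, and data_format passes through unchanged; this is the pattern the test_trt_nearest_interp_op.py hunk below adopts. A brief sketch with an illustrative NHWC tensor (not taken from the patch):

    import paddle
    import paddle.nn.functional as F

    x_nhwc = paddle.rand([2, 6, 6, 3])  # NHWC layout
    # out_shape=[12, 12] -> size=[12, 12]; nearest mode keeps interpolate's align_corners=False default
    y = F.interpolate(x_nhwc, size=[12, 12], mode='nearest', data_format='NHWC')
    # scale=0.5 -> scale_factor=0.5
    y_half = F.interpolate(x_nhwc, scale_factor=0.5, mode='nearest', data_format='NHWC')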
- scale(float|Variable|None): The multiplier for the input height or width. At - least one of :attr:`out_shape` or :attr:`scale` must be set. - And :attr:`out_shape` has a higher priority than :attr:`scale`. - Default: None. - name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` - actual_shape(Variable): An optional input to specify output shape - dynamically. If provided, image resize - according to this given shape rather than - :attr:`out_shape` and :attr:`scale` specifying - shape. That is to say actual_shape has the - highest priority. It is recommended to use - :attr:`out_shape` if you want to specify output - shape dynamically, because :attr:`actual_shape` - will be deprecated. When using actual_shape to - specify output shape, one of :attr:`out_shape` - and :attr:`scale` should also be set, otherwise - errors would be occurred in graph constructing stage. - Default: None - align_corners(bool): ${align_corners_comment} - data_format (str, optional): Specify the data format of the input, and the data format of the output - will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. - The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_height, input_width]`. - - Returns: - Variable: 4-D tensor(NCHW or NHWC). - - Examples: - .. code-block:: python - - #declarative mode - import paddle.fluid as fluid - import numpy as np - import paddle - paddle.enable_static() - - input = fluid.data(name="input", shape=[None,3,6,10]) - - #1 - output = fluid.layers.resize_nearest(input=input,out_shape=[12,12]) - - #2 - #x = np.array([2]).astype("int32") - #dim1 = fluid.data(name="dim1", shape=[1], dtype="int32") - #fluid.layers.assign(input=x, output=dim1) - #output = fluid.layers.resize_nearest(input=input,out_shape=[12,dim1]) - - #3 - #x = np.array([3,12]).astype("int32") - #shape_tensor = fluid.data(name="shape_tensor", shape=[2], dtype="int32") - #fluid.layers.assign(input=x, output=shape_tensor) - #output = fluid.layers.resize_nearest(input=input,out_shape=shape_tensor) - - #4 - #x = np.array([0.5]).astype("float32") - #scale_tensor = fluid.data(name="scale", shape=[1], dtype="float32") - #fluid.layers.assign(x,scale_tensor) - #output = fluid.layers.resize_nearest(input=input,scale=scale_tensor) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - input_data = np.random.rand(2,3,6,10).astype("float32") - - output_data = exe.run(fluid.default_main_program(), - feed={"input":input_data}, - fetch_list=[output], - return_numpy=True) - - print(output_data[0].shape) - - #1 - # (2, 3, 12, 12) - #2 - # (2, 3, 12, 2) - #3 - # (2, 3, 3, 12) - #4 - # (2, 3, 3, 5) - - #imperative mode - import paddle.fluid.dygraph as dg - - with dg.guard(place) as g: - input = dg.to_variable(input_data) - output = fluid.layers.resize_nearest(input=input, out_shape=[12,12]) - print(output.shape) - - # [2L, 3L, 12L, 12L] - - - - """ - - return image_resize( - input, - out_shape, - scale, - name, - 'NEAREST', - actual_shape, - align_corners, - align_mode=1, - data_format=data_format, - ) - - @deprecated(since="2.0.0", update_to="paddle.nn.functional.relu") def relu(x, name=None): """ diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py index 5cf1f0f0f533a0..1c1877681c4b18 100644 --- 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py @@ -211,9 +211,10 @@ def forward(self, inputs): out_shape.stop_gradient = True # reisze by actual_shape - out = fluid.layers.resize_nearest( - input=inputs, scale=self.scale, actual_shape=out_shape + out = paddle.nn.functional.interpolate( + x=inputs, size=out_shape, mode='nearest' ) + return out diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_nearest_interp_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_nearest_interp_op.py index db0b53100348be..505a7ccad3bc29 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_nearest_interp_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_nearest_interp_op.py @@ -17,6 +17,7 @@ import numpy as np from inference_pass_test import InferencePassTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import AnalysisConfig, PassVersionChecker @@ -81,16 +82,14 @@ def set_params(self): def append_nearest_interp(self, data): if self.scale > 0.0: - return fluid.layers.resize_nearest( + return paddle.nn.functional.interpolate( data, - scale=self.scale, - align_corners=self.align_corners, + scale_factor=self.scale, data_format=self.data_layout, ) - return fluid.layers.resize_nearest( + return paddle.nn.functional.interpolate( data, - out_shape=self.resize_shape, - align_corners=self.align_corners, + size=self.resize_shape, data_format=self.data_layout, ) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_bilinear_interp_v2_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_bilinear_interp_v2_op_mlu.py index 912578cda8c5c3..c912b9ab219f21 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_bilinear_interp_v2_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_bilinear_interp_v2_op_mlu.py @@ -511,57 +511,6 @@ def init_test_case(self): self.scale_by_1Dtensor = True -class TestBilinearInterpOpAPI(unittest.TestCase): - def test_case(self): - x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - - dim = fluid.data(name="dim", shape=[1], dtype="int32") - shape_tensor = fluid.data(name="shape_tensor", shape=[2], dtype="int32") - actual_size = fluid.data(name="actual_size", shape=[2], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32" - ) - - out1 = fluid.layers.resize_bilinear(x, out_shape=[12, 12]) - out2 = fluid.layers.resize_bilinear(x, out_shape=[12, dim]) - out3 = fluid.layers.resize_bilinear(x, out_shape=shape_tensor) - out4 = fluid.layers.resize_bilinear( - x, out_shape=[4, 4], actual_shape=actual_size - ) - out5 = fluid.layers.resize_bilinear(x, scale=scale_tensor) - - x_data = np.random.random((2, 3, 6, 6)).astype("float32") - dim_data = np.array([12]).astype("int32") - shape_data = np.array([12, 12]).astype("int32") - actual_size_data = np.array([12, 12]).astype("int32") - scale_data = np.array([2.0]).astype("float32") - - if core.is_compiled_with_mlu(): - place = paddle.device.MLUPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - results = exe.run( - fluid.default_main_program(), - feed={ - "x": x_data, - "dim": dim_data, - "shape_tensor": shape_data, - "actual_size": actual_size_data, - "scale_tensor": scale_data, - }, - fetch_list=[out1, out2, out3, out4, out5], - return_numpy=True, - ) - - expect_res = bilinear_interp_np( - x_data, out_h=12, out_w=12, 
align_corners=True - ) - for res in results: - np.testing.assert_allclose(res, expect_res, rtol=1e-6) - - class TestBilinearInterpOpAPI_dy(unittest.TestCase): def test_case(self): import paddle diff --git a/python/paddle/fluid/tests/unittests/mlu/test_nearest_interp_v2_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_nearest_interp_v2_op_mlu.py index 1ab286740ef2d3..3494d471f8e34e 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_nearest_interp_v2_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_nearest_interp_v2_op_mlu.py @@ -546,101 +546,5 @@ def init_test_case(self): self.scale_by_1Dtensor = True -class TestNearestAPI(unittest.TestCase): - def test_case(self): - x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - y = fluid.data(name="y", shape=[2, 6, 6, 3], dtype="float32") - - dim = fluid.data(name="dim", shape=[1], dtype="int32") - shape_tensor = fluid.data(name="shape_tensor", shape=[2], dtype="int32") - actual_size = fluid.data(name="actual_size", shape=[2], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32" - ) - - out1 = fluid.layers.resize_nearest( - y, out_shape=[12, 12], data_format='NHWC', align_corners=False - ) - out2 = fluid.layers.resize_nearest( - x, out_shape=[12, dim], align_corners=False - ) - out3 = fluid.layers.resize_nearest( - x, out_shape=shape_tensor, align_corners=False - ) - out4 = fluid.layers.resize_nearest( - x, out_shape=[4, 4], actual_shape=actual_size, align_corners=False - ) - out5 = fluid.layers.resize_nearest( - x, scale=scale_tensor, align_corners=False - ) - - x_data = np.random.random((2, 3, 6, 6)).astype("float32") - dim_data = np.array([12]).astype("int32") - shape_data = np.array([12, 12]).astype("int32") - actual_size_data = np.array([12, 12]).astype("int32") - scale_data = np.array([2.0]).astype("float32") - - place = paddle.MLUPlace(0) - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - results = exe.run( - fluid.default_main_program(), - feed={ - "x": x_data, - "y": np.transpose(x_data, (0, 2, 3, 1)), - "dim": dim_data, - "shape_tensor": shape_data, - "actual_size": actual_size_data, - "scale_tensor": scale_data, - }, - fetch_list=[out1, out2, out3, out4, out5], - return_numpy=True, - ) - - expect_res = nearest_neighbor_interp_np( - x_data, out_h=12, out_w=12, align_corners=False - ) - np.testing.assert_allclose( - results[0], np.transpose(expect_res, (0, 2, 3, 1)) - ) - for i in range(len(results) - 1): - np.testing.assert_allclose(results[i + 1], expect_res) - - -class TestNearestInterpException(unittest.TestCase): - def test_exception(self): - import paddle - - input = fluid.data(name="input", shape=[1, 3, 6, 6], dtype="float32") - - def attr_data_format(): - # for 4-D input, data_format can only be NCHW or NHWC - out = fluid.layers.resize_nearest( - input, out_shape=[4, 8], data_format='NDHWC' - ) - - def attr_scale_type(): - out = fluid.layers.resize_nearest(input, scale='scale') - - def attr_scale_value(): - out = fluid.layers.resize_nearest(input, scale=-0.3) - - def input_shape_error(): - x = paddle.randn([1, 3]) - out = paddle.nn.functional.interpolate(x, scale_factor='scale') - - def mode_error(): - x = paddle.randn([1, 3]) - out = paddle.nn.functional.interpolate( - x, scale_factor='scale', mode="BILINEAR" - ) - - self.assertRaises(ValueError, attr_data_format) - self.assertRaises(TypeError, attr_scale_type) - self.assertRaises(ValueError, attr_scale_value) - self.assertRaises(ValueError, input_shape_error) - 
self.assertRaises(ValueError, mode_error) - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/npu/test_nearest_interp_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_nearest_interp_op_npu.py index 82a2ff48251df0..3fa4518f1e3738 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_nearest_interp_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_nearest_interp_op_npu.py @@ -404,87 +404,5 @@ def init_test_case(self): self.scale_by_1Dtensor = True -class TestNearestAPI(unittest.TestCase): - def test_case(self): - x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - y = fluid.data(name="y", shape=[2, 6, 6, 3], dtype="float32") - - dim = fluid.data(name="dim", shape=[1], dtype="int32") - shape_tensor = fluid.data(name="shape_tensor", shape=[2], dtype="int32") - actual_size = fluid.data(name="actual_size", shape=[2], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32" - ) - - out1 = fluid.layers.resize_nearest( - y, out_shape=[12, 12], data_format='NHWC', align_corners=False - ) - out2 = fluid.layers.resize_nearest( - x, out_shape=[12, dim], align_corners=False - ) - out3 = fluid.layers.resize_nearest( - x, out_shape=shape_tensor, align_corners=False - ) - out4 = fluid.layers.resize_nearest( - x, out_shape=[4, 4], actual_shape=actual_size, align_corners=False - ) - out5 = fluid.layers.resize_nearest( - x, scale=scale_tensor, align_corners=False - ) - - x_data = np.random.random((2, 3, 6, 6)).astype("float32") - dim_data = np.array([12]).astype("int32") - shape_data = np.array([12, 12]).astype("int32") - actual_size_data = np.array([12, 12]).astype("int32") - scale_data = np.array([2.0]).astype("float32") - - place = paddle.NPUPlace(0) - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - results = exe.run( - fluid.default_main_program(), - feed={ - "x": x_data, - "y": np.transpose(x_data, (0, 2, 3, 1)), - "dim": dim_data, - "shape_tensor": shape_data, - "actual_size": actual_size_data, - "scale_tensor": scale_data, - }, - fetch_list=[out1, out2, out3, out4, out5], - return_numpy=True, - ) - - expect_res = nearest_neighbor_interp_np( - x_data, out_h=12, out_w=12, align_corners=False - ) - np.testing.assert_allclose( - results[0], np.transpose(expect_res, (0, 2, 3, 1)) - ) - for i in range(len(results) - 1): - np.testing.assert_allclose(results[i + 1], expect_res) - - -class TestNearestInterpException(unittest.TestCase): - def test_exception(self): - input = fluid.data(name="input", shape=[1, 3, 6, 6], dtype="float32") - - def attr_data_format(): - # for 4-D input, data_format can only be NCHW or NHWC - out = fluid.layers.resize_nearest( - input, out_shape=[4, 8], data_format='NDHWC' - ) - - def attr_scale_type(): - out = fluid.layers.resize_nearest(input, scale='scale') - - def attr_scale_value(): - out = fluid.layers.resize_nearest(input, scale=-0.3) - - self.assertRaises(ValueError, attr_data_format) - self.assertRaises(TypeError, attr_scale_type) - self.assertRaises(ValueError, attr_scale_value) - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py index 5381df947acc02..fd2372e2571dac 100755 --- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py @@ -18,7 +18,6 @@ from op_test import OpTest import paddle -import paddle.fluid as fluid import 
paddle.fluid.core as core paddle.enable_static() @@ -529,56 +528,5 @@ def init_test_case(self): self.scale_by_1Dtensor = True -class TestBilinearInterpOpAPI(unittest.TestCase): - def test_case(self): - x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - - dim = fluid.data(name="dim", shape=[1], dtype="int32") - shape_tensor = fluid.data(name="shape_tensor", shape=[2], dtype="int32") - actual_size = fluid.data(name="actual_size", shape=[2], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32" - ) - - out1 = fluid.layers.resize_bilinear(x, out_shape=[12, 12]) - out2 = fluid.layers.resize_bilinear(x, out_shape=[12, dim]) - out3 = fluid.layers.resize_bilinear(x, out_shape=shape_tensor) - out4 = fluid.layers.resize_bilinear( - x, out_shape=[4, 4], actual_shape=actual_size - ) - out5 = fluid.layers.resize_bilinear(x, scale=scale_tensor) - - x_data = np.random.random((2, 3, 6, 6)).astype("float32") - dim_data = np.array([12]).astype("int32") - shape_data = np.array([12, 12]).astype("int32") - actual_size_data = np.array([12, 12]).astype("int32") - scale_data = np.array([2.0]).astype("float32") - - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - results = exe.run( - fluid.default_main_program(), - feed={ - "x": x_data, - "dim": dim_data, - "shape_tensor": shape_data, - "actual_size": actual_size_data, - "scale_tensor": scale_data, - }, - fetch_list=[out1, out2, out3, out4, out5], - return_numpy=True, - ) - - expect_res = bilinear_interp_np( - x_data, out_h=12, out_w=12, align_corners=True - ) - for res in results: - np.testing.assert_allclose(res, expect_res, rtol=1e-05) - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py index ff5a0a707b87aa..01d57595000a38 100755 --- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py @@ -638,57 +638,6 @@ def init_test_case(self): self.scale_by_1Dtensor = True -class TestBilinearInterpOpAPI(unittest.TestCase): - def test_case(self): - x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - - dim = fluid.data(name="dim", shape=[1], dtype="int32") - shape_tensor = fluid.data(name="shape_tensor", shape=[2], dtype="int32") - actual_size = fluid.data(name="actual_size", shape=[2], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32" - ) - - out1 = fluid.layers.resize_bilinear(x, out_shape=[12, 12]) - out2 = fluid.layers.resize_bilinear(x, out_shape=[12, dim]) - out3 = fluid.layers.resize_bilinear(x, out_shape=shape_tensor) - out4 = fluid.layers.resize_bilinear( - x, out_shape=[4, 4], actual_shape=actual_size - ) - out5 = fluid.layers.resize_bilinear(x, scale=scale_tensor) - - x_data = np.random.random((2, 3, 6, 6)).astype("float32") - dim_data = np.array([12]).astype("int32") - shape_data = np.array([12, 12]).astype("int32") - actual_size_data = np.array([12, 12]).astype("int32") - scale_data = np.array([2.0]).astype("float32") - - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - results = exe.run( - fluid.default_main_program(), - feed={ - "x": x_data, - "dim": dim_data, - "shape_tensor": shape_data, - 
"actual_size": actual_size_data, - "scale_tensor": scale_data, - }, - fetch_list=[out1, out2, out3, out4, out5], - return_numpy=True, - ) - - expect_res = bilinear_interp_np( - x_data, out_h=12, out_w=12, align_corners=True - ) - for res in results: - np.testing.assert_allclose(res, expect_res, rtol=1e-05) - - class TestBilinearInterpOpAPI_dy(unittest.TestCase): def test_case(self): import paddle diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 3f7edb6022a859..dcf442200d178b 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -3187,94 +3187,6 @@ def make_topk(self): return values return indices - def make_resize_bilinear(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data(name='x', shape=[3, 9, 6], dtype="float32") - output = layers.resize_bilinear(x, out_shape=[12, 12]) - return output - - def make_resize_bilinear_by_scale(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data(name='x', shape=[3, 9, 6], dtype="float32") - output = layers.resize_bilinear(x, scale=1.5) - return output - - def make_resize_nearest(self): - try: - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data(name='x1', shape=[3, 9, 6], dtype="float32") - output = layers.resize_nearest(x, out_shape=[12, 12]) - except ValueError: - pass - - try: - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data( - name='x2', shape=[3, 9, 6, 7], dtype="float32" - ) - output = layers.resize_nearest(x, out_shape=[12, 12, 12]) - except ValueError: - pass - - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data(name='x', shape=[3, 9, 6], dtype="float32") - output = layers.resize_nearest(x, out_shape=[12, 12]) - return output - - def make_resize_nearest_by_scale(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data(name='x1', shape=[3, 9, 6], dtype="float32") - output = layers.resize_nearest(x, scale=1.8) - return output - - def make_resize_trilinear(self): - try: - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data(name='x2', shape=[3, 9, 6], dtype="float32") - output = layers.resize_trilinear(x, out_shape=[12, 12, 12]) - except ValueError: - pass - - try: - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data( - name='x', shape=[3, 9, 6, 7], dtype="float32" - ) - output = layers.resize_trilinear(x, out_shape=[12, 12]) - except ValueError: - pass - - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data(name='x', shape=[3, 9, 6, 7], dtype="float32") - output = layers.resize_trilinear(x, out_shape=[12, 12, 12]) - return output - - def make_resize_trilinear_by_scale(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data(name='x', shape=[3, 9, 6, 7], dtype="float32") - output = layers.resize_trilinear(x, scale=2.1) - return output - def make_polygon_box_transform(self): with program_guard( fluid.default_main_program(), fluid.default_startup_program() diff --git 
a/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py b/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py index 4a910e4d83c290..b81ac851b5a8a9 100755 --- a/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py @@ -17,7 +17,6 @@ import numpy as np from op_test import OpTest -import paddle.fluid as fluid import paddle.fluid.core as core @@ -459,85 +458,6 @@ def init_test_case(self): self.scale_by_1Dtensor = True -class TestNearestAPI(unittest.TestCase): - def test_case(self): - x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - y = fluid.data(name="y", shape=[2, 6, 6, 3], dtype="float32") - - dim = fluid.data(name="dim", shape=[1], dtype="int32") - shape_tensor = fluid.data(name="shape_tensor", shape=[2], dtype="int32") - actual_size = fluid.data(name="actual_size", shape=[2], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32" - ) - - out1 = fluid.layers.resize_nearest( - y, out_shape=[12, 12], data_format='NHWC' - ) - out2 = fluid.layers.resize_nearest(x, out_shape=[12, dim]) - out3 = fluid.layers.resize_nearest(x, out_shape=shape_tensor) - out4 = fluid.layers.resize_nearest( - x, out_shape=[4, 4], actual_shape=actual_size - ) - out5 = fluid.layers.resize_nearest(x, scale=scale_tensor) - - x_data = np.random.random((2, 3, 6, 6)).astype("float32") - dim_data = np.array([12]).astype("int32") - shape_data = np.array([12, 12]).astype("int32") - actual_size_data = np.array([12, 12]).astype("int32") - scale_data = np.array([2.0]).astype("float32") - - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - results = exe.run( - fluid.default_main_program(), - feed={ - "x": x_data, - "y": np.transpose(x_data, (0, 2, 3, 1)), - "dim": dim_data, - "shape_tensor": shape_data, - "actual_size": actual_size_data, - "scale_tensor": scale_data, - }, - fetch_list=[out1, out2, out3, out4, out5], - return_numpy=True, - ) - - expect_res = nearest_neighbor_interp_np( - x_data, out_h=12, out_w=12, align_corners=True - ) - np.testing.assert_allclose( - results[0], np.transpose(expect_res, (0, 2, 3, 1)), rtol=1e-05 - ) - for i in range(len(results) - 1): - np.testing.assert_allclose(results[i + 1], expect_res, rtol=1e-05) - - -class TestNearestInterpException(unittest.TestCase): - def test_exception(self): - input = fluid.data(name="input", shape=[1, 3, 6, 6], dtype="float32") - - def attr_data_format(): - # for 4-D input, data_format can only be NCHW or NHWC - out = fluid.layers.resize_nearest( - input, out_shape=[4, 8], data_format='NDHWC' - ) - - def attr_scale_type(): - out = fluid.layers.resize_nearest(input, scale='scale') - - def attr_scale_value(): - out = fluid.layers.resize_nearest(input, scale=-0.3) - - self.assertRaises(ValueError, attr_data_format) - self.assertRaises(TypeError, attr_scale_type) - self.assertRaises(ValueError, attr_scale_value) - - if __name__ == "__main__": import paddle diff --git a/python/paddle/fluid/tests/unittests/test_nearest_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_nearest_interp_v2_op.py index e0f62dfa187ca2..b3d47b6f71455e 100755 --- a/python/paddle/fluid/tests/unittests/test_nearest_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_nearest_interp_v2_op.py @@ -684,64 +684,6 @@ def init_test_case(self): self.scale_by_1Dtensor = True -class TestNearestAPI(unittest.TestCase): - def 
test_case(self): - x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") - y = fluid.data(name="y", shape=[2, 6, 6, 3], dtype="float32") - - dim = fluid.data(name="dim", shape=[1], dtype="int32") - shape_tensor = fluid.data(name="shape_tensor", shape=[2], dtype="int32") - actual_size = fluid.data(name="actual_size", shape=[2], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32" - ) - - out1 = fluid.layers.resize_nearest( - y, out_shape=[12, 12], data_format='NHWC' - ) - out2 = fluid.layers.resize_nearest(x, out_shape=[12, dim]) - out3 = fluid.layers.resize_nearest(x, out_shape=shape_tensor) - out4 = fluid.layers.resize_nearest( - x, out_shape=[4, 4], actual_shape=actual_size - ) - out5 = fluid.layers.resize_nearest(x, scale=scale_tensor) - - x_data = np.random.random((2, 3, 6, 6)).astype("float32") - dim_data = np.array([12]).astype("int32") - shape_data = np.array([12, 12]).astype("int32") - actual_size_data = np.array([12, 12]).astype("int32") - scale_data = np.array([2.0]).astype("float32") - - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - results = exe.run( - fluid.default_main_program(), - feed={ - "x": x_data, - "y": np.transpose(x_data, (0, 2, 3, 1)), - "dim": dim_data, - "shape_tensor": shape_data, - "actual_size": actual_size_data, - "scale_tensor": scale_data, - }, - fetch_list=[out1, out2, out3, out4, out5], - return_numpy=True, - ) - - expect_res = nearest_neighbor_interp_np( - x_data, out_h=12, out_w=12, align_corners=True - ) - np.testing.assert_allclose( - results[0], np.transpose(expect_res, (0, 2, 3, 1)), rtol=1e-05 - ) - for i in range(len(results) - 1): - np.testing.assert_allclose(results[i + 1], expect_res, rtol=1e-05) - - class TestNearestInterpOpAPI_dy(unittest.TestCase): def test_case(self): import paddle @@ -793,41 +735,6 @@ def test_case(self): np.testing.assert_allclose(out.numpy(), expect_res, rtol=1e-05) -class TestNearestInterpException(unittest.TestCase): - def test_exception(self): - import paddle - - input = fluid.data(name="input", shape=[1, 3, 6, 6], dtype="float32") - - def attr_data_format(): - # for 4-D input, data_format can only be NCHW or NHWC - out = fluid.layers.resize_nearest( - input, out_shape=[4, 8], data_format='NDHWC' - ) - - def attr_scale_type(): - out = fluid.layers.resize_nearest(input, scale='scale') - - def attr_scale_value(): - out = fluid.layers.resize_nearest(input, scale=-0.3) - - def input_shape_error(): - x = paddle.randn([1, 3]) - out = paddle.nn.functional.interpolate(x, scale_factor='scale') - - def mode_error(): - x = paddle.randn([1, 3]) - out = paddle.nn.functional.interpolate( - x, scale_factor='scale', mode="BILINEAR" - ) - - self.assertRaises(ValueError, attr_data_format) - self.assertRaises(TypeError, attr_scale_type) - self.assertRaises(ValueError, attr_scale_value) - self.assertRaises(ValueError, input_shape_error) - self.assertRaises(ValueError, mode_error) - - @unittest.skipIf( not fluid.core.is_compiled_with_cuda(), "core is not compiled with CUDA" ) diff --git a/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py b/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py index 66d55884224ad7..42a716cceee427 100755 --- a/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py @@ -17,9 +17,7 @@ import numpy as np from op_test import OpTest -import 
paddle.fluid as fluid import paddle.fluid.core as core -from paddle.nn.functional import interpolate def trilinear_interp_np( @@ -623,85 +621,5 @@ def init_test_case(self): self.scale_by_1Dtensor = True -class TestTrilinearInterpAPI(unittest.TestCase): - def test_case(self): - x = fluid.data(name="x", shape=[2, 3, 6, 9, 4], dtype="float32") - y = fluid.data(name="y", shape=[2, 6, 9, 4, 3], dtype="float32") - - dim = fluid.data(name="dim", shape=[1], dtype="int32") - shape_tensor = fluid.data(name="shape_tensor", shape=[3], dtype="int32") - actual_size = fluid.data(name="actual_size", shape=[3], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32" - ) - - out1 = fluid.layers.resize_trilinear( - y, out_shape=[12, 18, 8], data_format='NDHWC' - ) - out2 = fluid.layers.resize_trilinear(x, out_shape=[12, dim, 8]) - out3 = fluid.layers.resize_trilinear(x, out_shape=shape_tensor) - out4 = fluid.layers.resize_trilinear( - x, out_shape=[4, 4, 8], actual_shape=actual_size - ) - out5 = fluid.layers.resize_trilinear(x, scale=scale_tensor) - out6 = interpolate( - x, scale_factor=scale_tensor, mode='trilinear', data_format="NCDHW" - ) - out7 = interpolate( - x, size=[4, 4, 8], mode='trilinear', data_format="NCDHW" - ) - out8 = interpolate( - x, size=shape_tensor, mode='trilinear', data_format="NCDHW" - ) - - x_data = np.random.random((2, 3, 6, 9, 4)).astype("float32") - dim_data = np.array([18]).astype("int32") - shape_data = np.array([12, 18, 8]).astype("int32") - actual_size_data = np.array([12, 18, 8]).astype("int32") - scale_data = np.array([2.0]).astype("float32") - - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - results = exe.run( - fluid.default_main_program(), - feed={ - "x": x_data, - "y": np.transpose(x_data, (0, 2, 3, 4, 1)), - "dim": dim_data, - "shape_tensor": shape_data, - "actual_size": actual_size_data, - "scale_tensor": scale_data, - }, - fetch_list=[out1, out2, out3, out4, out5], - return_numpy=True, - ) - - expect_res = trilinear_interp_np( - x_data, out_d=12, out_h=18, out_w=8, align_mode=1 - ) - np.testing.assert_allclose( - results[0], np.transpose(expect_res, (0, 2, 3, 4, 1)), rtol=1e-05 - ) - for i in range(len(results) - 1): - np.testing.assert_allclose(results[i + 1], expect_res, rtol=1e-05) - - -class TestTrilinearInterpOpException(unittest.TestCase): - def test_exception(self): - input = fluid.data(name="input", shape=[2, 3, 6, 9, 4], dtype="float32") - - def attr_data_format(): - # for 5-D input, data_format only can be NCDHW or NDHWC - out = fluid.layers.resize_trilinear( - input, out_shape=[4, 8, 4], data_format='NHWC' - ) - - self.assertRaises(ValueError, attr_data_format) - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_trilinear_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_trilinear_interp_v2_op.py index 88b4b607261bf9..a3f7192a05837f 100755 --- a/python/paddle/fluid/tests/unittests/test_trilinear_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_trilinear_interp_v2_op.py @@ -20,7 +20,6 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.framework import _test_eager_guard from paddle.nn.functional import interpolate np.random.seed(123) @@ -741,100 +740,6 @@ def init_test_case(self): self.scale_by_1Dtensor = True -class TestTrilinearInterpAPI(unittest.TestCase): - def 
test_imperative_case(self): - with _test_eager_guard(): - self.func_case() - self.func_case() - - def func_case(self): - x = fluid.data(name="x", shape=[2, 3, 6, 9, 4], dtype="float32") - y = fluid.data(name="y", shape=[2, 6, 9, 4, 3], dtype="float32") - - dim = fluid.data(name="dim", shape=[1], dtype="int32") - shape_tensor = fluid.data(name="shape_tensor", shape=[3], dtype="int32") - actual_size = fluid.data(name="actual_size", shape=[3], dtype="int32") - scale_tensor = fluid.data( - name="scale_tensor", shape=[1], dtype="float32" - ) - - out1 = fluid.layers.resize_trilinear( - y, out_shape=[12, 18, 8], data_format='NDHWC' - ) - out2 = fluid.layers.resize_trilinear(x, out_shape=[12, dim, 8]) - out3 = fluid.layers.resize_trilinear(x, out_shape=shape_tensor) - out4 = fluid.layers.resize_trilinear( - x, out_shape=[4, 4, 8], actual_shape=actual_size - ) - out5 = fluid.layers.resize_trilinear(x, scale=scale_tensor) - out6 = interpolate( - x, scale_factor=scale_tensor, mode='trilinear', data_format="NCDHW" - ) - out7 = interpolate( - x, size=[4, 4, 8], mode='trilinear', data_format="NCDHW" - ) - out8 = interpolate( - x, size=shape_tensor, mode='trilinear', data_format="NCDHW" - ) - - x_data = np.random.random((2, 3, 6, 9, 4)).astype("float32") - dim_data = np.array([18]).astype("int32") - shape_data = np.array([12, 18, 8]).astype("int32") - actual_size_data = np.array([12, 18, 8]).astype("int32") - scale_data = np.array([2.0]).astype("float32") - - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - results = exe.run( - fluid.default_main_program(), - feed={ - "x": x_data, - "y": np.transpose(x_data, (0, 2, 3, 4, 1)), - "dim": dim_data, - "shape_tensor": shape_data, - "actual_size": actual_size_data, - "scale_tensor": scale_data, - }, - fetch_list=[out1, out2, out3, out4, out5], - return_numpy=True, - ) - - expect_res = trilinear_interp_np( - x_data, out_d=12, out_h=18, out_w=8, align_mode=1 - ) - np.testing.assert_allclose( - results[0], np.transpose(expect_res, (0, 2, 3, 4, 1)), rtol=1e-05 - ) - for i in range(len(results) - 1): - np.testing.assert_allclose(results[i + 1], expect_res, rtol=1e-05) - - # Follow the calculation of preceding out6, out7, out8. - # To pass CI-coverage, calculate out9 without verifying accuracy. 
- # Preceding PR link: https://github.com/PaddlePaddle/Paddle/pull/26520/files#diff-ee0c2b73d08659e90a8f3ac48451a6588d35e1613742f864f9aad4394e12c290 - with fluid.dygraph.guard(): - x = fluid.dygraph.to_variable(x_data) - out9 = interpolate( - x, size=[12, 18, 8], mode='trilinear', data_format="NCDHW" - ) - - -class TestTrilinearInterpOpException(unittest.TestCase): - def test_exception(self): - input = fluid.data(name="input", shape=[2, 3, 6, 9, 4], dtype="float32") - - def attr_data_format(): - # for 5-D input, data_format only can be NCDHW or NDHWC - out = fluid.layers.resize_trilinear( - input, out_shape=[4, 8, 4], data_format='NHWC' - ) - - self.assertRaises(ValueError, attr_data_format) - - @unittest.skipIf( not fluid.core.is_compiled_with_cuda(), "core is not compiled with CUDA" ) From 595338c67a3b1871118741c37c0dafe1276e98d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=85=AD=E4=B8=AA=E9=AA=A8=E5=A4=B4?= <46243324+zrr1999@users.noreply.github.com> Date: Mon, 5 Dec 2022 17:04:20 +0800 Subject: [PATCH 153/154] fix bug in paddle/phi/api/yaml/generator (#48659) * fix bug * fix bugs in api_gen tools --- paddle/fluid/operators/generator/cross_validate.py | 4 ++-- paddle/phi/api/yaml/generator/api_gen.py | 2 +- paddle/phi/api/yaml/generator/backward_api_gen.py | 2 +- paddle/phi/api/yaml/generator/intermediate_api_gen.py | 2 +- paddle/phi/api/yaml/generator/wrapped_infermeta_gen.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/operators/generator/cross_validate.py b/paddle/fluid/operators/generator/cross_validate.py index 7c5014994c4ac4..82d21f47c62d0c 100644 --- a/paddle/fluid/operators/generator/cross_validate.py +++ b/paddle/fluid/operators/generator/cross_validate.py @@ -40,14 +40,14 @@ def main(forward_op_yaml_paths, backward_op_yaml_paths): '--forward_yaml_paths', type=str, nargs='+', - default=str(current_dir / "op .parsed.yaml"), + default=[str(current_dir / "op .parsed.yaml")], help="forward op yaml file.", ) parser.add_argument( '--backward_yaml_paths', type=str, nargs='+', - default=str(current_dir / "backward_op .parsed.yaml"), + default=[str(current_dir / "backward_op .parsed.yaml")], help="backward op yaml file.", ) diff --git a/paddle/phi/api/yaml/generator/api_gen.py b/paddle/phi/api/yaml/generator/api_gen.py index c9ebfd7fea387d..63e6d6cb5040ed 100644 --- a/paddle/phi/api/yaml/generator/api_gen.py +++ b/paddle/phi/api/yaml/generator/api_gen.py @@ -409,7 +409,7 @@ def main(): '--api_yaml_path', help='path to api yaml file', nargs='+', - default='paddle/phi/api/yaml/ops.yaml', + default=['paddle/phi/api/yaml/ops.yaml'], ) parser.add_argument( diff --git a/paddle/phi/api/yaml/generator/backward_api_gen.py b/paddle/phi/api/yaml/generator/backward_api_gen.py index 75914ac4004975..4d10f8b56bcf68 100644 --- a/paddle/phi/api/yaml/generator/backward_api_gen.py +++ b/paddle/phi/api/yaml/generator/backward_api_gen.py @@ -351,7 +351,7 @@ def main(): '--backward_yaml_path', help='path to backward yaml file', nargs='+', - default='paddle/phi/api/yaml/backward.yaml', + default=['paddle/phi/api/yaml/backward.yaml'], ) parser.add_argument( '--backward_header_path', diff --git a/paddle/phi/api/yaml/generator/intermediate_api_gen.py b/paddle/phi/api/yaml/generator/intermediate_api_gen.py index 0c53a578b3201b..9cab819aa4a6f6 100644 --- a/paddle/phi/api/yaml/generator/intermediate_api_gen.py +++ b/paddle/phi/api/yaml/generator/intermediate_api_gen.py @@ -147,7 +147,7 @@ def main(): '--api_yaml_path', nargs='+', help='path to api yaml file', - 
default='paddle/phi/api/yaml/ops.yaml', + default=['paddle/phi/api/yaml/ops.yaml'], ) parser.add_argument( diff --git a/paddle/phi/api/yaml/generator/wrapped_infermeta_gen.py b/paddle/phi/api/yaml/generator/wrapped_infermeta_gen.py index fdbb9caa3301cc..d9dc4dfc1cc2c3 100644 --- a/paddle/phi/api/yaml/generator/wrapped_infermeta_gen.py +++ b/paddle/phi/api/yaml/generator/wrapped_infermeta_gen.py @@ -181,7 +181,7 @@ def main(): '--api_yaml_path', help='path to api yaml file', nargs='+', - default='paddle/phi/api/yaml/ops.yaml', + default=['paddle/phi/api/yaml/ops.yaml'], ) parser.add_argument( '--wrapped_infermeta_header_path', From e707ee53845d1cd2d946a57bdbfa252904794545 Mon Sep 17 00:00:00 2001 From: Nyakku Shigure Date: Mon, 5 Dec 2022 17:19:48 +0800 Subject: [PATCH 154/154] [Dy2St] optimize `print` function convertor to display Tensor at compile time (#48672) * [Dy2St] refactor convert_print to display Tensor in compile time --- .../unittests/dygraph_to_static/test_print.py | 170 ++++++------------ python/paddle/jit/dy2static/__init__.py | 1 - .../paddle/jit/dy2static/ast_transformer.py | 4 - .../paddle/jit/dy2static/call_transformer.py | 1 + .../paddle/jit/dy2static/convert_call_func.py | 4 + .../paddle/jit/dy2static/convert_operators.py | 16 +- .../paddle/jit/dy2static/print_transformer.py | 59 ------ 7 files changed, 66 insertions(+), 189 deletions(-) delete mode 100644 python/paddle/jit/dy2static/print_transformer.py diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py index a593bd37a9174c..21de18a9f531dd 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py @@ -16,149 +16,82 @@ import numpy +import paddle import paddle.fluid as fluid -from paddle.jit import ProgramTranslator -from paddle.jit.api import declarative +from paddle.jit import ProgramTranslator, to_static program_translator = ProgramTranslator() -# 1. print VarBase -@declarative +# 1. print Tensor +@to_static def dyfunc_print_variable(x): - """ - PY2: - Print(dest=None, values=[Name(id='x_v', annotation=None, type_comment=None)], nl=True)], - PY3: - Expr( - value=Call(func=Name(id='print', annotation=None, type_comment=None), - args=[Name(id='x_v', annotation=None, type_comment=None)], - keywords=[])) - """ # NOTE: transform to static code, var name will be changed - x_v = fluid.dygraph.to_variable(x) - print(x_v) + x_t = paddle.to_tensor(x) + print(x_t) # 2. print ndarray -@declarative +@to_static def dyfunc_print_ndarray(x): - """ - PY2: - Print(dest=None, values=[Name(id='x', annotation=None, type_comment=None) - PY3: - Expr( - value=Call(func=Name(id='print', annotation=None, type_comment=None), - args=[Name(id='x', annotation=None, type_comment=None)], - keywords=[])) - """ print(x) -# 3. print VarBase with format -@declarative +# 3. 
print Tensor with format +@to_static def dyfunc_print_with_format(x): - """ - PY2: - Print(dest=None, - values=[ - Call( - func=Attribute(value=Constant(value='PrintVariable: {}', kind=None), attr='format'), - args=[Name(id='x_v', annotation=None, type_comment=None)], - keywords=[])], - nl=True) - PY3: - Expr( - value=Call(func=Name(id='print', annotation=None, type_comment=None), - args=[ - Call( - func=Attribute(value=Constant(value='PrintVariable: {}', kind=None), attr='format'), - args=[Name(id='x_v', annotation=None, type_comment=None)], - keywords=[])], - keywords=[])) - """ - x_v = fluid.dygraph.to_variable(x) - print("PrintVariable: {}".format(x_v)) - - -# 4. print VarBase with format 2 -@declarative + x_t = paddle.to_tensor(x) + print("PrintTensor: {}".format(x_t)) + + +# 4. print Tensor with format 2 +@to_static def dyfunc_print_with_format2(x): - """ - PY2: - Print(dest=None, - values=[ - BinOp(left=Constant(value='PrintVariable: %s', kind=None), - op=Mod, - right=Name(id='x_v', annotation=None, type_comment=None))], - nl=True) - PY3: - Expr( - value=Call(func=Name(id='print', annotation=None, type_comment=None), - args=[ - BinOp(left=Constant(value='PrintVariable: %s', kind=None), - op=Mod, - right=Name(id='x_v', annotation=None, type_comment=None))], - keywords=[])) - """ - x_v = fluid.dygraph.to_variable(x) - print("PrintVariable: %s" % (x_v)) - - -# 5. print VarBase in control flow1 -@declarative + x_t = paddle.to_tensor(x) + print("PrintTensor: %s" % (x_t)) + + +# 5. print Tensor in control flow1 +@to_static def dyfunc_print_with_ifelse(x): - x_v = fluid.dygraph.to_variable(x) - if len(x_v.shape) > 1: - print(x_v) + x_t = paddle.to_tensor(x) + if len(x_t.shape) > 1: + print(x_t) else: - print(x_v) + print(x_t) -# 6. print mutiple VarBases -@declarative -def dyfunc_print_multi_vars(x): - """ - # NOTE: y_v type is error before cur PR in this case - Assign(targets=[Name(id='y_v', annotation=None, type_comment=None)], - value=BinOp(left=Name(id='x_v', annotation=None, type_comment=None), op=Mult, right=Constant(value=2, kind=None))) - """ - x_v = fluid.dygraph.to_variable(x) - y_v = x_v * 2 - print(x_v) - print(y_v) +# 6. print multiple Tensor +@to_static +def dyfunc_print_multi_tensor(x): + x_t = paddle.to_tensor(x) + y_t = x_t * 2 + print(x_t) + print(y_t) -# 7. print continue VarBase -@declarative +# 7. print continue Tensor +@to_static def dyfunc_print_continue_vars(x): - """ - PY3: - Expr( - value=Call(func=Name(id='print', annotation=None, type_comment=None), - args=[Name(id='x_v', annotation=None, type_comment=None), - Name(id='y_v', annotation=None, type_comment=None)], - keywords=[])) - PY2: - Print(dest=None, - values=[ - Tuple( - elts=[Name(id='x_v', annotation=None, type_comment=None), - Name(id='y_v', annotation=None, type_comment=None)])], - nl=True) - """ - x_v = fluid.dygraph.to_variable(x) - y_v = x_v * 2 - print(x_v, y_v) + x_t = paddle.to_tensor(x) + y_t = x_t * 2 + print(x_t, y_t) + + +# 8. 
print with kwargs +@to_static +def dyfunc_print_with_kwargs(x): + x_t = paddle.to_tensor(x) + print("Tensor", x_t, end='\n\n', sep=': ') class TestPrintBase(unittest.TestCase): def setUp(self): self.input = numpy.ones(5).astype("int32") self.place = ( - fluid.CUDAPlace(0) - if fluid.is_compiled_with_cuda() - else fluid.CPUPlace() + paddle.CUDAPlace(0) + if paddle.is_compiled_with_cuda() + else paddle.CPUPlace() ) self.set_test_func() @@ -207,9 +140,9 @@ def set_test_func(self): self.dygraph_func = dyfunc_print_with_ifelse -class TestPrintMultipleVar(TestPrintVariable): +class TestPrintMultipleTensor(TestPrintVariable): def set_test_func(self): - self.dygraph_func = dyfunc_print_multi_vars + self.dygraph_func = dyfunc_print_multi_tensor class TestPrintContinueVar(TestPrintVariable): @@ -217,5 +150,10 @@ def set_test_func(self): self.dygraph_func = dyfunc_print_continue_vars +class TestPrintWithKwargs(TestPrintVariable): + def set_test_func(self): + self.dygraph_func = dyfunc_print_with_kwargs + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/jit/dy2static/__init__.py b/python/paddle/jit/dy2static/__init__.py index c42116c21065de..89204e62cee143 100644 --- a/python/paddle/jit/dy2static/__init__.py +++ b/python/paddle/jit/dy2static/__init__.py @@ -25,7 +25,6 @@ from .convert_operators import convert_logical_not as Not # noqa: F401 from .convert_operators import convert_logical_or as Or # noqa: F401 from .convert_operators import convert_pop as Pop # noqa: F401 -from .convert_operators import convert_print as Print # noqa: F401 from .convert_operators import convert_shape as Shape # noqa: F401 from .convert_operators import convert_while_loop as While # noqa: F401 from .convert_operators import unpack_by_structure as Unpack # noqa: F401 diff --git a/python/paddle/jit/dy2static/ast_transformer.py b/python/paddle/jit/dy2static/ast_transformer.py index 826232e723f607..2acbda4c8b2aad 100644 --- a/python/paddle/jit/dy2static/ast_transformer.py +++ b/python/paddle/jit/dy2static/ast_transformer.py @@ -52,9 +52,6 @@ from .loop_transformer import ( LoopTransformer, ) -from .print_transformer import ( - PrintTransformer, -) from .return_transformer import ( ReturnTransformer, ) @@ -135,7 +132,6 @@ def transfer_from_node_type(self, node_wrapper): LoopTransformer, # for/while -> while_op IfElseTransformer, # if/else -> cond_op AssertTransformer, # assert statement - PrintTransformer, # print statement CallTransformer, # transform call recursively CastTransformer, # type casting statement DecoratorTransformer, # transform decorators to function call diff --git a/python/paddle/jit/dy2static/call_transformer.py b/python/paddle/jit/dy2static/call_transformer.py index 11f0f6624e8fed..c5ae35ee3d4ac1 100644 --- a/python/paddle/jit/dy2static/call_transformer.py +++ b/python/paddle/jit/dy2static/call_transformer.py @@ -60,6 +60,7 @@ def _no_need_convert_call(self, node): 'zip', 'range', 'enumerate', + 'print', } is_builtin = eval("is_builtin({})".format(func_str)) need_convert = func_str in need_convert_builtin_func_list diff --git a/python/paddle/jit/dy2static/convert_call_func.py b/python/paddle/jit/dy2static/convert_call_func.py index e0f393028cfac4..da0560cb346923 100644 --- a/python/paddle/jit/dy2static/convert_call_func.py +++ b/python/paddle/jit/dy2static/convert_call_func.py @@ -30,6 +30,7 @@ convert_zip, convert_range, convert_enumerate, + convert_print, ) from paddle.jit.dy2static.logging_utils import ( @@ -215,6 +216,9 @@ def dyfunc(x): if is_builtin(func, "enumerate"): return 
convert_enumerate + if is_builtin(func, "print"): + return convert_print + if is_builtin(func) or is_unsupported(func): return func diff --git a/python/paddle/jit/dy2static/convert_operators.py b/python/paddle/jit/dy2static/convert_operators.py index df20a5c4e0c98c..1d3e23a4b96b72 100644 --- a/python/paddle/jit/dy2static/convert_operators.py +++ b/python/paddle/jit/dy2static/convert_operators.py @@ -736,17 +736,15 @@ def convert_assert(cond, message=""): assert cond, message -def convert_print(*args): +def convert_print(*objects, sep=' ', end='\n', file=None, flush=False): """ - A function representing Python ``print`` statement. Note: this is a basic - python function so we haven't handle sep, end, file and flush parameters of - python function. + A function representing Python ``print`` function. It will print all arguments + at compile time and only print the Tensor values at runtime. """ - for var in args: - if isinstance(var, Variable): - var = Print(var) - else: - print(var) + for obj in objects: + if isinstance(obj, Variable): + Print(obj) + print(*objects, sep=sep, end=end, file=file, flush=flush) def convert_pop(target, *args): diff --git a/python/paddle/jit/dy2static/print_transformer.py b/python/paddle/jit/dy2static/print_transformer.py deleted file mode 100644 index aa4bc2c219bbce..00000000000000 --- a/python/paddle/jit/dy2static/print_transformer.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.utils import gast - -from paddle.jit.dy2static.static_analysis import ( - AstNodeWrapper, - StaticAnalysisVisitor, -) -from .base_transformer import ( - BaseTransformer, -) - - -class PrintTransformer(BaseTransformer): - """ - This class transforms python print function to fluid.layers.Print. - """ - - def __init__(self, wrapper_root): - assert isinstance( - wrapper_root, AstNodeWrapper - ), "Input non-AstNodeWrapper node for the initialization of PrintTransformer." - self.wrapper_root = wrapper_root - self.root = wrapper_root.node - - self.static_analysis_visitor = StaticAnalysisVisitor(self.root) - self.node_to_wrapper_map = ( - self.static_analysis_visitor.get_node_to_wrapper_map() - ) - - def transform(self): - self.visit(self.root) - - # NOTE: deal with print in PY3 - def visit_Call(self, node): - if isinstance(node.func, gast.Name) and node.func.id == 'print': - node = self._create_print_node(node.args) - return node - - # NOTE: deal with print in PY2 - def visit_Print(self, node): - convert_print_node = self._create_print_node(node.values) - return gast.Expr(value=convert_print_node) - - def _create_print_node(self, print_args): - convert_print_func = gast.parse('_jst.Print').body[0].value - return gast.Call(func=convert_print_func, args=print_args, keywords=[])
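
Note (illustrative only, not part of the patch series): the last commit reroutes `print` through `convert_call_func`, so the reworked `convert_print` now forwards the standard keyword arguments (`sep`, `end`, `file`, `flush`) and still emits the Print op for static-graph Tensors. A minimal usage sketch, assuming a Paddle build that already contains these commits:

    # Hypothetical example mirroring the new dyfunc_print_with_kwargs test.
    import numpy as np
    import paddle

    @paddle.jit.to_static
    def show(x):
        x_t = paddle.to_tensor(x)
        # At compile time the symbolic Variable description is printed;
        # at runtime the Print op emits the actual tensor values.
        print("Tensor", x_t, sep=': ', end='\n\n')
        return x_t

    show(np.ones([2, 3], dtype='float32'))
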