From 15a5bd9cdbdd371fa8d4634bc340223e1c1418ce Mon Sep 17 00:00:00 2001
From: co63oc
Date: Sun, 7 Apr 2024 20:34:00 +0800
Subject: [PATCH 1/6] Fix

---
 paddle/fluid/framework/ir/CMakeLists.txt      |   1 -
 .../framework/ir/graph_pattern_detector.cc    |  46 --
 .../framework/ir/graph_pattern_detector.h     |  25 -
 .../ir/seqconv_eltadd_relu_fuse_pass.cc       | 163 -------
 .../ir/seqconv_eltadd_relu_fuse_pass.h        |  42 --
 .../inference/api/paddle_pass_builder.cc      |   3 +-
 .../sequence_ops/sequence_conv_op.cc          | 279 -----------
 .../sequence_ops/sequence_conv_op.cu.cc       |  25 -
 .../operators/sequence_ops/sequence_conv_op.h | 204 --------
 .../sequence_ops/sequence_conv_op_xpu.cc      | 374 ---------------
 python/paddle/static/nn/__init__.py           |   2 -
 python/paddle/static/nn/sequence_lod.py       | 141 ------
 .../quantization/quant2_int8_onednn_pass.py   |   1 -
 test/book/notest_understand_sentiment.py      | 266 -----------
 test/book/test_recommender_system.py          | 392 ---------------
 .../test_seqconv_eltadd_relu_fuse_pass.py     | 120 -----
 test/legacy_test/CMakeLists.txt               |   1 -
 test/legacy_test/dist_text_classification.py  | 242 ----------
 test/legacy_test/nets.py                      |  80 ----
 .../test_dist_text_classification.py          |  16 -
 .../test_fusion_seqconv_eltadd_relu_op.py     |  95 ----
 test/sequence/CMakeLists.txt                  |   1 -
 test/sequence/test_sequence_conv.py           | 312 ------------
 test/xpu/test_sequence_conv_op_xpu.py         | 448 ------------------
 24 files changed, 1 insertion(+), 3278 deletions(-)
 delete mode 100644 paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc
 delete mode 100644 paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h
 delete mode 100644 paddle/fluid/operators/sequence_ops/sequence_conv_op.cc
 delete mode 100644 paddle/fluid/operators/sequence_ops/sequence_conv_op.cu.cc
 delete mode 100644 paddle/fluid/operators/sequence_ops/sequence_conv_op.h
 delete mode 100644 paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc
 delete mode 100644 test/book/notest_understand_sentiment.py
 delete mode 100644 test/book/test_recommender_system.py
 delete mode 100644 test/ir/inference/test_seqconv_eltadd_relu_fuse_pass.py
 delete mode 100644 test/legacy_test/dist_text_classification.py
 delete mode 100644 test/legacy_test/test_fusion_seqconv_eltadd_relu_op.py
 delete mode 100644 test/sequence/test_sequence_conv.py
 delete mode 100644 test/xpu/test_sequence_conv_op_xpu.py

diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index d310d4ea5a4ee..faa4c17411401 100755
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -81,7 +81,6 @@ pass_library(seq_concat_fc_fuse_pass inference)
 pass_library(multi_batch_merge_pass base)
 pass_library(map_op_to_another_pass inference)
 pass_library(conv_bn_fuse_pass inference)
-pass_library(seqconv_eltadd_relu_fuse_pass inference)
 pass_library(seqpool_concat_fuse_pass inference)
 pass_library(seqpool_cvm_concat_fuse_pass inference)
 pass_library(repeated_fc_relu_fuse_pass inference)
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc
index 3910e7586e35c..2ee2f7399cf60 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -1057,52 +1057,6 @@ PDNode *patterns::OperatorReshape2::operator()(const std::string &operator_type,
   return reshape2_out;
 }
 
-PDNode *patterns::SeqConvEltAddRelu::operator()(
-    paddle::framework::ir::PDNode *seqconv_input) {
-  // Create Operators
-  seqconv_input->assert_is_op_input("sequence_conv", "X");
-  auto *seqconv_op = pattern->NewNode(seqconv_repr())
-                         ->assert_is_op("sequence_conv")
-                         ->assert_has_n_inputs(2)
-                         ->assert_op_attr<bool>("paddingTrainable", false)
-                         ->assert_op_attr<int>("contextStride", 1);
-
-  auto *eltadd_op =
-      pattern->NewNode(eltadd_repr())->assert_is_op("elementwise_add");
-  auto *relu_op = pattern->NewNode(relu_repr())->assert_is_op("relu");
-  // Create variables
-  // Filter
-  auto *seqconv_weight_var =
-      pattern->NewNode(seqconv_weight_repr())
-          ->AsInput()
-          ->assert_is_persistable_var()
-          ->assert_is_op_input("sequence_conv", "Filter");
-  // Bias
-  auto *eltadd_bias_var = pattern->NewNode(eltadd_bias_repr())
-                              ->AsInput()
-                              ->assert_is_op_input("elementwise_add");
-  // intermediate variable, will be removed in the IR after fuse.
-  auto *seqconv_out_var = pattern->NewNode(seqconv_out_repr())
-                              ->AsIntermediate()
-                              ->assert_is_only_output_of_op("sequence_conv")
-                              ->assert_is_op_input("elementwise_add");
-  auto *eltadd_out_var = pattern->NewNode(eltadd_out_repr())
-                             ->AsIntermediate()
-                             ->assert_is_only_output_of_op("elementwise_add")
-                             ->assert_is_only_input_of_op("relu");
-  // output
-  auto *relu_out_var = pattern->NewNode(relu_out_repr())
-                           ->AsOutput()
-                           ->assert_is_op_output("relu");
-
-  seqconv_op->LinksFrom({seqconv_input, seqconv_weight_var})
-      .LinksTo({seqconv_out_var});
-  eltadd_op->LinksFrom({seqconv_out_var, eltadd_bias_var})
-      .LinksTo({eltadd_out_var});
-  relu_op->LinksFrom({eltadd_out_var}).LinksTo({relu_out_var});
-  return relu_out_var;
-}
-
 PDNode *patterns::FC::operator()(paddle::framework::ir::PDNode *x,
                                  bool with_bias,
                                  bool with_relu) {
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h
index 4eac3440a4514..f7f3a2f9c3ea1 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -600,31 +600,6 @@ struct OperatorReshape2 : public PatternBase {
   PATTERN_DECL_NODE(reshape2_out);
 };
 
-// SEQCONV with Elementwise_Add ReLU
-// op: seqconv + elementwise_add + relu
-// named nodes:
-// seqconv_input, seqconv_weight,
-// seqconv_out, seqconv,
-// elementwise_add_bias, elementwise_add_out, elementwise_add
-// relu_out, relu
-struct SeqConvEltAddRelu : public PatternBase {
-  SeqConvEltAddRelu(PDPattern* pattern, const std::string& name_scope)
-      : PatternBase(pattern, name_scope, "seqconv_eltadd_relu") {}
-
-  PDNode* operator()(PDNode* seqconv_input);
-
-  // declare operator node's name
-  PATTERN_DECL_NODE(seqconv);
-  PATTERN_DECL_NODE(eltadd);
-  PATTERN_DECL_NODE(relu);
-  // declare variable node's name
-  PATTERN_DECL_NODE(seqconv_weight);
-  PATTERN_DECL_NODE(seqconv_out);
-  PATTERN_DECL_NODE(eltadd_bias);
-  PATTERN_DECL_NODE(eltadd_out);
-  PATTERN_DECL_NODE(relu_out);
-};
-
 // FC with bias
 // op: mul + elementwise_add
 // named nodes:
diff --git a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc
deleted file mode 100644
index c2015d6b4305c..0000000000000
--- a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc
+++ /dev/null
@@ -1,163 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h"
-
-#include <string>
-
-#include "paddle/fluid/framework/op_version_registry.h"
-
-namespace paddle {
-namespace framework {
-class Scope;
-}  // namespace framework
-}  // namespace paddle
-
-namespace paddle {
-namespace framework {
-namespace ir {
-
-SeqConvEltAddReluFusePass::SeqConvEltAddReluFusePass() {
-  AddOpCompat(OpCompat("sequence_conv"))
-      .AddInput("X")
-      .IsTensor()
-      .End()
-      .AddInput("Filter")
-      .IsTensor()
-      .End()
-      .AddInput("PaddingData")
-      .IsOptional()
-      .IsTensor()
-      .End()
-      .AddOutput("Out")
-      .IsTensor()
-      .End()
-      .AddAttr("contextLength")
-      .IsNumGT(0)
-      .End()
-      .AddAttr("contextStart")  // the contextStart attribute can be negative,
-                                // unconstrained
-      .End()
-      .AddAttr("contextStride")
-      .IsNumEQ(1)
-      .End();
-
-  AddOpCompat(OpCompat("elementwise_add"))
-      .AddInput("X")
-      .IsTensor()
-      .End()
-      .AddInput("Y")
-      .IsTensor()
-      .End()
-      .AddOutput("Out")
-      .IsTensor()
-      .End()
-      .AddAttr("axis")
-      .IsNumEQ(1)
-      .End();
-
-  AddOpCompat(OpCompat("relu"))
-      .AddInput("X")
-      .IsTensor()
-      .End()
-      .AddOutput("Out")
-      .IsTensor()
-      .End();
-}
-
-class Node;
-
-void SeqConvEltAddReluFusePass::ApplyImpl(ir::Graph* graph) const {
-  FusePassBase::Init(name_scope_, graph);
-  GraphPatternDetector gpd;
-  auto* pattern = gpd.mutable_pattern();
-
-  PDNode* x = pattern->NewNode(patterns::PDNodeName(name_scope_, "X"))
-                  ->assert_is_op_input("sequence_conv")
-                  ->assert_var_not_persistable();
-  patterns::SeqConvEltAddRelu fuse_pattern(pattern, name_scope_);
-  fuse_pattern(x);
-
-  // Create New OpDesc
-  auto fuse_creator = [&](Node* seqconv,
-                          Node* input,
-                          Node* seqconv_weight,
-                          Node* eltadd_bias,
-                          Node* relu_out) {
-    OpDesc op_desc;
-    op_desc.SetType("fusion_seqconv_eltadd_relu");
-    op_desc.SetInput("X", {input->Name()});
-    op_desc.SetInput("Filter", {seqconv_weight->Name()});
-    op_desc.SetInput("Bias", {eltadd_bias->Name()});
-    op_desc.SetAttr("contextLength", seqconv->Op()->GetAttr("contextLength"));
-    op_desc.SetAttr("contextStart", seqconv->Op()->GetAttr("contextStart"));
-    op_desc.SetAttr("contextStride", seqconv->Op()->GetAttr("contextStride"));
-    const std::string ColMat = patterns::UniqueKey("SeqConvColMat");
-    op_desc.SetOutput("ColMat", {ColMat});
-    op_desc.SetOutput("Out", {relu_out->Name()});
-    VarDesc key(ColMat);
-    key.SetPersistable(false);
-    auto* key_col_mat = graph->CreateVarNode(&key);
-
-    auto* op = graph->CreateOpNode(&op_desc);
-    IR_NODE_LINK_TO(input, op);
-    IR_NODE_LINK_TO(seqconv_weight, op);
-    IR_NODE_LINK_TO(eltadd_bias, op);
-    IR_NODE_LINK_TO(op, relu_out);
-    IR_NODE_LINK_TO(op, key_col_mat);
-    return op;
-  };
-
-  int fusion_count{0};
-
-  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
-                     Graph* g) {
-    if (!IsCompat(subgraph, g)) {
-      LOG(WARNING) << "Pass in op compat failed.";
-      return;
-    }
-    VLOG(4) << "handle SeqConv EltAdd Relu fuse";
-    GET_IR_NODE_FROM_SUBGRAPH(seqconv, seqconv, fuse_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(seqconv_weight, seqconv_weight, fuse_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(seqconv_out, seqconv_out, fuse_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(eltadd, eltadd, fuse_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(eltadd_bias, eltadd_bias, fuse_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(eltadd_out, eltadd_out, fuse_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(relu, relu, fuse_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(relu_out, relu_out, fuse_pattern);
-
-    fuse_creator(
-        seqconv, subgraph.at(x), seqconv_weight, eltadd_bias, relu_out);
-    std::unordered_set<const Node*> marked_nodes(
-        {seqconv, seqconv_out, eltadd, eltadd_out, relu});
-    GraphSafeRemoveNodes(graph, marked_nodes);
-    ++fusion_count;
-  };
-
-  gpd(graph, handler);
-  AddStatis(fusion_count);
-}
-
-}  // namespace ir
-}  // namespace framework
-}  // namespace paddle
-
-REGISTER_PASS(seqconv_eltadd_relu_fuse_pass,
-              paddle::framework::ir::SeqConvEltAddReluFusePass);
-REGISTER_PASS_CAPABILITY(seqconv_eltadd_relu_fuse_pass)
-    .AddCombination(
-        paddle::framework::compatible::OpVersionComparatorCombination()
-            .EQ("sequence_conv", 0)
-            .LE("elementwise_add", 1)
-            .EQ("relu", 0));
diff --git a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h
deleted file mode 100644
index fe06002251ae2..0000000000000
--- a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <string>
-
-#include "paddle/fluid/framework/ir/fuse_pass_base.h"
-#include "paddle/fluid/framework/ir/graph.h"
-#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
-
-namespace paddle {
-namespace framework {
-namespace ir {
-
-class Graph;
-
-class SeqConvEltAddReluFusePass : public FusePassBase {
- public:
-  SeqConvEltAddReluFusePass();
-  virtual ~SeqConvEltAddReluFusePass() {}
-
- protected:
-  void ApplyImpl(ir::Graph* graph) const override;
-
-  const std::string name_scope_{"seqconv_eltadd_relu_fuse"};
-};
-
-}  // namespace ir
-}  // namespace framework
-}  // namespace paddle
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index 48d25ebc390b5..f53e89d9f812c 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -234,8 +234,7 @@ const std::vector<std::string> kCINNCompilerPasses{
 const std::vector<std::string> CpuBasicPasses{
     "simplify_with_basic_ops_pass",  //
     "layer_norm_fuse_pass",
-    "attention_lstm_fuse_pass",       //
-    "seqconv_eltadd_relu_fuse_pass",  //
+    "attention_lstm_fuse_pass",  //
     //   "seqpool_concat_fuse_pass",    //
     "seqpool_cvm_concat_fuse_pass",  //
     //   "embedding_fc_lstm_fuse_pass",  //
diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc b/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc
deleted file mode 100644
index c94f57807cd52..0000000000000
--- a/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc
+++ /dev/null
@@ -1,279 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/sequence_ops/sequence_conv_op.h" - -#include -#include -#include -#include - -namespace paddle { -namespace operators { - -class SequenceConvOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext *ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SequenceConv"); - OP_INOUT_CHECK(ctx->HasInput("Filter"), "Input", "Filter", "SequenceConv"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "SequenceConv"); - - int context_length = ctx->Attrs().Get("contextLength"); - int context_start = ctx->Attrs().Get("contextStart"); - - auto in_dims = ctx->GetInputDim("X"); - auto filter_dims = ctx->GetInputDim("Filter"); - PADDLE_ENFORCE_EQ( - ctx->Attrs().Get("contextStride"), - 1, - platform::errors::InvalidArgument( - "Currently, SequenceConvOp only supports contextStride=1. But " - "received contextStride = %u.", - ctx->Attrs().Get("contextStride"))); - PADDLE_ENFORCE_EQ( - in_dims.size() == 2 && filter_dims.size() == 2, - true, - platform::errors::InvalidArgument( - "Input(X, Filter) should be 2-D tensor. But received Input(X): " - "input rank %u, input shape [%s]; received Input(Filter): " - "input rank %u, input shape [%s].", - in_dims.size(), - in_dims, - filter_dims.size(), - filter_dims)); - PADDLE_ENFORCE_EQ( - filter_dims[0], - context_length * in_dims[1], - platform::errors::InvalidArgument( - "Filter's height should be context_length * " - "input_hidden_size. But received: filter's height = %d, " - "context_length * input_hidden_size = %d.", - filter_dims[0], - context_length * in_dims[1])); - - if (ctx->Attrs().Get("paddingTrainable")) { - OP_INOUT_CHECK(ctx->HasInput("PaddingData"), - "Input", - "PaddingData", - "sequence_conv"); - framework::DDim padding_dim = ctx->GetInputDim("PaddingData"); - int up_pad = std::max(0, -context_start); - int down_pad = std::max(0, context_start + context_length - 1); - int total_pad = up_pad + down_pad; - int input_width = static_cast(in_dims[1]); - bool start_equals_zero = context_start == 0; - bool length_equals_one = context_length == 1; - bool start_length = start_equals_zero && length_equals_one; - - PADDLE_ENFORCE_EQ( - start_length, - false, - platform::errors::InvalidArgument( - "If context_start is 0 and context_length is 1, paddingTrainable " - "should be false.")); - PADDLE_ENFORCE_EQ( - padding_dim.size(), - 2, - platform::errors::InvalidArgument( - "Input(PaddingData) should be 2-D tensor. But received: " - "input rank %u, input shape [%s].", - padding_dim.size(), - padding_dim)); - PADDLE_ENFORCE_EQ( - padding_dim[0] == total_pad && padding_dim[1] == input_width, - true, - platform::errors::InvalidArgument("Input(PaddingData)'s shape is not " - "consistent with 'context_start' " - "and 'context_length'. 
Received " - "Input(PaddingData): input rank " - "%u, " - "input shape [%s].", - padding_dim.size(), - padding_dim)); - } - - in_dims[1] = filter_dims[1]; - ctx->SetOutputDim("Out", in_dims); - ctx->ShareLoD("X", "Out"); - } -}; - -class SequenceConvGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext *ctx) const override { - OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), - "Input", - framework::GradVarName("Out"), - "SequenceConvGrad"); - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SequenceConvGrad"); - - if (ctx->Attrs().Get("paddingTrainable") && - ctx->HasOutput(framework::GradVarName("PaddingData"))) { - ctx->SetOutputDim(framework::GradVarName("PaddingData"), - ctx->GetInputDim("PaddingData")); - } - if (ctx->HasOutput(framework::GradVarName("X"))) { - ctx->ShareDim("X", /*->*/ framework::GradVarName("X")); - ctx->ShareLoD("X", /*->*/ framework::GradVarName("X")); - } - if (ctx->HasOutput(framework::GradVarName("Filter"))) { - ctx->SetOutputDim(framework::GradVarName("Filter"), - ctx->GetInputDim("Filter")); - } - } -}; - -class SequenceConvOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput( - "X", - "(phi::DenseTensor) the input(X) is a LodTensor, which supports " - "variable-time length input sequence. The underlying tensor in " - "this phi::DenseTensor is a matrix with shape (T, N), where T is the " - "total time steps in this mini-batch and N is the input_hidden_size."); - AddInput( - "PaddingData", - "(phi::DenseTensor, optional) the input(PaddingData) is an optional " - "parameter, and it is learnable. " - "This is a tensor with shape (P, N), where P is the " - "top_pad + bottom_pad, N is the input_hidden_size. In order to " - "ensure the equal length of sequence before and after " - "convolution, it is necessary to fill the top and bottom of each " - "sequence according to context_length, context_stride and " - "context_start") - .AsDispensable(); - AddInput( - "Filter", - "(phi::DenseTensor) the input(Filter) is an learnable parameter." - "This is a tensor with shape (K, M), where K is the " - "context_length * input_hidden_size, M is the output feature size."); - AddOutput( - "Out", - "(phi::DenseTensor) the output(Out) is a LodTensor, which support " - "variable-time length output sequence. The underlying tensor in " - "this phi::DenseTensor is a matrix with shape (T, M), where, T is the " - "total time steps in this mini-batch, M is the output feature size."); - - AddAttr("paddingTrainable", - "(bool, default:false) the padding data of SequenceConvOp " - "is trainable or not.") - .SetDefault(false); - AddAttr("contextLength", - "(int) the contextLength of SequenceConvOp is the " - "height of the convolution kernel.") - .GreaterThan(0); - AddAttr("contextStart", - "(int, default:0) the contextStart of SequenceConvOp " - "represents the beginning of the convolution of the number of " - "rows of sequence, which can be negative. The negative number " - "means to pad contextStart time-steps of zeros or learnable " - "parameters at the beginning of each instance. The positive " - "number means to skip contextStart time-steps of each " - "instance.") - .SetDefault(0); - AddAttr("contextStride", - "(int, default:1) the contextStride of SequenceConvOp " - "represents the stride length of convolution kernel. 
" - "Currently, SequenceConvOp only supports" - "contextStride=1.") - .SetDefault(1) - .GreaterThan(0); - - AddComment(R"DOC( -Sequence Conv Operator. - -SequenceConvOp performs convolution operation on features of contextLength -time-steps of each instance. The convolution operation calculates the output -based on the input, filter, strides and paddings parameters. -The size of each dimension of the parameters is checked during infer-shape. -In order to ensure the equal length of sequence before and after convolution, -it is necessary to fill the top and bottom of each sequence based on -context_length, context_stride and context_start. - - )DOC"); - } -}; - -template -class SequenceConvGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("sequence_conv_grad"); - op->SetAttrMap(this->Attrs()); - - if (op->HasAttr("paddingTrainable") && - PADDLE_GET_CONST(bool, op->GetAttr("paddingTrainable")) && - this->HasInput("PaddingData")) { - op->SetInput("PaddingData", this->Input("PaddingData")); - op->SetOutput(framework::GradVarName("PaddingData"), - this->InputGrad("PaddingData")); - } - - op->SetInput("X", this->Input("X")); - op->SetInput("Filter", this->Input("Filter")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter")); - } -}; - -class SequenceConvGradNoNeedBufferVarsInference - : public framework::NoNeedBufferVarsInference { - public: - using framework::NoNeedBufferVarsInference::NoNeedBufferVarsInference; - - const std::unordered_set &operator()( - const framework::InferNoNeedBufferVarsContext &ctx) const final { - static const std::unordered_set kPaddingData({"PaddingData"}); - if (!PADDLE_GET_CONST(bool, ctx.GetAttr("paddingTrainable"))) { - return kPaddingData; - } else { - return Empty(); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(sequence_conv, - ops::SequenceConvOp, - ops::SequenceConvOpMaker, - ops::SequenceConvGradOpMaker, - ops::SequenceConvGradOpMaker); - -REGISTER_OPERATOR(sequence_conv_grad, - ops::SequenceConvGradOp, - ops::SequenceConvGradNoNeedBufferVarsInference); - -PD_REGISTER_STRUCT_KERNEL( - sequence_conv, CPU, ALL_LAYOUT, ops::SequenceConvKernel, float, double) {} -PD_REGISTER_STRUCT_KERNEL(sequence_conv_grad, - CPU, - ALL_LAYOUT, - ops::SequenceConvGradKernel, - float, - double) {} diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op.cu.cc b/paddle/fluid/operators/sequence_ops/sequence_conv_op.cu.cc deleted file mode 100644 index a96efea48557a..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op.cu.cc +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/sequence_ops/sequence_conv_op.h" - -namespace ops = paddle::operators; -PD_REGISTER_STRUCT_KERNEL( - sequence_conv, GPU, ALL_LAYOUT, ops::SequenceConvKernel, float, double) {} -PD_REGISTER_STRUCT_KERNEL(sequence_conv_grad, - GPU, - ALL_LAYOUT, - ops::SequenceConvGradKernel, - float, - double) {} diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h deleted file mode 100644 index 347db6e37db10..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h +++ /dev/null @@ -1,204 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/context_project.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -template -class SequenceConvKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* out = context.Output("Out"); - auto filter = *context.Input("Filter"); - - out->mutable_data(context.GetPlace()); - - int context_start = context.Attr("contextStart"); - int context_length = context.Attr("contextLength"); - int context_stride = context.Attr("contextStride"); - bool padding_trainable = context.Attr("paddingTrainable"); - - PADDLE_ENFORCE_EQ(in->lod().empty(), - false, - platform::errors::InvalidArgument( - "Input(X) phi::DenseTensor of SequenceConvOp " - "does not contain LoD information.")); - PADDLE_ENFORCE_EQ( - in->lod().size(), - 1UL, - platform::errors::InvalidArgument( - "Only support input sequence with lod level equal to 1 at " - "present. But received: lod level %u.", - in->lod().size())); - - const phi::DenseTensor* padding_data = nullptr; - if (padding_trainable) { - padding_data = context.Input("PaddingData"); - } - - int up_pad = std::max(0, -context_start); - int down_pad = std::max(0, context_start + context_length - 1); - auto sequence_width = static_cast(in->dims()[1]); - - framework::DDim col_shape = {in->dims()[0], - context_length * sequence_width}; - phi::DenseTensor col; - col.mutable_data(col_shape, context.GetPlace()); - // Because if padding_trainable is false, padding data should be zeros. 
- phi::funcs::SetConstant set_zero; - auto& dev_ctx = context.template device_context(); - auto blas = phi::funcs::GetBlas(dev_ctx); - set_zero(dev_ctx, &col, static_cast(0)); - math::ContextProjectFunctor seq_project_functor; - - seq_project_functor(dev_ctx, - *in, - padding_data, - padding_trainable, - context_start, - context_length, - context_stride, - up_pad, - down_pad, - &col); - - blas.MatMul(col, filter, out); - } -}; - -template -class SequenceConvGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* in_g = context.Output(framework::GradVarName("X")); - auto* out_g = - context.Input(framework::GradVarName("Out")); - auto* filter_g = - context.Output(framework::GradVarName("Filter")); - auto* padding_data_g = - context.Output(framework::GradVarName("PaddingData")); - auto* in = context.Input("X"); - auto* filter = context.Input("Filter"); - - int context_start = context.Attr("contextStart"); - int context_length = context.Attr("contextLength"); - int context_stride = context.Attr("contextStride"); - bool padding_trainable = context.Attr("paddingTrainable"); - - PADDLE_ENFORCE_EQ( - in->lod().size(), - 1UL, - platform::errors::InvalidArgument( - "Only support input sequence with lod level equal to 1 at " - "present. But received: lod level %u.", - in->lod().size())); - auto lod_g_level_0 = in->lod()[0]; - - int up_pad = std::max(0, -context_start); - int down_pad = std::max(0, context_start + context_length - 1); - auto sequence_width = static_cast(in->dims()[1]); - - phi::funcs::SetConstant set_zero; - auto& dev_ctx = context.template device_context(); - auto blas = phi::funcs::GetBlas(dev_ctx); - // use col_shape in the im2col calculation - framework::DDim col_shape = {in->dims()[0], - sequence_width * context_length}; - phi::DenseTensor col; - - if (in_g || filter_g || (padding_trainable && padding_data_g)) { - col.mutable_data(col_shape, context.GetPlace()); - // Because if padding_trainable is false, padding data should be zeros. 
- set_zero(dev_ctx, &col, static_cast(0)); - blas.MatMul(*out_g, false, *filter, true, &col); - } - math::ContextProjectFunctor seq_project_functor; - math::ContextProjectGradFunctor seq_project_grad_functor; - - if (in_g) { - in_g->mutable_data(context.GetPlace()); - in_g->set_lod(in->lod()); - set_zero(dev_ctx, in_g, static_cast(0)); - - seq_project_grad_functor(dev_ctx, - *in_g, - padding_trainable, - context_start, - context_length, - context_stride, - up_pad, - down_pad, - false, - true, - padding_data_g, - &col); - } - - if (padding_trainable && padding_data_g) { - padding_data_g->mutable_data(context.GetPlace()); - set_zero(dev_ctx, padding_data_g, static_cast(0)); - - phi::DenseTensor* input = const_cast(in); - seq_project_grad_functor(dev_ctx, - *input, - padding_trainable, - context_start, - context_length, - context_stride, - up_pad, - down_pad, - true, - false, - padding_data_g, - &col); - } - - if (filter_g) { - filter_g->mutable_data(context.GetPlace()); - set_zero(dev_ctx, filter_g, static_cast(0)); - - phi::DenseTensor filter_grad = *filter_g; - phi::DenseTensor out_grad = *out_g; - - const phi::DenseTensor* padding_data = nullptr; - if (padding_trainable) { - padding_data = context.Input("PaddingData"); - } - - seq_project_functor(dev_ctx, - *in, - padding_data, - padding_trainable, - context_start, - context_length, - context_stride, - up_pad, - down_pad, - &col); - - blas.MatMul(col, true, out_grad, false, &filter_grad); - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc b/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc deleted file mode 100644 index 53fb13180c36a..0000000000000 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc +++ /dev/null @@ -1,374 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef PADDLE_WITH_XPU - -#include "paddle/fluid/operators/sequence_ops/sequence_conv_op.h" -#include "paddle/fluid/platform/device/device_wrapper.h" - -namespace paddle { -namespace operators { - -template -class SequenceConvXPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* out = context.Output("Out"); - auto filter = *context.Input("Filter"); - - out->mutable_data(context.GetPlace()); - - int context_start = context.Attr("contextStart"); - int context_length = context.Attr("contextLength"); - int context_stride = context.Attr("contextStride"); - bool padding_trainable = context.Attr("paddingTrainable"); - - PADDLE_ENFORCE_EQ(in->lod().empty(), - false, - platform::errors::InvalidArgument( - "Input(X) phi::DenseTensor of SequenceConvOp " - "does not contain LoD information.")); - PADDLE_ENFORCE_EQ( - in->lod().size(), - 1UL, - platform::errors::InvalidArgument( - "Only support input sequence with lod level equal to 1 at " - "present. 
But received: lod level %u.", - in->lod().size())); - - PADDLE_ENFORCE_EQ( - padding_trainable, - false, - platform::errors::InvalidArgument("Only support padding_trainable " - "equal false.")); - - int up_pad = std::max(0, -context_start); - int down_pad = std::max(0, context_start + context_length - 1); - PADDLE_ENFORCE_EQ( - up_pad, - 2, - platform::errors::InvalidArgument("Only support up_pad equal 2.")); - PADDLE_ENFORCE_EQ( - down_pad, - 2, - platform::errors::InvalidArgument("Only support down_pad equal 2.")); - - auto xpu_context = - context.template device_context().x_context(); - auto sequence_width = static_cast(in->dims()[1]); - framework::DDim col_shape = {in->dims()[0], - context_length * sequence_width}; - xpu::ctx_guard RAII_GUARD(xpu_context); - int col_numel = col_shape[0] * col_shape[1]; - T* col_data = RAII_GUARD.alloc_l3_or_gm(col_numel); - PADDLE_ENFORCE_NOT_NULL( - col_data, paddle::platform::errors::Fatal("XPU memory is not enough")); - - auto lod_level_0 = in->lod()[0]; - int lod_size = lod_level_0.size(); - // If batch size set to 256, the lod is {0, batch[0] - 0, - // batch[1] - batch [0], ..., batch[255] - batch[254]}, - // so the lod_size will be 257. - PADDLE_ENFORCE_LE( - lod_size, - 257, - platform::errors::InvalidArgument("Only support batch size <= 256.")); - - std::vector cpu_lodx(lod_size); - for (int i = 0; i < lod_size; i++) { - cpu_lodx[i] = lod_level_0[i]; - } - xpu::VectorParam lodx = { - cpu_lodx.data(), static_cast(cpu_lodx.size()), nullptr}; - - int r = xpu::sequence_context_projection(xpu_context, - in->data(), - col_data, - nullptr, - lodx, - sequence_width, - context_start, - context_length, - context_stride, - {2, 2}); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "sequence_context_projection"); - - bool trans_a = false; - bool trans_b = false; - int m = col_shape[0]; - int k = col_shape[1]; - int k1 = filter.dims()[0]; - int n = filter.dims()[1]; - PADDLE_ENFORCE_EQ(k, - k1, - platform::errors::InvalidArgument( - "The shape of FC in SequenceConvOp is invalid." - "The k of matrix A is %d, k1 of matrix B is %d." - "But expect k == k1", - k, - k1)); - int lda = (!trans_a) ? k : m; - int ldb = (!trans_b) ? 
n : k; - int ldc = n; - T alpha = static_cast(1.0); - T beta = static_cast(0.0); - const T* data_a = col_data; - const T* data_b = filter.data(); - T* data_c = out->data(); - - r = xpu::fc_fusion(xpu_context, - data_a, - data_b, - data_c, - m, - n, - k, - trans_a, - trans_b, - nullptr, - nullptr, - nullptr, - lda, - ldb, - ldc, - alpha, - beta, - nullptr, - xpu::Activation_t::LINEAR); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "fc_fusion"); - if (xpu_context->xpu_stream != nullptr) { - xpu_wait(xpu_context->xpu_stream); - } - } -}; - -template -class SequenceConvGradXPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* in_g = context.Output(framework::GradVarName("X")); - auto* out_g = - context.Input(framework::GradVarName("Out")); - auto* filter_g = - context.Output(framework::GradVarName("Filter")); - auto* in = context.Input("X"); - auto* filter = context.Input("Filter"); - - int context_start = context.Attr("contextStart"); - int context_length = context.Attr("contextLength"); - int context_stride = context.Attr("contextStride"); - bool padding_trainable = context.Attr("paddingTrainable"); - - PADDLE_ENFORCE_EQ(in->lod().empty(), - false, - platform::errors::InvalidArgument( - "Input(X) phi::DenseTensor of SequenceConvOp " - "does not contain LoD information.")); - PADDLE_ENFORCE_EQ( - in->lod().size(), - 1UL, - platform::errors::InvalidArgument( - "Only support input sequence with lod level equal to 1 at " - "present. But received: lod level %u.", - in->lod().size())); - - PADDLE_ENFORCE_EQ( - padding_trainable, - false, - platform::errors::InvalidArgument("Only support padding_trainable " - "equal false.")); - - int up_pad = std::max(0, -context_start); - int down_pad = std::max(0, context_start + context_length - 1); - PADDLE_ENFORCE_EQ( - up_pad, - 2, - platform::errors::InvalidArgument("Only support up_pad equal 2.")); - PADDLE_ENFORCE_EQ( - down_pad, - 2, - platform::errors::InvalidArgument("Only support down_pad equal 2.")); - - auto lod_level_0 = in->lod()[0]; - int lod_size = lod_level_0.size(); - PADDLE_ENFORCE_LE( - lod_size, - 257, - platform::errors::InvalidArgument("Only support batch size <= 256.")); - - std::vector cpu_lodx(lod_size); - for (int i = 0; i < lod_size; i++) { - cpu_lodx[i] = lod_level_0[i]; - } - xpu::VectorParam lodx = { - cpu_lodx.data(), static_cast(cpu_lodx.size()), nullptr}; - - auto xpu_context = - context.template device_context().x_context(); - auto sequence_width = static_cast(in->dims()[1]); - framework::DDim col_shape = {in->dims()[0], - context_length * sequence_width}; - xpu::ctx_guard RAII_GUARD(xpu_context); - int col_numel = col_shape[0] * col_shape[1]; - T* col_data = RAII_GUARD.alloc_l3_or_gm(col_numel); - PADDLE_ENFORCE_NOT_NULL( - col_data, paddle::platform::errors::Fatal("XPU memory is not enough")); - - if (in_g || filter_g) { - bool trans_a = false; - bool trans_b = true; - int m = out_g->dims()[0]; - int k = out_g->dims()[1]; - int n = filter->dims()[0]; - int k1 = filter->dims()[1]; - PADDLE_ENFORCE_EQ(k, - k1, - platform::errors::InvalidArgument( - "The shape of FC in SequenceConvGradOp is invalid." - "The k of matrix A is %d, k1 of matrix B is %d." - "But expect k == k1", - k, - k1)); - int lda = (!trans_a) ? k : m; - int ldb = (!trans_b) ? 
n : k; - int ldc = n; - T alpha = static_cast(1.0); - T beta = static_cast(0.0); - const T* data_a = out_g->data(); - const T* data_b = filter->data(); - T* data_c = col_data; - - int r = xpu::fc_fusion(xpu_context, - data_a, - data_b, - data_c, - m, - n, - k, - trans_a, - trans_b, - nullptr, - nullptr, - nullptr, - lda, - ldb, - ldc, - alpha, - beta, - nullptr, - xpu::Activation_t::LINEAR); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "fc_fusion"); - } - - if (in_g) { - PADDLE_ENFORCE_LT(sequence_width, - 512, - platform::errors::InvalidArgument( - "Only support sequence_width < 512.")); - - in_g->mutable_data(context.GetPlace()); - in_g->set_lod(in->lod()); - - int r = xpu::sequence_context_projection_grad(xpu_context, - in_g->data(), - col_data, - nullptr, - lodx, - sequence_width, - context_start, - context_length, - context_stride, - {2, 2}); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "sequence_context_projection_grad"); - } - - if (filter_g) { - filter_g->mutable_data(context.GetPlace()); - - int r = xpu::sequence_context_projection(xpu_context, - in->data(), - col_data, - nullptr, - lodx, - sequence_width, - context_start, - context_length, - context_stride, - {2, 2}); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "sequence_context_projection"); - - bool trans_a = true; - bool trans_b = false; - int k = col_shape[0]; - int m = col_shape[1]; - int k1 = out_g->dims()[0]; - int n = out_g->dims()[1]; - PADDLE_ENFORCE_EQ(k, - k1, - platform::errors::InvalidArgument( - "The shape of FC in SequenceConvGradOp is invalid." - "The k of matrix A is %d, k1 of matrix B is %d." - "But expect k == k1", - k, - k1)); - int lda = (!trans_a) ? k : m; - int ldb = (!trans_b) ? n : k; - int ldc = n; - T alpha = static_cast(1.0); - T beta = static_cast(0.0); - const T* data_a = col_data; - const T* data_b = out_g->data(); - T* data_c = filter_g->data(); - - r = xpu::fc_fusion(xpu_context, - data_a, - data_b, - data_c, - m, - n, - k, - trans_a, - trans_b, - nullptr, - nullptr, - nullptr, - lda, - ldb, - ldc, - alpha, - beta, - nullptr, - xpu::Activation_t::LINEAR); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "fc_fusion"); - if (xpu_context->xpu_stream != nullptr) { - xpu_wait(xpu_context->xpu_stream); - } - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -PD_REGISTER_STRUCT_KERNEL( - sequence_conv, XPU, ALL_LAYOUT, ops::SequenceConvXPUKernel, float) {} -PD_REGISTER_STRUCT_KERNEL(sequence_conv_grad, - XPU, - ALL_LAYOUT, - ops::SequenceConvGradXPUKernel, - float) {} - -#endif diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py index 7d08a6eff11bf..d05631a7b690b 100755 --- a/python/paddle/static/nn/__init__.py +++ b/python/paddle/static/nn/__init__.py @@ -38,7 +38,6 @@ from .loss import nce from .sequence_lod import ( sequence_concat, - sequence_conv, sequence_enumerate, sequence_expand, sequence_expand_as, @@ -80,7 +79,6 @@ 'switch_case', 'while_loop', 'sparse_embedding', - 'sequence_conv', 'sequence_softmax', 'sequence_pool', 'sequence_concat', diff --git a/python/paddle/static/nn/sequence_lod.py b/python/paddle/static/nn/sequence_lod.py index 51cfd0d5307d5..f8cabc30521f3 100644 --- a/python/paddle/static/nn/sequence_lod.py +++ b/python/paddle/static/nn/sequence_lod.py @@ -22,147 +22,6 @@ __all__ = [] -@templatedoc() -def sequence_conv( - input, - num_filters, - filter_size=3, - filter_stride=1, - padding=True, - padding_start=None, - bias_attr=None, - param_attr=None, - act=None, - name=None, -): - r""" - - Note: - Only receives Tensor as input. 
If your input is Tensor, please use conv2d Op.(base.layers.** :ref:`api_paddle_nn_functional_conv2d` ). - - This operator receives input sequences with variable length and other convolutional - configuration parameters(num_filters, filter_size) to apply the convolution operation. - It fills all-zero padding data on both sides of the sequence by default to ensure that - the output is the same length as the input. You can customize the padding behavior by - configuring the parameter :attr:`padding\_start` . - - **Warning:** the parameter :attr:`padding` take no effect and will be deprecated in the future. - - .. code-block:: text - - Here we will illustrate the details of the padding operation: - For a mini-batch of 2 variable lengths sentences, containing 3, and 1 time-steps: - Assumed input (X) is a [4, N] float Tensor, and for the sake of simplicity, we assume N=2. - input.data = [[1, 1], - [2, 2], - [3, 3], - [4, 4]] - - This is to say that input (X) has 4 words and the dimension of each word - representation is 2. - - * Case1: - - If padding_start is -1 and filter_size is 3. - The length of padding data is calculated as follows: - up_pad_len = max(0, -padding_start) = 1 - down_pad_len = max(0, filter_size + padding_start - 1) = 1 - - The output of the input sequence after padding is: - data_after_padding = [[0, 0, 1, 1, 2, 2], - [1, 1, 2, 2, 3, 3], - [2, 2, 3, 3, 0, 0], - [0, 0, 4, 4, 0, 0]] - - It will be multiplied by the filter weight to get the final output. - Assume num_filters = 3 - output.data = [[ 0.3234, -0.2334, 0.7433], - [ 0.5646, 0.9464, -0.1223], - [-0.1343, 0.5653, 0.4555], - [ 0.9954, -0.1234, -0.1234]] - output.shape = [4, 3] # 3 = num_filters - output.lod = [[0, 3, 4]] # Remain the same - - - Args: - input (Tensor): Tensor with shape :math:`(M, K)`, where M is the total time-step of mini-batch - and K is hidden_size of input. Only lod_level of 1 is supported. The data type should be float32 or - float64. - num_filters (int): the number of filters. - filter_size (int): the height of filter. Specified filter width is not supported, the width is - hidden_size by default. Default: 3. - filter_stride (int, optional): stride of the filter. Currently only supports :attr:`stride` = 1. - padding (bool, optional): the parameter :attr:`padding` take no effect and will be discarded in the - future. Currently, it will always pad input to make sure the length of the output is - the same as input whether :attr:`padding` is set true or false. Because the length of - input sequence may be shorter than :attr:`filter\_size`, which will cause the convolution - result to not be computed correctly. These padding data will not be trainable or updated - while training. Default: True. - padding_start (int): It is used to indicate the start index for padding the input - sequence, which can be negative. The negative number means to pad - :attr:`|padding_start|` time-steps of all-zero data at the beginning of each instance. - The positive number means to skip :attr:`padding_start` time-steps of each instance, - and it will pad :math:`filter\_size + padding\_start - 1` time-steps of all-zero data - at the end of the sequence to ensure that the output is the same length as the input. - If set None, the same length :math:`\\frac{filter\_size}{2}` of data will be filled - on both sides of the sequence. If set 0, the length of :math:`filter\_size - 1` data - is padded at the end of each input sequence. Default: None. - bias_attr (ParamAttr): To specify the bias parameter property. 
Default: None, which means the - default bias parameter property is used. See usage for details in :ref:`api_paddle_ParamAttr` . - param_attr (ParamAttr): To specify the weight parameter property. Default: None, which means the - default weight parameter property is used. See usage for details in :ref:`api_paddle_ParamAttr` . - act (str): Activation to be applied to the output of this layer, such as tanh, softmax, - sigmoid, relu. For more information, please refer to :ref:`api_guide_activations_en` . Default: None. - name (str, optional): The default value is None. Normally there is no need for user to set this property. - For more information, please refer to :ref:`api_guide_Name` . - - Returns: - Tensor: Tensor with the same length as input. The data type is float32 or float64, which is same as input. - - Examples: - - .. code-block:: python - - >>> import paddle - >>> paddle.enable_static() - - >>> x = paddle.static.data(name='x', shape=[-1, 10], dtype='float32', lod_level=1) - >>> x_conved = paddle.static.nn.sequence_conv(input=x, num_filters=2, filter_size=3, padding_start=-1) - """ - - assert ( - not in_dygraph_mode() - ), "sequence layer is not supported in dygraph mode yet." - check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'sequence_conv' - ) - helper = LayerHelper('sequence_conv', **locals()) - dtype = helper.input_dtype() - filter_shape = [filter_size * input.shape[1], num_filters] - filter_param = helper.create_parameter( - attr=helper.param_attr, shape=filter_shape, dtype=dtype - ) - pre_bias = helper.create_variable_for_type_inference(dtype) - if padding_start is None: - padding_start = -int(filter_size // 2) - - helper.append_op( - type='sequence_conv', - inputs={ - 'X': [input], - 'Filter': [filter_param], - }, - outputs={"Out": pre_bias}, - attrs={ - 'contextStride': filter_stride, - 'contextStart': padding_start, - 'contextLength': filter_size, - }, - ) - pre_act = helper.append_bias_op(pre_bias) - return helper.append_activation(pre_act) - - def sequence_softmax(input, use_cudnn=False, name=None): r""" diff --git a/python/paddle/static/quantization/quant2_int8_onednn_pass.py b/python/paddle/static/quantization/quant2_int8_onednn_pass.py index 19fb5f928bea5..16827447dec7f 100644 --- a/python/paddle/static/quantization/quant2_int8_onednn_pass.py +++ b/python/paddle/static/quantization/quant2_int8_onednn_pass.py @@ -435,7 +435,6 @@ def _optimize_fp32_graph(self, graph): graph = self._apply_pass(graph, 'simplify_with_basic_ops_pass') graph = self._apply_pass(graph, 'layer_norm_fuse_pass') graph = self._apply_pass(graph, 'attention_lstm_fuse_pass') - graph = self._apply_pass(graph, 'seqconv_eltadd_relu_fuse_pass') graph = self._apply_pass(graph, 'fc_lstm_fuse_pass') graph = self._apply_pass(graph, 'mul_lstm_fuse_pass') graph = self._apply_pass(graph, 'fc_gru_fuse_pass') diff --git a/test/book/notest_understand_sentiment.py b/test/book/notest_understand_sentiment.py deleted file mode 100644 index 250b3f2dc679c..0000000000000 --- a/test/book/notest_understand_sentiment.py +++ /dev/null @@ -1,266 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import contextlib -import math -import os -import sys -import unittest - -import numpy as np - -# TODO: remove sys.path.append -sys.path.append("../legacy_test") -import nets - -import paddle -from paddle import base - - -def convolution_net( - data, label, input_dim, class_dim=2, emb_dim=32, hid_dim=32 -): - emb = paddle.static.nn.embedding( - input=data, size=[input_dim, emb_dim], is_sparse=True - ) - conv_3 = nets.sequence_conv_pool( - input=emb, - num_filters=hid_dim, - filter_size=3, - act="tanh", - pool_type="sqrt", - ) - conv_4 = nets.sequence_conv_pool( - input=emb, - num_filters=hid_dim, - filter_size=4, - act="tanh", - pool_type="sqrt", - ) - prediction = paddle.static.nn.fc( - x=[conv_3, conv_4], size=class_dim, activation="softmax" - ) - cost = paddle.nn.functional.cross_entropy( - input=prediction, label=label, reduction='none', use_softmax=False - ) - avg_cost = paddle.mean(cost) - accuracy = paddle.static.accuracy(input=prediction, label=label) - return avg_cost, accuracy, prediction - - -def train( - word_dict, - net_method, - use_cuda, - parallel=False, - save_dirname=None, - is_local=True, -): - BATCH_SIZE = 128 - PASS_NUM = 5 - dict_dim = len(word_dict) - class_dim = 2 - - data = paddle.static.data( - name="words", shape=[-1, 1], dtype="int64", lod_level=1 - ) - label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") - - if not parallel: - cost, acc_out, prediction = net_method( - data, label, input_dim=dict_dim, class_dim=class_dim - ) - else: - raise NotImplementedError() - - adagrad = paddle.optimizer.Adagrad(learning_rate=0.002) - adagrad.minimize(cost) - - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=1000 - ), - batch_size=BATCH_SIZE, - ) - place = base.CUDAPlace(0) if use_cuda else base.CPUPlace() - exe = base.Executor(place) - feeder = base.DataFeeder(feed_list=[data, label], place=place) - - def train_loop(main_program): - exe.run(base.default_startup_program()) - - for pass_id in range(PASS_NUM): - for data in train_data(): - cost_val, acc_val = exe.run( - main_program, - feed=feeder.feed(data), - fetch_list=[cost, acc_out], - ) - print("cost=" + str(cost_val) + " acc=" + str(acc_val)) - if cost_val < 0.4 and acc_val > 0.8: - if save_dirname is not None: - paddle.static.io.save_inference_model( - save_dirname, data, prediction, exe - ) - return - if math.isnan(float(cost_val)): - sys.exit("got NaN loss, training failed.") - raise AssertionError(f"Cost is too large for {net_method.__name__}") - - if is_local: - train_loop(base.default_main_program()) - else: - port = os.getenv("PADDLE_PSERVER_PORT", "6174") - pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... - eplist = [] - for ip in pserver_ips.split(","): - eplist.append(':'.join([ip, port])) - pserver_endpoints = ",".join(eplist) # ip:port,ip:port... 
- trainers = int(os.getenv("PADDLE_TRAINERS")) - current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) - training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") - t = paddle.distributed.transpiler.DistributeTranspiler() - t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) - if training_role == "PSERVER": - pserver_prog = t.get_pserver_program(current_endpoint) - pserver_startup = t.get_startup_program( - current_endpoint, pserver_prog - ) - exe.run(pserver_startup) - exe.run(pserver_prog) - elif training_role == "TRAINER": - train_loop(t.get_trainer_program()) - - -def infer(word_dict, use_cuda, save_dirname=None): - if save_dirname is None: - return - - place = base.CUDAPlace(0) if use_cuda else base.CPUPlace() - exe = base.Executor(place) - - inference_scope = base.core.Scope() - with base.scope_guard(inference_scope): - # Use paddle.static.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be fed - # data using feed operators), and the fetch_targets (variables that - # we want to obtain data from using fetch operators). - [ - inference_program, - feed_target_names, - fetch_targets, - ] = paddle.static.io.load_inference_model(save_dirname, exe) - - word_dict_len = len(word_dict) - - # Setup input by creating LoDTensor to represent sequence of words. - # Here each word is the basic element of the LoDTensor and the shape of - # each word (base_shape) should be [1] since it is simply an index to - # look up for the corresponding word vector. - # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]], - # which has only one level of detail. Then the created LoDTensor will have only - # one higher level structure (sequence of words, or sentence) than the basic - # element (word). Hence the LoDTensor will hold data for three sentences of - # length 3, 4 and 2, respectively. - # Note that recursive_sequence_lengths should be a list of lists. - recursive_seq_lens = [[3, 4, 2]] - base_shape = [1] - # The range of random integers is [low, high] - tensor_words = base.create_random_int_lodtensor( - recursive_seq_lens, base_shape, place, low=0, high=word_dict_len - 1 - ) - - # Construct feed as a dictionary of {feed_target_name: feed_target_data} - # and results will contain a list of data corresponding to fetch_targets. 
- assert feed_target_names[0] == "words" - results = exe.run( - inference_program, - feed={feed_target_names[0]: tensor_words}, - fetch_list=fetch_targets, - return_numpy=False, - ) - print(results[0].recursive_sequence_lengths()) - np_data = np.array(results[0]) - print("Inference Shape: ", np_data.shape) - print("Inference results: ", np_data) - - -def main(word_dict, net_method, use_cuda, parallel=False, save_dirname=None): - if use_cuda and not base.core.is_compiled_with_cuda(): - return - - train( - word_dict, - net_method, - use_cuda, - parallel=parallel, - save_dirname=save_dirname, - ) - infer(word_dict, use_cuda, save_dirname) - - -class TestUnderstandSentiment(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.word_dict = paddle.dataset.imdb.word_dict() - - @contextlib.contextmanager - def new_program_scope(self): - prog = base.Program() - startup_prog = base.Program() - scope = base.core.Scope() - with base.scope_guard(scope): - with base.program_guard(prog, startup_prog): - yield - - def test_conv_cpu(self): - with self.new_program_scope(): - main( - self.word_dict, - net_method=convolution_net, - use_cuda=False, - save_dirname="understand_sentiment_conv.inference.model", - ) - - def test_conv_cpu_parallel(self): - with self.new_program_scope(): - main( - self.word_dict, - net_method=convolution_net, - use_cuda=False, - parallel=True, - ) - - def test_conv_gpu(self): - with self.new_program_scope(): - main( - self.word_dict, - net_method=convolution_net, - use_cuda=True, - save_dirname="understand_sentiment_conv.inference.model", - ) - - def test_conv_gpu_parallel(self): - with self.new_program_scope(): - main( - self.word_dict, - net_method=convolution_net, - use_cuda=True, - parallel=True, - ) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/book/test_recommender_system.py b/test/book/test_recommender_system.py deleted file mode 100644 index 7a4a70be105d5..0000000000000 --- a/test/book/test_recommender_system.py +++ /dev/null @@ -1,392 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -import os -import sys -import tempfile - -import numpy as np - -# TODO: remove sys.path.append -sys.path.append("../legacy_test") -import nets - -import paddle -from paddle import base -from paddle.base import framework -from paddle.base.executor import Executor -from paddle.optimizer import SGD - -paddle.enable_static() - -IS_SPARSE = True -USE_GPU = False -BATCH_SIZE = 256 - - -def get_usr_combined_features(): - # FIXME(dzh) : old API integer_value(10) may has range check. - # currently we don't have user configurated check. 
- - USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 - - uid = paddle.static.data(name='user_id', shape=[-1, 1], dtype='int64') - - usr_emb = paddle.static.nn.embedding( - input=uid, - dtype='float32', - size=[USR_DICT_SIZE, 32], - param_attr='user_table', - is_sparse=IS_SPARSE, - ) - - usr_fc = paddle.static.nn.fc(x=usr_emb, size=32) - - USR_GENDER_DICT_SIZE = 2 - - usr_gender_id = paddle.static.data( - name='gender_id', shape=[-1, 1], dtype='int64' - ) - - usr_gender_emb = paddle.static.nn.embedding( - input=usr_gender_id, - size=[USR_GENDER_DICT_SIZE, 16], - param_attr='gender_table', - is_sparse=IS_SPARSE, - ) - - usr_gender_fc = paddle.static.nn.fc(x=usr_gender_emb, size=16) - - USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) - usr_age_id = paddle.static.data(name='age_id', shape=[-1, 1], dtype="int64") - - usr_age_emb = paddle.static.nn.embedding( - input=usr_age_id, - size=[USR_AGE_DICT_SIZE, 16], - is_sparse=IS_SPARSE, - param_attr='age_table', - ) - - usr_age_fc = paddle.static.nn.fc(x=usr_age_emb, size=16) - - USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 - usr_job_id = paddle.static.data(name='job_id', shape=[-1, 1], dtype="int64") - - usr_job_emb = paddle.static.nn.embedding( - input=usr_job_id, - size=[USR_JOB_DICT_SIZE, 16], - param_attr='job_table', - is_sparse=IS_SPARSE, - ) - - usr_job_fc = paddle.static.nn.fc(x=usr_job_emb, size=16) - - concat_embed = paddle.concat( - [usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], axis=1 - ) - - usr_combined_features = paddle.static.nn.fc( - x=concat_embed, size=200, activation="tanh" - ) - - return usr_combined_features - - -def get_mov_combined_features(): - MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 - - mov_id = paddle.static.data(name='movie_id', shape=[-1, 1], dtype='int64') - - mov_emb = paddle.static.nn.embedding( - input=mov_id, - dtype='float32', - size=[MOV_DICT_SIZE, 32], - param_attr='movie_table', - is_sparse=IS_SPARSE, - ) - - mov_fc = paddle.static.nn.fc(x=mov_emb, size=32) - - CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) - - category_id = paddle.static.data( - name='category_id', shape=[-1, 1], dtype='int64', lod_level=1 - ) - - mov_categories_emb = paddle.static.nn.embedding( - input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE - ) - - mov_categories_hidden = paddle.static.nn.sequence_lod.sequence_pool( - input=mov_categories_emb.squeeze(-2), pool_type="sum" - ) - - MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) - - mov_title_id = paddle.static.data( - name='movie_title', shape=[-1, 1], dtype='int64', lod_level=1 - ) - - mov_title_emb = paddle.static.nn.embedding( - input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE - ) - - mov_title_conv = nets.sequence_conv_pool( - input=mov_title_emb.squeeze(-2), - num_filters=32, - filter_size=3, - act="tanh", - pool_type="sum", - ) - - concat_embed = paddle.concat( - [mov_fc, mov_categories_hidden, mov_title_conv], axis=1 - ) - - # FIXME(dzh) : need tanh operator - mov_combined_features = paddle.static.nn.fc( - x=concat_embed, size=200, activation="tanh" - ) - - return mov_combined_features - - -def model(): - usr_combined_features = get_usr_combined_features() - mov_combined_features = get_mov_combined_features() - - # need cos sim - inference = paddle.nn.functional.cosine_similarity( - x1=usr_combined_features, x2=mov_combined_features - ) - scale_infer = paddle.scale(x=inference, scale=5.0) - - label = 
paddle.static.data(name='score', shape=[-1, 1], dtype='float32')
-    square_cost = paddle.nn.functional.square_error_cost(
-        input=scale_infer, label=label
-    )
-    avg_cost = paddle.mean(square_cost)
-
-    return scale_infer, avg_cost
-
-
-def train(use_cuda, save_dirname, is_local=True):
-    scale_infer, avg_cost = model()
-
-    # test program
-    test_program = base.default_main_program().clone(for_test=True)
-
-    sgd_optimizer = SGD(learning_rate=0.2)
-    sgd_optimizer.minimize(avg_cost)
-
-    place = base.CUDAPlace(0) if use_cuda else base.CPUPlace()
-
-    exe = Executor(place)
-
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(paddle.dataset.movielens.train(), buf_size=8192),
-        batch_size=BATCH_SIZE,
-    )
-    test_reader = paddle.batch(
-        paddle.dataset.movielens.test(), batch_size=BATCH_SIZE
-    )
-
-    feed_order = [
-        'user_id',
-        'gender_id',
-        'age_id',
-        'job_id',
-        'movie_id',
-        'category_id',
-        'movie_title',
-        'score',
-    ]
-    feed_infer_order = [
-        'user_id',
-        'gender_id',
-        'age_id',
-        'job_id',
-        'movie_id',
-        'category_id',
-        'movie_title',
-    ]
-
-    def train_loop(main_program):
-        exe.run(framework.default_startup_program())
-
-        feed_list = [
-            main_program.global_block().var(var_name) for var_name in feed_order
-        ]
-        feed_infer_list = [
-            main_program.global_block().var(var_name)
-            for var_name in feed_infer_order
-        ]
-        feeder = base.DataFeeder(feed_list, place)
-
-        PASS_NUM = 100
-        for pass_id in range(PASS_NUM):
-            for batch_id, data in enumerate(train_reader()):
-                # train a mini-batch
-                outs = exe.run(
-                    program=main_program,
-                    feed=feeder.feed(data),
-                    fetch_list=[avg_cost],
-                )
-                out = np.array(outs[0])
-                if (batch_id + 1) % 10 == 0:
-                    avg_cost_set = []
-                    for test_data in test_reader():
-                        avg_cost_np = exe.run(
-                            program=test_program,
-                            feed=feeder.feed(test_data),
-                            fetch_list=[avg_cost],
-                        )
-                        avg_cost_set.append(avg_cost_np[0])
-                        break  # test only 1 segment to speed up CI
-
-                    # get test avg_cost
-                    test_avg_cost = np.array(avg_cost_set).mean()
-                    if test_avg_cost < 6.0:
-                        # if avg_cost is less than 6.0, we think our code is good.
-                        if save_dirname is not None:
-                            paddle.static.io.save_inference_model(
-                                save_dirname,
-                                feed_infer_list,
-                                [scale_infer],
-                                exe,
-                            )
-                        return
-
-                if math.isnan(float(out)):
-                    sys.exit("got NaN loss, training failed.")
-
-    if is_local:
-        train_loop(base.default_main_program())
-    else:
-        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
-        pserver_ips = os.getenv("PADDLE_PSERVER_IPS")  # ip,ip...
-        eplist = []
-        for ip in pserver_ips.split(","):
-            eplist.append(':'.join([ip, port]))
-        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
- trainers = int(os.getenv("PADDLE_TRAINERS")) - current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) - training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") - t = paddle.distributed.transpiler.DistributeTranspiler() - t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) - if training_role == "PSERVER": - pserver_prog = t.get_pserver_program(current_endpoint) - pserver_startup = t.get_startup_program( - current_endpoint, pserver_prog - ) - exe.run(pserver_startup) - exe.run(pserver_prog) - elif training_role == "TRAINER": - train_loop(t.get_trainer_program()) - - -def infer(use_cuda, save_dirname=None): - if save_dirname is None: - return - - place = base.CUDAPlace(0) if use_cuda else base.CPUPlace() - exe = base.Executor(place) - - inference_scope = base.core.Scope() - with base.scope_guard(inference_scope): - # Use paddle.static.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be fed - # data using feed operators), and the fetch_targets (variables that - # we want to obtain data from using fetch operators). - [ - inference_program, - feed_target_names, - fetch_targets, - ] = paddle.static.io.load_inference_model(save_dirname, exe) - - # Use the first data from paddle.dataset.movielens.test() as input - assert feed_target_names[0] == "user_id" - # Use create_lod_tensor(data, recursive_sequence_lengths, place) API - # to generate LoD Tensor where `data` is a list of sequences of index - # numbers, `recursive_sequence_lengths` is the length-based level of detail - # (lod) info associated with `data`. - # For example, data = [[10, 2, 3], [2, 3]] means that it contains - # two sequences of indexes, of length 3 and 2, respectively. - # Correspondingly, recursive_sequence_lengths = [[3, 2]] contains one - # level of detail info, indicating that `data` consists of two sequences - # of length 3 and 2, respectively. - user_id = base.create_lod_tensor([[np.int64(1)]], [[1]], place) - - assert feed_target_names[1] == "gender_id" - gender_id = base.create_lod_tensor([[np.int64(1)]], [[1]], place) - - assert feed_target_names[2] == "age_id" - age_id = base.create_lod_tensor([[np.int64(0)]], [[1]], place) - - assert feed_target_names[3] == "job_id" - job_id = base.create_lod_tensor([[np.int64(10)]], [[1]], place) - - assert feed_target_names[4] == "movie_id" - movie_id = base.create_lod_tensor([[np.int64(783)]], [[1]], place) - - assert feed_target_names[5] == "category_id" - category_id = base.create_lod_tensor( - [np.array([10, 8, 9], dtype='int64')], [[3]], place - ) - - assert feed_target_names[6] == "movie_title" - movie_title = base.create_lod_tensor( - [np.array([1069, 4140, 2923, 710, 988], dtype='int64')], - [[5]], - place, - ) - - # Construct feed as a dictionary of {feed_target_name: feed_target_data} - # and results will contain a list of data corresponding to fetch_targets. 
- results = exe.run( - inference_program, - feed={ - feed_target_names[0]: user_id, - feed_target_names[1]: gender_id, - feed_target_names[2]: age_id, - feed_target_names[3]: job_id, - feed_target_names[4]: movie_id, - feed_target_names[5]: category_id, - feed_target_names[6]: movie_title, - }, - fetch_list=fetch_targets, - return_numpy=False, - ) - print("inferred score: ", np.array(results[0])) - - -def main(use_cuda): - if use_cuda and not base.core.is_compiled_with_cuda(): - return - - # Directory for saving the inference model - temp_dir = tempfile.TemporaryDirectory() - save_dirname = os.path.join( - temp_dir.name, "recommender_system.inference.model" - ) - - train(use_cuda, save_dirname) - infer(use_cuda, save_dirname) - temp_dir.cleanup() - - -if __name__ == '__main__': - main(USE_GPU) diff --git a/test/ir/inference/test_seqconv_eltadd_relu_fuse_pass.py b/test/ir/inference/test_seqconv_eltadd_relu_fuse_pass.py deleted file mode 100644 index b31533ac958d0..0000000000000 --- a/test/ir/inference/test_seqconv_eltadd_relu_fuse_pass.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -from functools import partial - -import hypothesis.strategies as st -import numpy as np -from auto_scan_test import PassAutoScanTest -from program_config import OpConfig, ProgramConfig, TensorConfig - - -class TestSeqconvEltaddReluFusePass(PassAutoScanTest): - def is_program_valid(self, program_config: ProgramConfig) -> bool: - return True - - def sample_program_config(self, draw): - contextLength = draw(st.sampled_from([1, 2, 3, 4])) - contextStart = draw(st.sampled_from([1, 2, 3])) - contextStride = draw(st.sampled_from([1])) - paddingTrainable = False - axis = draw(st.sampled_from([1])) - batch_size = draw(st.integers(min_value=1, max_value=4)) - - def generate_input(): - shape = [batch_size, 128, 6, 120] - return np.random.random(shape).astype(np.float32) - - def generate_weight(shape): - return np.random.random(shape).astype(np.float32) - - im2sequence_op = OpConfig( - type="im2sequence", - inputs={"X": ["input_data"]}, - outputs={"Out": ["seq_out"]}, - attrs={ - "kernels": [6, 1], - "out_stride": [1, 1], - "paddings": [0, 0, 0, 0], - "strides": [1, 1], - }, - ) - - sequence_conv_op = OpConfig( - type="sequence_conv", - inputs={"X": ["seq_out"], "Filter": ["conv_weight"]}, - outputs={"Out": ["conv_out"]}, - attrs={ - "contextLength": contextLength, - "contextStart": contextStart, - "contextStride": contextStride, - "paddingTrainable": paddingTrainable, - }, - ) - - elementwise_add_op = OpConfig( - type="elementwise_add", - inputs={"X": ["conv_out"], "Y": ["elt_weight"]}, - outputs={"Out": ["elt_output"]}, - attrs={'axis': axis}, - ) - - relu_op = OpConfig( - type="relu", - inputs={"X": ["elt_output"]}, - outputs={"Out": ["relu_output"]}, - attrs={}, - ) - - model_net = [ - im2sequence_op, - sequence_conv_op, - elementwise_add_op, - relu_op, - ] - - program_config = ProgramConfig( - 
ops=model_net, - weights={ - "conv_weight": TensorConfig( - data_gen=partial(generate_weight, [768 * contextLength, 16]) - ), - "elt_weight": TensorConfig( - data_gen=partial(generate_weight, [16]) - ), - }, - inputs={ - "input_data": TensorConfig(data_gen=partial(generate_input)) - }, - outputs=["relu_output"], - ) - - return program_config - - def sample_predictor_configs(self, program_config): - config = self.create_inference_config() - yield config, ["im2sequence", "fusion_seqconv_eltadd_relu"], ( - 1e-5, - 1e-5, - ) - - def test(self): - self.run_and_statis( - quant=False, passes=["seqconv_eltadd_relu_fuse_pass"] - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/legacy_test/CMakeLists.txt b/test/legacy_test/CMakeLists.txt index 63d84ece4aa98..3c1024abc34ea 100644 --- a/test/legacy_test/CMakeLists.txt +++ b/test/legacy_test/CMakeLists.txt @@ -655,7 +655,6 @@ if(WITH_DISTRIBUTE) list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_lars") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_train") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_save_load") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_text_classification") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_train") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_word2vec") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_gloo") diff --git a/test/legacy_test/dist_text_classification.py b/test/legacy_test/dist_text_classification.py deleted file mode 100644 index 0e3c79d758c80..0000000000000 --- a/test/legacy_test/dist_text_classification.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import re -import string -import tarfile - -import nets -from test_dist_base import TestDistRunnerBase, runtime_main - -import paddle -from paddle import base - -DTYPE = "float32" -VOCAB_URL = 'http://paddle-dist-ce-data.bj.bcebos.com/imdb.vocab' -VOCAB_MD5 = '23c86a0533c0151b6f12fa52b106dcc2' -DATA_URL = 'http://paddle-dist-ce-data.bj.bcebos.com/text_classification.tar.gz' -DATA_MD5 = '29ebfc94f11aea9362bbb7f5e9d86b8a' - - -# Load dictionary. 
-def load_vocab(filename):
-    vocab = {}
-    with open(filename, 'r', encoding="utf-8") as f:
-        for idx, line in enumerate(f):
-            vocab[line.strip()] = idx
-    return vocab
-
-
-def get_worddict(dict_path):
-    word_dict = load_vocab(dict_path)
-    word_dict["<unk>"] = len(word_dict)
-    dict_dim = len(word_dict)
-    return word_dict, dict_dim
-
-
-def conv_net(
-    input,
-    dict_dim,
-    emb_dim=128,
-    window_size=3,
-    num_filters=128,
-    fc0_dim=96,
-    class_dim=2,
-):
-    emb = paddle.static.nn.embedding(
-        input=input,
-        size=[dict_dim, emb_dim],
-        is_sparse=False,
-        param_attr=base.ParamAttr(
-            initializer=paddle.nn.initializer.Constant(value=0.01)
-        ),
-    )
-
-    conv_3 = nets.sequence_conv_pool(
-        input=emb,
-        num_filters=num_filters,
-        filter_size=window_size,
-        act="tanh",
-        pool_type="max",
-        param_attr=base.ParamAttr(
-            initializer=paddle.nn.initializer.Constant(value=0.01)
-        ),
-    )
-
-    fc_0 = paddle.static.nn.fc(
-        x=[conv_3],
-        size=fc0_dim,
-        weight_attr=base.ParamAttr(
-            initializer=paddle.nn.initializer.Constant(value=0.01)
-        ),
-    )
-
-    prediction = paddle.static.nn.fc(
-        x=[fc_0],
-        size=class_dim,
-        activation="softmax",
-        weight_attr=base.ParamAttr(
-            initializer=paddle.nn.initializer.Constant(value=0.01)
-        ),
-    )
-
-    return prediction
-
-
-def inference_network(dict_dim):
-    data = paddle.static.data(
-        name="words", shape=[-1, 1], dtype="int64", lod_level=1
-    )
-    out = conv_net(data, dict_dim)
-    return out
-
-
-def get_reader(word_dict, batch_size):
-    # The training data set.
-    train_reader = paddle.batch(train(word_dict), batch_size=batch_size)
-
-    # The testing data set.
-    test_reader = paddle.batch(test(word_dict), batch_size=batch_size)
-
-    return train_reader, test_reader
-
-
-def get_optimizer(learning_rate):
-    optimizer = paddle.optimizer.SGD(learning_rate=learning_rate)
-    return optimizer
-
-
-class TestDistTextClassification2x2(TestDistRunnerBase):
-    def get_model(self, batch_size=2):
-        vocab = os.path.join(
-            paddle.dataset.common.DATA_HOME, "text_classification", "imdb.vocab"
-        )
-        word_dict, dict_dim = get_worddict(vocab)
-
-        # Input data
-        data = paddle.static.data(
-            name="words", shape=[-1, 1], dtype="int64", lod_level=1
-        )
-        label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')
-
-        # Train program
-        predict = conv_net(data, dict_dim)
-        cost = paddle.nn.functional.cross_entropy(
-            input=predict, label=label, reduction='none', use_softmax=False
-        )
-        avg_cost = paddle.mean(x=cost)
-        acc = paddle.static.accuracy(input=predict, label=label)
-        inference_program = base.default_main_program().clone()
-
-        # Optimization
-        opt = get_optimizer(learning_rate=0.001)
-        opt.minimize(avg_cost)
-
-        # Reader
-        train_reader, test_reader = get_reader(word_dict, batch_size)
-
-        return (
-            inference_program,
-            avg_cost,
-            train_reader,
-            test_reader,
-            acc,
-            predict,
-        )
-
-
-def tokenize(pattern):
-    """
-    Read files that match the given pattern. Tokenize and yield each file.
-    """
-
-    with tarfile.open(
-        paddle.dataset.common.download(
-            DATA_URL, 'text_classification', DATA_MD5
-        )
-    ) as tarf:
-        # Note that we should use tarfile.next(), which does
-        # sequential access of member files, rather than
-        # tarfile.extractfile, which does random access and might
-        # destroy hard disks.
-        tf = tarf.next()
-        while tf is not None:
-            if bool(pattern.match(tf.name)):
-                # newline and punctuation removal and ad-hoc tokenization.
-                yield tarf.extractfile(tf).read().rstrip(b'\n\r').translate(
-                    None, string.punctuation.encode('latin-1')
-                ).lower().split()
-            tf = tarf.next()
-
-
-def reader_creator(pos_pattern, neg_pattern, word_idx):
-    UNK = word_idx['<unk>']
-    INS = []
-
-    def load(pattern, out, label):
-        for doc in tokenize(pattern):
-            out.append(([word_idx.get(w, UNK) for w in doc], label))
-
-    load(pos_pattern, INS, 0)
-    load(neg_pattern, INS, 1)
-
-    def reader():
-        yield from INS
-
-    return reader
-
-
-def train(word_idx):
-    """
-    IMDB training set creator.
-
-    It returns a reader creator; each sample in the reader is a zero-based ID
-    sequence and a label in [0, 1].
-
-    :param word_idx: word dictionary
-    :type word_idx: dict
-    :return: Training reader creator
-    :rtype: callable
-    """
-    return reader_creator(
-        re.compile(r"train/pos/.*\.txt$"),
-        re.compile(r"train/neg/.*\.txt$"),
-        word_idx,
-    )
-
-
-def test(word_idx):
-    """
-    IMDB test set creator.
-
-    It returns a reader creator; each sample in the reader is a zero-based ID
-    sequence and a label in [0, 1].
-
-    :param word_idx: word dictionary
-    :type word_idx: dict
-    :return: Test reader creator
-    :rtype: callable
-    """
-    return reader_creator(
-        re.compile(r"test/pos/.*\.txt$"),
-        re.compile(r"test/neg/.*\.txt$"),
-        word_idx,
-    )
-
-
-if __name__ == "__main__":
-    paddle.dataset.common.download(VOCAB_URL, 'text_classification', VOCAB_MD5)
-    paddle.dataset.common.download(DATA_URL, 'text_classification', DATA_MD5)
-    runtime_main(TestDistTextClassification2x2)
diff --git a/test/legacy_test/nets.py b/test/legacy_test/nets.py
index 035cb04a6f6d7..d63199ab9baf8 100644
--- a/test/legacy_test/nets.py
+++ b/test/legacy_test/nets.py
@@ -18,7 +18,6 @@
 
 __all__ = [
     "simple_img_conv_pool",
-    "sequence_conv_pool",
     "glu",
     "scaled_dot_product_attention",
     "img_conv_group",
@@ -273,85 +272,6 @@ def __extend_list__(obj):
     return pool_out
 
 
-def sequence_conv_pool(
-    input,
-    num_filters,
-    filter_size,
-    param_attr=None,
-    act="sigmoid",
-    pool_type="max",
-    bias_attr=None,
-):
-    """
-    :api_attr: Static Graph
-
-    **This API takes input as an LoDTensor. If input is a Tensor, please use**
-    :ref:`api_base_nets_simple_img_conv_pool` **instead**
-
-    The sequence_conv_pool is composed of :ref:`api_base_layers_sequence_conv`
-    and :ref:`api_base_layers_sequence_pool` .
-
-    Args:
-        input (Tensor): 2-D LoDTensor, the input of sequence_conv,
-            which supports variable-length input sequences.
-            The underlying data of input is a matrix with shape
-            (T, N), where T is the total time steps in this mini-batch and N is
-            the input_hidden_size. The data type is float32 or float64.
-        num_filters(int): The number of filters.
-        filter_size (int): The filter size.
-        param_attr (ParamAttr): The parameters of the sequence_conv Layer. Default: None.
-        act (str|None): Activation type for the sequence_conv Layer.
-            If set to None, no activation will be applied. Default: "sigmoid".
-        pool_type (str): Pooling type can be :math:`max` for max-pooling, :math:`average` for
-            average-pooling, :math:`sum` for sum-pooling, :math:`sqrt` for sqrt-pooling.
-            Default :math:`max`.
-        bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of sequence_conv.
-            If it is set to False, no bias will be added to the output units.
-            If it is set to None or one attribute of ParamAttr, sequence_conv
-            will create ParamAttr as bias_attr. If the Initializer of the bias_attr
-            is not set, the bias is initialized zero. Default: None.
-
-    Returns:
-        The final result after sequence_conv and sequence_pool.
- It is a 2-D Tensor, with the same data type as :attr:`input` - - Return Type: - Tensor - - Examples: - .. code-block:: python - - import paddle.base as base - import paddle - paddle.enable_static() - input_dim = 100 #len(word_dict) - emb_dim = 128 - hid_dim = 512 - data = paddle.static.data(name="words", shape=[None, 1], dtype="int64", lod_level=1) - emb = paddle.static.nn.embedding(input=data, size=[input_dim, emb_dim], is_sparse=True) - seq_conv = base.nets.sequence_conv_pool(input=emb, - num_filters=hid_dim, - filter_size=3, - act="tanh", - pool_type="sqrt") - """ - - check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'input') - conv_out = paddle.static.nn.sequence_lod.sequence_conv( - input=input, - num_filters=num_filters, - filter_size=filter_size, - param_attr=param_attr, - bias_attr=bias_attr, - act=act, - ) - - pool_out = paddle.static.nn.sequence_lod.sequence_pool( - input=conv_out, pool_type=pool_type - ) - return pool_out - - @deprecated(since="2.0.0", update_to="paddle.nn.functional.glu") def glu(input, dim=-1): r""" diff --git a/test/legacy_test/test_dist_text_classification.py b/test/legacy_test/test_dist_text_classification.py index f777775724aee..46711a67bfb22 100644 --- a/test/legacy_test/test_dist_text_classification.py +++ b/test/legacy_test/test_dist_text_classification.py @@ -25,28 +25,12 @@ def _setup_config(self): self._sync_mode = True self._enforce_place = "CPU" - def test_text_classification(self): - self.check_with_place( - "dist_text_classification.py", - delta=1e-6, - check_error_log=True, - log_name=flag_name, - ) - class TestDistTextClassification2x2Async(TestDistBase): def _setup_config(self): self._sync_mode = False self._enforce_place = "CPU" - def test_se_resnext(self): - self.check_with_place( - "dist_text_classification.py", - delta=100, - check_error_log=True, - log_name=flag_name, - ) - if __name__ == "__main__": unittest.main() diff --git a/test/legacy_test/test_fusion_seqconv_eltadd_relu_op.py b/test/legacy_test/test_fusion_seqconv_eltadd_relu_op.py deleted file mode 100644 index b4b2471d95da9..0000000000000 --- a/test/legacy_test/test_fusion_seqconv_eltadd_relu_op.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import unittest - -import numpy as np -from op_test import OpTest - -sys.path.append("../../test/sequence") -from test_sequence_conv import seqconv - - -class TestSeqConvEltAddRelu(OpTest): - def set_conf(self): - pass - - def setUp(self): - self.op_type = 'fusion_seqconv_eltadd_relu' - self.lod = [[6, 4]] - self.in_fea_size = 16 - self.out_fea_size = 8 - self.context_length = 4 - self.context_stride = 1 - self.context_start = 0 - self.set_conf() - - assert self.context_stride == 1 - - T = sum(self.lod[0]) - x = np.random.uniform(-1, 1, [T, self.in_fea_size]).astype('float32') - w = np.random.uniform( - -1, 1, [self.in_fea_size * self.context_length, self.out_fea_size] - ).astype('float32') - b = np.random.uniform(-2, 1, [1, self.out_fea_size]).astype('float32') - out = seqconv(x, self.lod, w, self.context_length, self.context_start) - out = np.maximum(out + b, 0) - - self.inputs = {'X': (x, self.lod), 'Filter': w, 'Bias': b} - self.attrs = { - 'contextStart': self.context_start, - 'contextLength': self.context_length, - 'contextStride': self.context_stride, - } - self.outputs = {'Out': out} - - def test_check_output(self): - self.check_output(check_dygraph=False) - - -class TestSeqConvEltAddReluBS1(TestSeqConvEltAddRelu): - def set_conf(self): - self.lod = [[10]] - - -class TestSeqConvEltAddReluBS1Case2(TestSeqConvEltAddRelu): - def set_conf(self): - self.lod = [[2]] - - -class TestSeqConvEltAddReluCase1(TestSeqConvEltAddRelu): - def set_conf(self): - self.lod = [[3, 5, 1, 6]] - self.context_length = 3 - self.context_start = -2 - - -class TestSeqConvEltAddReluCase2(TestSeqConvEltAddRelu): - def set_conf(self): - self.lod = [[10, 1, 2, 4, 1, 5, 6]] - self.in_fea_size = 2 - self.context_length = 4 - self.context_start = -1 - - -class TestSeqConvEltAddReluCase3(TestSeqConvEltAddRelu): - def set_conf(self): - self.lod = [[10, 1, 2, 4, 1, 5, 6]] - self.context_length = 5 - self.context_start = -4 - - -if __name__ == '__main__': - unittest.main() diff --git a/test/sequence/CMakeLists.txt b/test/sequence/CMakeLists.txt index 4f5b02114ebfd..ff11cacdf9ae0 100644 --- a/test/sequence/CMakeLists.txt +++ b/test/sequence/CMakeLists.txt @@ -7,7 +7,6 @@ string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") foreach(TEST_OP ${TEST_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach() -set_tests_properties(test_sequence_conv PROPERTIES TIMEOUT 120) set_tests_properties(test_sequence_concat PROPERTIES TIMEOUT 120) set_tests_properties(test_sequence_pool PROPERTIES TIMEOUT 120) diff --git a/test/sequence/test_sequence_conv.py b/test/sequence/test_sequence_conv.py deleted file mode 100644 index 21644be661dde..0000000000000 --- a/test/sequence/test_sequence_conv.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-import random
-import unittest
-
-import numpy as np
-from op_test import OpTest
-
-import paddle
-
-
-def seqconv(
-    x,
-    lod,
-    filter,
-    context_length,
-    context_start,
-    padding_trainable=False,
-    padding_data=None,
-):
-    [T, M] = x.shape
-    col = np.zeros((T, context_length * M)).astype('float32')
-    offset = [0]
-    for seq_len in lod[0]:
-        offset.append(offset[-1] + seq_len)
-    begin_pad = np.max([0, -context_start])
-    for i in range(len(offset) - 1):
-        for j in range(context_length):
-            in_begin = offset[i] + context_start + j
-            in_end = offset[i + 1] + context_start + j
-            out_begin = offset[i]
-            out_end = offset[i + 1]
-            if in_begin < offset[i]:
-                pad_size = np.min(
-                    [offset[i] - in_begin, offset[i + 1] - offset[i]]
-                )
-                if padding_trainable:
-                    sub_w = padding_data[j : j + pad_size, :]
-                    col[
-                        offset[i] : offset[i] + pad_size, j * M : (j + 1) * M
-                    ] = sub_w
-                out_begin = offset[i] + pad_size
-                in_begin = offset[i]
-
-            if in_end > offset[i + 1]:
-                pad_size = np.min(
-                    [in_end - offset[i + 1], offset[i + 1] - offset[i]]
-                )
-                if padding_trainable:
-                    sub_w = padding_data[
-                        begin_pad
-                        + context_start
-                        + j
-                        - pad_size : begin_pad
-                        + context_start
-                        + j,
-                        :,
-                    ]
-                    col[
-                        offset[i + 1] - pad_size : offset[i + 1],
-                        j * M : (j + 1) * M,
-                    ] = sub_w
-                in_end = offset[i + 1]
-                out_end = offset[i + 1] - pad_size
-            if in_end <= in_begin:
-                continue
-            in_sub = x[in_begin:in_end, :]
-            col[out_begin:out_end, j * M : (j + 1) * M] += in_sub
-    return np.dot(col, filter)
-
-
-class TestSeqProject(OpTest):
-    def setUp(self):
-        self.init_test_case()
-        self.op_type = 'sequence_conv'
-
-        if (
-            self.context_length == 1
-            and self.context_start == 0
-            and self.padding_trainable
-        ):
-            print(
-                "If context_start is 0 "
-                "and context_length is 1,"
-                " padding_trainable should be false."
-            )
-            return
-
-        # one level, batch size
-        x = np.random.uniform(
-            0.1, 1, [self.input_size[0], self.input_size[1]]
-        ).astype('float32')
-        w = np.random.uniform(
-            0.1,
-            1,
-            [
-                self.context_length * self.input_size[1],
-                self.output_represention,
-            ],
-        ).astype('float32')
-
-        begin_pad = np.max([0, -self.context_start])
-        end_pad = np.max([0, self.context_start + self.context_length - 1])
-        total_pad = begin_pad + end_pad
-        padding_data = np.random.uniform(
-            0.1, 1, [total_pad, self.input_size[1]]
-        ).astype('float32')
-        self.pad_data = padding_data
-        self.inputs = {
-            'X': (x, self.lod),
-            'Filter': w,
-        }
-        self.inputs_val = ['X', 'Filter']
-        self.inputs_val_no_x = ['Filter']
-        self.inputs_val_no_f = ['X']
-
-        if total_pad != 0:
-            self.inputs['PaddingData'] = padding_data
-            self.inputs_val = ['X', 'PaddingData', 'Filter']
-            self.inputs_val_no_x = ['PaddingData', 'Filter']
-            self.inputs_val_no_f = ['PaddingData', 'X']
-
-        self.attrs = {
-            'contextStart': self.context_start,
-            'contextLength': self.context_length,
-            'paddingTrainable': self.padding_trainable,
-            'contextStride': self.context_stride,
-        }
-        out = seqconv(
-            x,
-            self.lod,
-            w,
-            self.context_length,
-            self.context_start,
-            self.padding_trainable,
-            self.pad_data,
-        )
-        self.outputs = {'Out': out}
-
-    def test_check_output(self):
-        # NOTE(yjjiang11): This op will be deprecated.
- self.check_output(check_dygraph=False) - - def test_check_grad(self): - if self.padding_trainable: - self.check_grad( - set(self.inputs_val), - 'Out', - max_relative_error=0.05, - check_dygraph=False, - ) - - def test_check_grad_input(self): - self.check_grad( - ['X'], - 'Out', - max_relative_error=0.05, - no_grad_set=set(self.inputs_val_no_x), - check_dygraph=False, - ) - - def test_check_grad_padding_data(self): - if self.padding_trainable: - self.check_grad( - ['PaddingData'], - 'Out', - no_grad_set={'X', 'Filter'}, - check_dygraph=False, - ) - - def test_check_grad_Filter(self): - self.check_grad( - ['Filter'], - 'Out', - max_relative_error=0.05, - no_grad_set=set(self.inputs_val_no_f), - check_dygraph=False, - ) - - def test_check_grad_input_filter(self): - if self.padding_trainable: - self.check_grad( - ['X', 'Filter'], - 'Out', - max_relative_error=0.05, - no_grad_set={'PaddingData'}, - check_dygraph=False, - ) - - def test_check_grad_padding_input(self): - if self.padding_trainable: - self.check_grad( - self.inputs_val_no_f, - 'Out', - max_relative_error=0.05, - no_grad_set={'Filter'}, - check_dygraph=False, - ) - - def test_check_grad_padding_filter(self): - if self.padding_trainable: - self.check_grad( - self.inputs_val_no_x, - 'Out', - max_relative_error=0.05, - no_grad_set={'X'}, - check_dygraph=False, - ) - - def init_test_case(self): - self.input_row = 11 - self.context_start = 0 - self.context_length = 1 - self.padding_trainable = False - self.context_stride = 1 - - self.input_size = [self.input_row, 23] - offset_lod = [[0, 4, 5, 8, self.input_row]] - self.lod = [[]] - # convert from offset-based lod to length-based lod - for i in range(len(offset_lod[0]) - 1): - self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i]) - self.output_represention = 8 # output feature size - - -class TestSeqProjectCase1(TestSeqProject): - def init_test_case(self): - self.input_row = 11 - self.context_start = -1 - self.context_length = 3 - self.padding_trainable = True - self.context_stride = 1 - - self.input_size = [self.input_row, 50] - offset_lod = [[0, 4, 5, 8, self.input_row]] - self.lod = [[]] - # convert from offset-based lod to length-based lod - for i in range(len(offset_lod[0]) - 1): - self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i]) - self.output_represention = 8 # output feature size - - -class TestSeqProjectCase2Len0(TestSeqProject): - def init_test_case(self): - self.input_row = 11 - self.context_start = -1 - self.context_length = 3 - self.padding_trainable = True - self.context_stride = 1 - - self.input_size = [self.input_row, 50] - offset_lod = [[0, 0, 4, 5, 5, 8, self.input_row, self.input_row]] - self.lod = [[]] - # convert from offset-based lod to length-based lod - for i in range(len(offset_lod[0]) - 1): - self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i]) - self.output_represention = 8 # output feature size - - -class TestSeqProjectCase3(TestSeqProject): - def init_test_case(self): - self.input_row = 25 - self.context_start = 2 - self.context_length = 3 - self.padding_trainable = True - self.context_stride = 1 - - self.input_size = [self.input_row, 25] - idx = list(range(self.input_size[0])) - del idx[0] - offset_lod = [ - [0] + np.sort(random.sample(idx, 8)).tolist() + [self.input_size[0]] - ] - self.lod = [[]] - # convert from offset-based lod to length-based lod - for i in range(len(offset_lod[0]) - 1): - self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i]) - self.output_represention = 8 # output feature size - - -class 
TestSeqConvApi(unittest.TestCase): - def test_api(self): - from paddle import base - - x = paddle.static.data('x', shape=[-1, 32], lod_level=1) - y = paddle.static.nn.sequence_lod.sequence_conv( - input=x, num_filters=2, filter_size=3, padding_start=None - ) - - place = base.CPUPlace() - x_tensor = base.create_lod_tensor( - np.random.rand(10, 32).astype("float32"), [[2, 3, 1, 4]], place - ) - exe = base.Executor(place) - exe.run(base.default_startup_program()) - ret = exe.run(feed={'x': x_tensor}, fetch_list=[y], return_numpy=False) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/xpu/test_sequence_conv_op_xpu.py b/test/xpu/test_sequence_conv_op_xpu.py deleted file mode 100644 index 1b0816938a248..0000000000000 --- a/test/xpu/test_sequence_conv_op_xpu.py +++ /dev/null @@ -1,448 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random -import sys -import unittest - -import numpy as np - -import paddle - -sys.path.append("../") -from get_test_cover_info import ( - XPUOpTestWrapper, - create_test_class, - get_xpu_op_support_types, -) -from op_test_xpu import XPUOpTest - -paddle.enable_static() -np.set_printoptions(threshold=np.inf) - - -def seqconv( - x, - lod, - filter, - context_length, - context_start, - padding_trainable=False, - padding_data=None, -): - [T, M] = x.shape - col = np.zeros((T, context_length * M)).astype('float32') - offset = [0] - for seq_len in lod[0]: - offset.append(offset[-1] + seq_len) - begin_pad = np.max([0, -context_start]) - for i in range(len(offset) - 1): - for j in range(context_length): - in_begin = offset[i] + context_start + j - in_end = offset[i + 1] + context_start + j - out_begin = offset[i] - out_end = offset[i + 1] - if in_begin < offset[i]: - pad_size = np.min( - [offset[i] - in_begin, offset[i + 1] - offset[i]] - ) - if padding_trainable: - sub_w = padding_data[j : j + pad_size, :] - col[ - offset[i] : offset[i] + pad_size, j * M : (j + 1) * M - ] = sub_w - out_begin = offset[i] + pad_size - in_begin = offset[i] - - if in_end > offset[i + 1]: - pad_size = np.min( - [in_end - offset[i + 1], offset[i + 1] - offset[i]] - ) - if padding_trainable: - sub_w = padding_data[ - begin_pad - + context_start - + j - - pad_size : begin_pad - + context_start - + j, - :, - ] - col[ - offset[i + 1] - pad_size : offset[i + 1], - j * M : (j + 1) * M, - ] = sub_w - in_end = offset[i + 1] - out_end = offset[i + 1] - pad_size - if in_end <= in_begin: - continue - in_sub = x[in_begin:in_end, :] - col[out_begin:out_end, j * M : (j + 1) * M] += in_sub - return np.dot(col, filter) - - -class XPUTestSequenceConv(XPUOpTestWrapper): - def __init__(self): - self.op_name = 'sequence_conv' - - class TestSeqProject(XPUOpTest): - def setUp(self): - self.init_test_case() - self.op_type = 'sequence_conv' - self.dtype = self.in_type - self.use_xpu = True - - if ( - self.context_length == 1 - and self.context_start == 0 - and self.padding_trainable - ): - print( - "If context_start is 0 " - 
"and context_length is 1," - " padding_trainable should be false." - ) - return - - # one level, batch size - x = np.random.uniform( - -6.10907e-05, - 0.000104218, - [self.input_size[0], self.input_size[1]], - ).astype(self.dtype) - w = np.random.uniform( - -3.17068e-05, - 0.000159822, - [ - self.context_length * self.input_size[1], - self.output_represention, - ], - ).astype(self.dtype) - - begin_pad = np.max([0, -self.context_start]) - end_pad = np.max([0, self.context_start + self.context_length - 1]) - total_pad = begin_pad + end_pad - padding_data = np.random.uniform( - 0, 0, [total_pad, self.input_size[1]] - ).astype(self.dtype) - self.pad_data = padding_data - self.inputs = { - 'X': (x, self.lod), - 'Filter': w, - } - self.inputs_val = ['X', 'Filter'] - self.inputs_val_no_x = ['Filter'] - self.inputs_val_no_f = ['X'] - - if total_pad != 0: - self.inputs['PaddingData'] = padding_data - self.inputs_val = ['X', 'PaddingData', 'Filter'] - self.inputs_val_no_x = ['PaddingData', 'Filter'] - self.inputs_val_no_f = ['PaddingData', 'X'] - - self.attrs = { - 'contextStart': self.context_start, - 'contextLength': self.context_length, - 'paddingTrainable': self.padding_trainable, - 'contextStride': self.context_stride, - } - out = seqconv( - x, - self.lod, - w, - self.context_length, - self.context_start, - self.padding_trainable, - self.pad_data, - ) - self.outputs = {'Out': out} - - def test_check_output(self): - place = paddle.XPUPlace(0) - self.check_output_with_place(place) - - def test_check_grad_input(self): - self.check_grad(['X'], 'Out', no_grad_set=set(self.inputs_val_no_x)) - - def test_check_grad_padding_data(self): - if self.padding_trainable: - self.check_grad( - ['PaddingData'], 'Out', no_grad_set={'X', 'Filter'} - ) - - def test_check_grad_Filter(self): - self.check_grad( - ['Filter'], 'Out', no_grad_set=set(self.inputs_val_no_f) - ) - - def test_check_grad_input_filter(self): - if self.padding_trainable: - self.check_grad( - ['X', 'Filter'], 'Out', no_grad_set={'PaddingData'} - ) - - def test_check_grad_padding_input(self): - if self.padding_trainable: - self.check_grad( - self.inputs_val_no_f, 'Out', no_grad_set={'Filter'} - ) - - def test_check_grad_padding_filter(self): - if self.padding_trainable: - self.check_grad(self.inputs_val_no_x, 'Out', no_grad_set={'X'}) - - def init_test_case(self): - self.input_row = 7 - self.input_col = 25 - self.context_start = -2 - self.context_length = 5 - self.padding_trainable = False - self.context_stride = 1 - - self.input_size = [self.input_row, self.input_col] - offset_lod = [[0, 1, self.input_row]] - self.lod = [[]] - # convert from offset-based lod to length-based lod - for i in range(len(offset_lod[0]) - 1): - self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i]) - self.output_represention = 8 # output feature size - - class TestSeqProjectCase1(TestSeqProject): - def init_test_case(self): - self.input_row = 11 - self.context_start = -2 - self.context_length = 5 - self.padding_trainable = False - self.context_stride = 1 - - self.input_size = [self.input_row, 50] - offset_lod = [[0, 4, 5, 8, self.input_row]] - self.lod = [[]] - # convert from offset-based lod to length-based lod - for i in range(len(offset_lod[0]) - 1): - self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i]) - self.output_represention = 8 # output feature size - - class TestSeqProjectCase2Len0(TestSeqProject): - def init_test_case(self): - self.input_row = 11 - self.context_start = -2 - self.context_length = 5 - self.padding_trainable = False - 
self.context_stride = 1 - - self.input_size = [self.input_row, 50] - offset_lod = [[0, 0, 4, 5, 5, 8, self.input_row, self.input_row]] - self.lod = [[]] - # convert from offset-based lod to length-based lod - for i in range(len(offset_lod[0]) - 1): - self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i]) - self.output_represention = 8 # output feature size - - class TestSeqProjectCase3(TestSeqProject): - def init_test_case(self): - self.input_row = 25 - self.context_start = -2 - self.context_length = 5 - self.padding_trainable = False - self.context_stride = 1 - - self.input_size = [self.input_row, 25] - idx = list(range(self.input_size[0])) - del idx[0] - offset_lod = [ - [0] - + np.sort(random.sample(idx, 8)).tolist() - + [self.input_size[0]] - ] - self.lod = [[]] - # convert from offset-based lod to length-based lod - for i in range(len(offset_lod[0]) - 1): - self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i]) - self.output_represention = 8 # output feature size - - class TestSeqProjectCase4(TestSeqProject): - def init_test_case(self): - self.input_row = 7835 - self.input_col = 128 - self.context_start = -2 - self.context_length = 5 - self.padding_trainable = False - self.context_stride = 1 - - self.input_size = [self.input_row, self.input_col] - offset_lod = [ - [ - 0, - 1, - 2, - 3, - 131, - 241, - 242, - 263, - 264, - 265, - 266, - 267, - 268, - 387, - 515, - 516, - 644, - 645, - 772, - 794, - 922, - 923, - 924, - 944, - 945, - 1073, - 1074, - 1202, - 1330, - 1458, - 1556, - 1557, - 1558, - 1686, - 1748, - 1876, - 1912, - 1913, - 1914, - 2032, - 2066, - 2194, - 2308, - 2309, - 2347, - 2475, - 2476, - 2477, - 2478, - 2606, - 2607, - 2735, - 2736, - 2737, - 2738, - 2838, - 2966, - 2967, - 2968, - 2969, - 3097, - 3225, - 3353, - 3481, - 3482, - 3520, - 3642, - 3643, - 3754, - 3882, - 3883, - 4010, - 4011, - 4012, - 4140, - 4219, - 4228, - 4356, - 4357, - 4415, - 4475, - 4476, - 4604, - 4605, - 4606, - 4694, - 4695, - 4808, - 4936, - 4961, - 4962, - 5004, - 5132, - 5260, - 5312, - 5440, - 5441, - 5569, - 5570, - 5675, - 5676, - 5750, - 5810, - 5811, - 5939, - 6021, - 6149, - 6277, - 6278, - 6364, - 6425, - 6519, - 6647, - 6648, - 6739, - 6867, - 6995, - 6996, - 7120, - 7223, - 7244, - 7367, - 7407, - 7408, - 7467, - 7595, - 7699, - 7827, - 7835, - ] - ] - self.lod = [[]] - # convert from offset-based lod to length-based lod - for i in range(len(offset_lod[0]) - 1): - self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i]) - self.output_represention = 8 # output feature size - - -support_types = get_xpu_op_support_types('sequence_conv') -for stype in support_types: - create_test_class(globals(), XPUTestSequenceConv, stype) - - -class TestSeqConvApi(unittest.TestCase): - def test_api(self): - from paddle import base - - x = paddle.static.data('x', shape=[-1, 32], lod_level=1) - y = paddle.static.nn.sequence_lod.sequence_conv( - input=x, num_filters=2, filter_size=3, padding_start=None - ) - place = base.CPUPlace() - x_tensor = base.create_lod_tensor( - np.random.rand(10, 32).astype("float32"), [[2, 3, 1, 4]], place - ) - exe = base.Executor(place) - exe.run(base.default_startup_program()) - ret = exe.run(feed={'x': x_tensor}, fetch_list=[y], return_numpy=False) - - -if __name__ == '__main__': - unittest.main() From 213a8ffa4e251c81bbf62a2dc0bb9a9e07b66fb0 Mon Sep 17 00:00:00 2001 From: co63oc Date: Sun, 7 Apr 2024 21:50:58 +0800 Subject: [PATCH 2/6] Fix --- .../api/analyzer_seq_conv1_tester.cc | 190 ------------------ 1 file changed, 190 deletions(-) delete mode 100644 
test/cpp/inference/api/analyzer_seq_conv1_tester.cc
diff --git a/test/cpp/inference/api/analyzer_seq_conv1_tester.cc b/test/cpp/inference/api/analyzer_seq_conv1_tester.cc
deleted file mode 100644
index cc4cb24201b97..0000000000000
--- a/test/cpp/inference/api/analyzer_seq_conv1_tester.cc
+++ /dev/null
@@ -1,190 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "test/cpp/inference/api/tester_helper.h"
-
-namespace paddle {
-namespace inference {
-
-struct DataRecord {
-  std::vector<std::vector<int64_t>> title1, title2, title3, l1;
-  std::vector<size_t> lod1, lod2, lod3, l1_lod;
-  size_t batch_iter{0}, batch_size{1}, num_samples;  // total number of samples
-  DataRecord() = default;
-  explicit DataRecord(const std::string &path, int batch_size = 1)
-      : batch_size(batch_size) {
-    Load(path);
-  }
-  DataRecord NextBatch() {
-    DataRecord data;
-    size_t batch_end = batch_iter + batch_size;
-    // NOTE: skip the final batch if not enough data is provided.
-    if (batch_end <= title1.size()) {
-      GetInputPerBatch(title1, &data.title1, &data.lod1, batch_iter, batch_end);
-      GetInputPerBatch(title2, &data.title2, &data.lod2, batch_iter, batch_end);
-      GetInputPerBatch(title3, &data.title3, &data.lod3, batch_iter, batch_end);
-      GetInputPerBatch(l1, &data.l1, &data.l1_lod, batch_iter, batch_end);
-    }
-    batch_iter += batch_size;
-    return data;
-  }
-  void Load(const std::string &path) {
-    std::ifstream file(path);
-    std::string line;
-    int num_lines = 0;
-    while (std::getline(file, line)) {
-      num_lines++;
-      std::vector<std::string> data;
-      split(line, '\t', &data);
-      PADDLE_ENFORCE_GT(
-          data.size(),
-          4,
-          paddle::platform::errors::Fatal("The size of data is invalid."));
-      // load title1 data
-      std::vector<int64_t> title1_data;
-      split_to_int64(data[0], ' ', &title1_data);
-      // load title2 data
-      std::vector<int64_t> title2_data;
-      split_to_int64(data[1], ' ', &title2_data);
-      // load title3 data
-      std::vector<int64_t> title3_data;
-      split_to_int64(data[2], ' ', &title3_data);
-      // load l1 data
-      std::vector<int64_t> l1_data;
-      split_to_int64(data[3], ' ', &l1_data);
-      title1.push_back(std::move(title1_data));
-      title2.push_back(std::move(title2_data));
-      title3.push_back(std::move(title3_data));
-      l1.push_back(std::move(l1_data));
-    }
-    num_samples = num_lines;
-  }
-};
-
-void PrepareInputs(std::vector<PaddleTensor> *input_slots,
-                   DataRecord *data,
-                   int batch_size) {
-  PaddleTensor title1_tensor, title2_tensor, title3_tensor, l1_tensor;
-  title1_tensor.name = "title1";
-  title2_tensor.name = "title2";
-  title3_tensor.name = "title3";
-  l1_tensor.name = "l1";
-  auto one_batch = data->NextBatch();
-  // assign data
-  TensorAssignData<int64_t>(&title1_tensor, one_batch.title1, one_batch.lod1);
-  TensorAssignData<int64_t>(&title2_tensor, one_batch.title2, one_batch.lod2);
-  TensorAssignData<int64_t>(&title3_tensor, one_batch.title3, one_batch.lod3);
-  TensorAssignData<int64_t>(&l1_tensor, one_batch.l1, one_batch.l1_lod);
-  // Set inputs.
-  input_slots->assign({title1_tensor, title2_tensor, title3_tensor, l1_tensor});
-  for (auto &tensor : *input_slots) {
-    tensor.dtype = PaddleDType::INT64;
-  }
-}
-
-void SetConfig(AnalysisConfig *cfg) {
-  cfg->SetModel(FLAGS_infer_model);
-  cfg->DisableGpu();
-  cfg->SwitchSpecifyInputNames();
-  cfg->SwitchIrOptim();
-}
-
-void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
-  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
-  std::vector<PaddleTensor> input_slots;
-  int epoch =
-      FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1;  // NOLINT
-  LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;
-  for (int bid = 0; bid < epoch; ++bid) {
-    PrepareInputs(&input_slots, &data, FLAGS_batch_size);
-    (*inputs).emplace_back(input_slots);
-  }
-}
-
-// Easy for profiling independently.
-TEST(Analyzer_seq_conv1, profile) {
-  AnalysisConfig cfg;
-  SetConfig(&cfg);
-  std::vector<std::vector<PaddleTensor>> outputs;
-
-  std::vector<std::vector<PaddleTensor>> input_slots_all;
-  SetInput(&input_slots_all);
-  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
-                 input_slots_all,
-                 &outputs,
-                 FLAGS_num_threads);
-
-  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
-    // the first inference result
-    PADDLE_ENFORCE_GT(outputs.size(),
-                      0,
-                      paddle::platform::errors::Fatal(
-                          "The size of output should be greater than 0."));
-    auto output = outputs.back();
-    PADDLE_ENFORCE_EQ(output.size(),
-                      1UL,
-                      paddle::platform::errors::Fatal(
-                          "The size of output should be equal to 1."));
-    size_t size = GetSize(output[0]);
-    PADDLE_ENFORCE_GT(size,
-                      0,
-                      paddle::platform::errors::Fatal(
-                          "The size of output should be greater than 0."));
-    float *result = static_cast<float *>(output[0].data.data());
-    // output is probability, which is in (0, 1).
-    for (size_t i = 0; i < size; i++) {
-      EXPECT_GT(result[i], 0);
-      EXPECT_LT(result[i], 1);
-    }
-  }
-}
-
-// Check the fuse status
-TEST(Analyzer_seq_conv1, fuse_statis) {
-  AnalysisConfig cfg;
-  SetConfig(&cfg);
-  int num_ops;
-  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
-
-  auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops);
-  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
-  ASSERT_TRUE(fuse_statis.count("seqconv_eltadd_relu_fuse"));
-  EXPECT_EQ(fuse_statis.at("fc_fuse"), 2);
-  EXPECT_EQ(fuse_statis.at("seqconv_eltadd_relu_fuse"), 6);
-}
-
-// Compare result of NativeConfig and AnalysisConfig
-TEST(Analyzer_seq_conv1, compare) {
-  AnalysisConfig cfg;
-  SetConfig(&cfg);
-
-  std::vector<std::vector<PaddleTensor>> input_slots_all;
-  SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(
-      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
-}
-
-// Compare Deterministic result
-TEST(Analyzer_seq_conv1, compare_determine) {
-  AnalysisConfig cfg;
-  SetConfig(&cfg);
-
-  std::vector<std::vector<PaddleTensor>> input_slots_all;
-  SetInput(&input_slots_all);
-  CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
-                       input_slots_all);
-}
-
-}  // namespace inference
-}  // namespace paddle
From a6b2593d09ee3b840d0dbcfb469388d871eb94fc Mon Sep 17 00:00:00 2001
From: co63oc
Date: Mon, 8 Apr 2024 06:51:31 +0800
Subject: [PATCH 3/6] ci

From 4ad1fea9e46342d60c130c1741ce69daeecd0ac9 Mon Sep 17 00:00:00 2001
From: co63oc
Date: Mon, 8 Apr 2024 09:10:57 +0800
Subject: [PATCH 4/6] Fix

---
 test/cpp/inference/api/CMakeLists.txt | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/test/cpp/inference/api/CMakeLists.txt b/test/cpp/inference/api/CMakeLists.txt
index ffb306df0810d..63e4acea5ea2c 100644
--- a/test/cpp/inference/api/CMakeLists.txt
+++ b/test/cpp/inference/api/CMakeLists.txt
@@ -485,14 +485,6 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST)
     test_analyzer_text_classification
     ${TEXT_CLASSIFICATION_INSTALL_DIR}
analyzer_text_classification_tester.cc EXTRA_DEPS common) - # seq_conv1 - set(SEQ_CONV1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/seq_conv1") - download_model_and_data_without_verify( - ${SEQ_CONV1_INSTALL_DIR} "seq_conv1_model.tar.gz" - "seq_conv1_data.txt.tar.gz") - inference_analysis_api_test(test_analyzer_seq_conv1 ${SEQ_CONV1_INSTALL_DIR} - analyzer_seq_conv1_tester.cc EXTRA_DEPS common) - # transformer, the dataset only works on batch_size=8 now set(TRANSFORMER_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/transformer") download_model_and_data_without_verify( From 19b16edd905ef320eeffb9933b5f5321215b1fb2 Mon Sep 17 00:00:00 2001 From: co63oc Date: Mon, 8 Apr 2024 16:48:46 +0800 Subject: [PATCH 5/6] Fix --- paddle/fluid/inference/api/paddle_pass_builder.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index f53e89d9f812c..db52f899698c4 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -442,7 +442,6 @@ void CpuPassStrategy::EnableMkldnnInt8() { passes_.emplace_back("squeeze2_transpose2_onednn_fuse_pass"); passes_.emplace_back("layer_norm_fuse_pass"); passes_.emplace_back("attention_lstm_fuse_pass"); - passes_.emplace_back("seqconv_eltadd_relu_fuse_pass"); passes_.emplace_back("fc_lstm_fuse_pass"); passes_.emplace_back("mul_lstm_fuse_pass"); passes_.emplace_back("fc_gru_fuse_pass"); From f486a87d561df5789600a57cb32ff555b13de69b Mon Sep 17 00:00:00 2001 From: co63oc Date: Mon, 8 Apr 2024 16:49:45 +0800 Subject: [PATCH 6/6] Fix --- tools/parallel_UT_rule.py | 2 -- tools/static_mode_white_list.py | 1 - 2 files changed, 3 deletions(-) diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index db192979cee60..cf6aceaa806b0 100755 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -1336,7 +1336,6 @@ 'test_imperative_container_parameterlist', 'test_multiplex_op', 'test_trt_transpose_flatten_concat_fuse_pass', - 'test_seqconv_eltadd_relu_fuse_pass', 'test_assert_op', 'test_scatter_nd_op', 'test_sequence_expand', @@ -2602,7 +2601,6 @@ 'test_elementwise_div_op', 'test_gather_tree_op', 'test_imperative_named_members', - 'test_seqconv_eltadd_relu_fuse_pass', 'test_analysis_predictor', 'test_convert_operators', 'test_add_reader_dependency', diff --git a/tools/static_mode_white_list.py b/tools/static_mode_white_list.py index 48f7178fa23dc..0c9be61a7f200 100755 --- a/tools/static_mode_white_list.py +++ b/tools/static_mode_white_list.py @@ -542,7 +542,6 @@ 'test_conv_elementwise_add_fuse_pass', 'test_fc_fuse_pass', 'test_repeated_fc_relu_fuse_pass', - 'test_seqconv_eltadd_relu_fuse_pass', 'test_squared_mat_sub_fuse_pass', 'test_transpose_flatten_concat_fuse_pass', 'test_detection_map_op',