From 9efacb5c6a43c2eacc0b911fc271d8778669e452 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Fri, 15 Dec 2023 09:49:58 +0000
Subject: [PATCH 01/49] onednn dialect gen

---
 .gitignore                                    |   1 +
 paddle/fluid/pir/dialect/CMakeLists.txt       |  40 +++++-
 .../fluid/pir/dialect/op_generator/op_gen.py  | 115 ++++++++++++++++--
 .../op_generator/ops_onednn_extra_parser.py   |  86 +++++++++++++
 .../fluid/pir/dialect/operator/ir/onednn.yaml |   9 ++
 .../dialect/operator/ir/ops_onednn_extra.yaml |  27 ++++
 .../fluid/pir/dialect/operator/trait/onednn.h |  37 ++++++
 paddle/phi/api/yaml/op_compat.yaml            |   9 ++
 8 files changed, 310 insertions(+), 14 deletions(-)
 create mode 100644 paddle/fluid/pir/dialect/op_generator/ops_onednn_extra_parser.py
 create mode 100644 paddle/fluid/pir/dialect/operator/ir/onednn.yaml
 create mode 100644 paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
 create mode 100644 paddle/fluid/pir/dialect/operator/trait/onednn.h

diff --git a/.gitignore b/.gitignore
index 42c39e2a6d96e7..85cd78e5c7912b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -108,5 +108,6 @@ paddle/fluid/pir/dialect/operator/ir/pd_api.*
 paddle/fluid/pir/dialect/operator/ir/op_decomp.cc
 paddle/fluid/pir/dialect/operator/ir/pd_op_vjp.cc
 paddle/fluid/pir/dialect/operator/ir/pd_op.*
+paddle/fluid/pir/dialect/operator/ir/pd_onednn_op.*
 paddle/cinn/hlir/dialect/generated/ops.parsed.yaml
 paddle/cinn/hlir/dialect/operator/ir/cinn_op.*
diff --git a/paddle/fluid/pir/dialect/CMakeLists.txt b/paddle/fluid/pir/dialect/CMakeLists.txt
index e563831e96e61a..d2fc67bf997074 100644
--- a/paddle/fluid/pir/dialect/CMakeLists.txt
+++ b/paddle/fluid/pir/dialect/CMakeLists.txt
@@ -30,12 +30,20 @@ set(pd_op_forward_yaml_file2
 set(pd_op_backward_yaml_file
     ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/ops_backward.yaml)
 
+set(pd_onednn_op_yaml_file
+    ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/onednn.yaml)
+
+set(pd_ops_onednn_extra_yaml_file
+    ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
+)
+
 set(parsed_op_dir
     ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/generated)
 
 set(op_yaml_file3 ${parsed_op_dir}/ops.parsed.yaml)
 set(op_yaml_file4 ${parsed_op_dir}/ops_backward.parsed.yaml)
 set(op_yaml_file5 ${parsed_op_dir}/update_ops.parsed.yaml)
+set(op_yaml_file6 ${parsed_op_dir}/onednn.parsed.yaml)
 
 set(op_yaml_files
     ${op_forward_yaml_file1},${op_backward_yaml_file1},${fused_op_forward_yaml_file},${fused_op_backward_yaml_file},${op_yaml_file3},${op_yaml_file4},${op_yaml_file5}
@@ -51,12 +59,21 @@ set(op_vjp_source_file ${PD_DIALECT_SOURCE_DIR}/pd_op_vjp.cc)
 set(op_decomp_source_file ${PD_DIALECT_SOURCE_DIR}/op_decomp.cc)
 set(op_vjp_source_file_tmp ${op_vjp_source_file}.tmp)
 
+set(onednn_op_namespace paddle,onednn,dialect)
+set(onednn_dialect_name pd_onednn_op)
+set(onednn_op_header_file ${PD_DIALECT_SOURCE_DIR}/pd_onednn_op.h)
+set(onednn_op_source_file ${PD_DIALECT_SOURCE_DIR}/pd_onednn_op.cc)
+set(onednn_op_header_file_tmp ${onednn_op_header_file}.tmp)
+set(onednn_op_source_file_tmp ${onednn_op_source_file}.tmp)
+
 execute_process(
   COMMAND ${CMAKE_COMMAND} -E make_directory ${parsed_op_dir}
   COMMAND ${PYTHON_EXECUTABLE} ${op_gen_parsed_yaml_file} --op_yaml_path
           ${pd_op_forward_yaml_file1} --output_path ${op_yaml_file3}
   COMMAND ${PYTHON_EXECUTABLE} ${op_gen_parsed_yaml_file} --op_yaml_path
           ${pd_op_forward_yaml_file2} --output_path ${op_yaml_file5}
+  COMMAND ${PYTHON_EXECUTABLE} ${op_gen_parsed_yaml_file} --op_yaml_path
+          ${pd_onednn_op_yaml_file} --output_path ${op_yaml_file6}
   COMMAND ${PYTHON_EXECUTABLE} ${op_gen_parsed_yaml_file} --op_yaml_path
           ${pd_op_backward_yaml_file} --output_path ${op_yaml_file4} --backward)
 
@@ -71,6 +88,18 @@ execute_process(
 set(generated_files_pd_op "${op_header_file}" "${op_source_file}"
                           "${op_vjp_source_file}")
 
+execute_process(
+  COMMAND
+    ${PYTHON_EXECUTABLE} ${op_gen_file} --op_yaml_files ${op_yaml_files}
+    --op_compat_yaml_file ${op_compat_yaml_file} --namespaces
+    ${onednn_op_namespace} --dialect_name ${onednn_dialect_name}
+    --op_def_h_file ${onednn_op_header_file_tmp} --op_def_cc_file
+    ${onednn_op_source_file_tmp} --onednn_yaml_file ${op_yaml_file6}
+    --ops_onednn_extra_yaml_file ${pd_ops_onednn_extra_yaml_file})
+
+set(generated_files_onednn_pd_op "${onednn_op_header_file}"
+                                 "${onednn_op_source_file}")
+
 set(api_gen_yaml_files
     ${op_forward_yaml_file1},${op_backward_yaml_file1},${op_yaml_file3},${op_yaml_file4},${op_yaml_file5}
 )
@@ -122,8 +151,10 @@ execute_process(
 
 set(generated_files_ops_api "${ops_api_source_file}")
 
-set(generated_files_pir ${generated_files_pd_op} ${generated_files_pd_api}
-                        ${generated_files_python_c} ${generated_files_ops_api})
+set(generated_files_pir
+    ${generated_files_pd_op} ${generated_files_onednn_pd_op}
+    ${generated_files_pd_api} ${generated_files_python_c}
+    ${generated_files_ops_api})
 foreach(generated_file ${generated_files_pir})
   if(EXISTS "${generated_file}.tmp" AND EXISTS "${generated_file}")
     execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different
@@ -156,6 +187,11 @@ cc_library(
   SRCS ${op_dialect_srcs}
   DEPS ${op_dialect_deps})
 
+cc_library(
+  op_onednn_dialect
+  SRCS ${onednn_op_source_file}
+  DEPS ${op_dialect_deps})
+
 #Note(risemeup1):compile some *.cc files which depend on primitive_vjp_experimental into op_dialect_vjp.a/lib
 set(op_dialect_vjp_srcs
     ${CMAKE_CURRENT_SOURCE_DIR}/operator/ir/manual_op_decomp.cc
diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py
index 36df441e09707c..9e111eaea3d002 100644
--- a/paddle/fluid/pir/dialect/op_generator/op_gen.py
+++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py
@@ -29,6 +29,7 @@
 from op_kerneltype_gen import gen_kernel_type_for_var_str
 from op_member_func_gen import gen_op_get_inputs_outputs_str
 from op_verify_gen import gen_verify_func_str
+from ops_onednn_extra_parser import parse_extra_args, parse_layout_transform
 from parse_kernel_key_gen import gen_parse_kernel_key_str
 from reify_infer_shape_gen import gen_reify_infer_shape_str
 from vjp_interface_black_list import vjp_interface_black_list
@@ -63,6 +64,7 @@
 #include "paddle/fluid/pir/dialect/operator/interface/parse_kernel_key.h"
 #include "paddle/fluid/pir/dialect/operator/interface/decomp.h"
 #include "paddle/fluid/pir/dialect/operator/trait/inplace.h"
+#include "paddle/fluid/pir/dialect/operator/trait/onednn.h"
 #include "paddle/fluid/pir/dialect/operator/trait/custom_vjp.h"
 #include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/phi/core/infermeta_utils.h"
@@ -409,7 +411,7 @@ def __init__(self, op_yaml_item, op_compat_item):
             self.non_mutable_attribute_data_type_list,
             self.non_mutable_attribute_build_arg_type_list,
             self.non_mutable_attribute_default_value_list,
-        ) = self.parse_non_nutable_attribute()
+        ) = self.parse_non_mutable_attribute()
 
         # parse infermeta && kernel
         self.infer_meta_map = self.parse_infer_meta_map()
@@ -453,6 +455,16 @@ def __init__(self, op_yaml_item, op_compat_item):
         # parse interfaces list
         self.interfaces_list = self.parse_op_interfaces()
 
+        # OneDnn info
+        self.onednn_extra_args = self.op_yaml_item.get("extra_args", [])
+        self.onednn_layout_transform = self.op_yaml_item.get(
+            "layout_transform"
+        )
+        # Looked up on its own rather than under "extra_args": ops loaded from
+        # onednn.yaml are flagged is_onednn_only=True but carry no extra_args,
+        # and would otherwise never receive OneDNNOnlyTrait.
+        self.is_onednn_only = self.op_yaml_item.get("is_onednn_only", False)
+
     def parse_op_traits(self):
         if 'traits' in self.op_yaml_item:
             return self.op_yaml_item['traits']
@@ -624,7 +636,7 @@ def parse_mutable_attribute(self):
             sorted_mutable_attribute_type_list,
         )
 
-    def parse_non_nutable_attribute(self):
+    def parse_non_mutable_attribute(self):
         op_non_mutable_attribute_name_list = []
         op_non_mutable_attribute_type_list = []
         op_non_mutable_attribute_data_type_list = []
@@ -1043,11 +1055,13 @@ def OpGenerator(
     op_def_h_file,
     op_def_cc_file,
     op_vjp_cc_file,
+    onednn_yaml_file,
+    ops_onednn_extra_yaml_file,
 ):
     # (1) Prepare: Delete existing old files: pd_op.h.tmp, pd_op.cc.tmp
-    if os.path.exists(op_def_h_file):
+    if op_def_h_file is not None and os.path.exists(op_def_h_file):
         os.remove(op_def_h_file)
-    if os.path.exists(op_def_cc_file):
+    if op_def_cc_file is not None and os.path.exists(op_def_cc_file):
         os.remove(op_def_cc_file)
 
     # (2) Prepare: Get all op item in all op_yaml_files
@@ -1059,10 +1073,43 @@ def OpGenerator(
             ops = yaml.safe_load(f)
             op_yaml_items = op_yaml_items + ops
 
+    if dialect_name == "pd_onednn_op":
+        with open(ops_onednn_extra_yaml_file, "r") as f:
+            ops_onednn_extra = yaml.safe_load(f)
+            op_yaml_items_map = {}
+            for op in op_yaml_items:
+                op_yaml_items_map[op['name']] = op
+            op_yaml_items_onednn = []
+            for op in ops_onednn_extra:
+                op_name = op['op']
+                item = op_yaml_items_map[op_name]
+                assert (
+                    item is not None
+                ), f"OneDnn op {op_name} in {ops_onednn_extra_yaml_file} is not define in ops.yaml."
+                item["is_onednn_only"] = False
+                item["extra_args"] = parse_extra_args(op_name, op['extra_args'])
+                if 'layout_transform' in op:
+                    item["layout_transform"] = parse_layout_transform(
+                        op_name, op['layout_transform']
+                    )
+                else:
+                    item["layout_transform"] = None
+                item["attrs"] = item["attrs"] + parse_extra_args(
+                    op_name, op['extra_args']
+                )
+                op_yaml_items_onednn.append(item)
+            op_yaml_items = op_yaml_items_onednn
+
+        with open(onednn_yaml_file, "r") as f:
+            onednn_ops = yaml.safe_load(f)
+            for op in onednn_ops:
+                op["is_onednn_only"] = True
+            op_yaml_items = op_yaml_items + onednn_ops
+
     op_info_items = {}
     for op in op_yaml_items:
         op_compat_item = None
-        if dialect_name == "pd_op":
+        if dialect_name == "pd_op" or dialect_name == "pd_onednn_op":
             op_compat_item = op_compat_parser.get_compat(op['name'])
 
         if (
@@ -1087,6 +1134,23 @@ def OpGenerator(
             op_compat_item['int_array'] = int_array_item
 
         op_info_items[op['name']] = OpInfoParser(op, op_compat_item)
+
+    # if dialect_name == "pd_onednn_op":
+    #     with open(ops_onednn_extra_yaml_file, "r") as f:
+    #         ops_onednn_extra = yaml.safe_load(f)
+    #         op_info_items_onednn = {}
+    #         for op in ops_onednn_extra:
+    #             op_name = op['op']
+    #             item = op_info_items[op_name]
+    #             assert (item is not None), f"OneDnn op {op_name} in {ops_onednn_extra_yaml_file} is not define in ops.yaml."
+    #             item.onednn_extra_args = parse_extra_args(op_name, op['extra_args'])
+    #             if 'layout_transform' in op:
+    #                 item.onednn_layout_transform = parse_layout_transform(op_name, op['layout_transform'])
+    #             else:
+    #                 item.onednn_layout_transform = None
+    #             op_info_items_onednn[op_name] = item
+    #     op_info_items = op_info_items_onednn
+
     # (3) CodeGen: Traverse op_info_items and generate
     ops_name_list = []  # all op class name store in this list
     ops_declare_list = []  # all op class declare store in this list
@@ -1154,17 +1218,21 @@ def OpGenerator(
         if (
             op_info.backward_name
             and op_info.op_phi_name[0] not in vjp_interface_black_list
+            and dialect_name != "pd_onednn_op"
         ):
             op_interfaces += ["paddle::dialect::VjpInterface"]
         exclusive_interface_str = gen_exclusive_interface_str(
             op_info, op_info_items
         )
 
-        if dialect_name == "pd_op":
+        if dialect_name == "pd_op" or dialect_name == "pd_onednn_op":
             op_interfaces += ["paddle::dialect::GetKernelTypeForVarInterface"]
 
         # if op has custom vjp rule, then append a CustomVjpTrait to it
-        if op_info.op_phi_name[0] in custom_vjp_op_name_list:
+        if (
+            op_info.op_phi_name[0] in custom_vjp_op_name_list
+            and dialect_name != "pd_onednn_op"
+        ):
             op_traits += ["paddle::dialect::CustomVjpTrait"]
 
         # check op inputs and mutable_attributes grad semantics
@@ -1183,6 +1251,11 @@ def OpGenerator(
             if op_name[-1] == "_":
                 op_traits += ["paddle::dialect::InplaceTrait"]
 
+            if dialect_name == "pd_onednn_op":
+                op_traits += ["paddle::dialect::OneDNNTrait"]
+
+            if op_info.is_onednn_only:
+                op_traits += ["paddle::dialect::OneDNNOnlyTrait"]
             op_traits_str = ""
             if len(op_traits) > 0:
                 op_traits_str = "," + ",".join(op_traits)
@@ -1261,7 +1334,7 @@ def OpGenerator(
                 build_func_with_muta_attr_is_input = ""
 
                 get_kernel_type_for_var_declare_str = ""
-                if dialect_name == "pd_op":
+                if dialect_name == "pd_op" or dialect_name == "pd_onednn_op":
                     get_kernel_type_for_var_declare_str = (
                         get_kernel_type_for_var_declare_template
                     )
@@ -1619,7 +1692,7 @@ def OpGenerator(
 
                 # generate op GetKernelKeyForVar function str
                 op_get_kernel_type_for_var_str = ''
-                if dialect_name == "pd_op":
+                if dialect_name == "pd_op" or dialect_name == "pd_onednn_op":
                     op_get_kernel_type_for_var_str = (
                         gen_kernel_type_for_var_str(
                             op_class_name,
@@ -1648,6 +1721,7 @@ def OpGenerator(
                         op_info.backward_name
                         and op_info.op_phi_name[0]
                         not in vjp_interface_black_list
+                        and dialect_name != "pd_onednn_op"
                     ):
                         op_vjp_str = gen_op_vjp_str(
                             op_class_name,
@@ -1678,7 +1752,7 @@ def OpGenerator(
                     ops_defined_list.append(reify_infer_shape_define_str)
 
                     # NOTE(chenxi67)skip if dialect_name==cinn
-                    if dialect_name == "cinn":
+                    if dialect_name == "cinn" or dialect_name == "pd_onednn_op":
                         pass
                     else:
                         ops_vjp_defined_list.append(op_vjp_str)
@@ -1775,10 +1849,17 @@ def OpGenerator(
     else:
         op_to_multi_kernels_map_str = ""
 
+    if dialect_name == "pd_onednn_op":
+        # Use the repo-relative path of pd_op.h, like the generator's other
+        # includes; an absolute path only exists on the author's machine.
+        pd_op_h_file = "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
+        op_def_h_file_tmp = pd_op_h_file + "\"\n#include \"" + op_def_h_file
+    else:
+        op_def_h_file_tmp = op_def_h_file
     source_file_str = CC_FILE_TEMPLATE.format(
         op_declare=op_list_str,
         op_to_multi_kernels_map=op_to_multi_kernels_map_str,
-        h_file=op_def_h_file[:-4],
+        h_file=op_def_h_file_tmp[:-4],
         input=source_file_str,
         define_type_id=define_type_id_str,
     )  # Add head
@@ -1793,7 +1874,11 @@ def OpGenerator(
         f.write(source_file_str)
     # NOTE(Aurelius84): op_gen.py is called multiply times,
     # and vjp is only avaible for pd dialect.
-    if dialect_name != 'cinn' and op_vjp_cc_file:
+    if (
+        dialect_name != 'cinn'
+        and dialect_name != 'pd_onednn_op'
+        and op_vjp_cc_file
+    ):
         with open(op_vjp_cc_file, 'w') as f:
             f.write(vjp_source_file_str)
 
@@ -1812,6 +1897,8 @@ def ParseArguments():
     parser.add_argument('--op_def_h_file', type=str)
     parser.add_argument('--op_def_cc_file', type=str)
     parser.add_argument('--op_vjp_cc_file', type=str)
+    parser.add_argument('--onednn_yaml_file', type=str)
+    parser.add_argument('--ops_onednn_extra_yaml_file', type=str)
     return parser.parse_args()
 
 
@@ -1830,6 +1917,8 @@ def ParseArguments():
     op_def_h_file = args.op_def_h_file
     op_def_cc_file = args.op_def_cc_file
     op_vjp_cc_file = args.op_vjp_cc_file
+    onednn_yaml_file = args.onednn_yaml_file
+    ops_onednn_extra_yaml_file = args.ops_onednn_extra_yaml_file
 
     # auto code generate
     OpGenerator(
@@ -1840,4 +1929,6 @@ def ParseArguments():
         op_def_h_file,
         op_def_cc_file,
         op_vjp_cc_file,
+        onednn_yaml_file,
+        ops_onednn_extra_yaml_file,
     )
diff --git a/paddle/fluid/pir/dialect/op_generator/ops_onednn_extra_parser.py b/paddle/fluid/pir/dialect/op_generator/ops_onednn_extra_parser.py
new file mode 100644
index 00000000000000..3296fa0d68829d
--- /dev/null
+++ b/paddle/fluid/pir/dialect/op_generator/ops_onednn_extra_parser.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+from typing import Any, Dict, List, Tuple
+
+
+def parse_plain_list(s: str, sep=",") -> List[str]:
+    """Split `s` on `sep`, stripping pieces; "," skips commas inside {...}."""
+    text = s.strip()
+    if sep != ",":
+        return [piece.strip() for piece in text.split(sep)]
+    # Negative lookahead keeps defaults such as "int[] a={1,2}" in one piece.
+    pattern = re.compile(r',(?![^{]*\})')
+    return [piece.strip() for piece in pattern.split(text)]
+
+
+def parse_arg(op_name: str, s: str) -> Dict[str, str]:
+    """Parse one argument written as "typename name" or
+    "typename name = default_value" into a dict with the keys "typename",
+    "name" and, when a default is given, "default_value".
+    """
+    # partition() never fails to unpack, so a lone "typename" hits the asserts.
+    typename, _, rest = (item.strip() for item in s.strip().partition(" "))
+    assert (
+        len(typename) > 0
+    ), f"The arg typename should not be empty. Please check the args of {op_name} in yaml."
+    assert (
+        rest.count("=") <= 1
+    ), f"There is more than 1 = in an arg in {op_name}"
+    if rest.count("=") == 1:
+        name, default_value = (item.strip() for item in rest.split("=", 1))
+        assert (
+            len(name) > 0
+        ), f"The arg name should not be empty. Please check the args of {op_name} in yaml."
+        assert (
+            len(default_value) > 0
+        ), f"The default value should not be empty. Please check the args of {op_name} in yaml."
+        return {
+            "typename": typename,
+            "name": name,
+            "default_value": default_value,
+        }
+    else:
+        name = rest.strip()
+        assert (
+            len(name) > 0
+        ), f"The arg name should not be empty. Please check the args of {op_name} in yaml."
+        return {"typename": typename, "name": name}
+
+
+def parse_extra_args(op_name: str, arguments: str) -> List:
+    """Parse a comma-separated `extra_args` yaml value into attribute dicts.
+
+    Args:
+        op_name: op the arguments belong to; only used in error messages.
+        arguments: text such as 'bool a=false, str b="x"', or None when the
+            yaml key is present but has no value.
+
+    Returns:
+        One parse_arg() dict per argument; [] for None or blank input.
+    """
+    if arguments is None or not arguments.strip():
+        return []
+    return [parse_arg(op_name, arg) for arg in parse_plain_list(arguments)]
+
+
+def parse_layout_transform(
+    op_name: str, layout_transform: Dict[str, Any]
+) -> Tuple[str, List]:
+    """Return (arg_name, tensor names) from a layout_transform yaml mapping."""
+    if layout_transform is None:
+        return "", []
+    arg_name = layout_transform["arg_name"]
+    return arg_name, parse_plain_list(layout_transform["tensors"])
diff --git a/paddle/fluid/pir/dialect/operator/ir/onednn.yaml b/paddle/fluid/pir/dialect/operator/ir/onednn.yaml
new file mode 100644
index 00000000000000..d7de4310d5781f
--- /dev/null
+++ b/paddle/fluid/pir/dialect/operator/ir/onednn.yaml
@@ -0,0 +1,9 @@
+- op : quantize
+  args : (Tensor input, bool is_negative_input=false, float scale=1.0, float shift=0.0, str output_format="NHWC", bool bfloat16=false)
+  output : Tensor(output)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [input]
+  kernel :
+    func : quantize
+    data_type : input
diff --git a/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml b/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
new file mode 100644
index 00000000000000..ec49dd548d44b8
--- /dev/null
+++ b/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
@@ -0,0 +1,27 @@
+
+- op : conv2d
+  extra_args : bool is_test=false
+  layout_transform :
+    arg_name: data_format
+    tensors: input
+
+- op : matmul
+  extra_args : str mkldnn_data_type="float32"
+  layout_transform :
+    arg_name: cur_paddle_data_layout
+    tensors: x, y
+
+- op : pad3d
+  extra_args :
+  layout_transform :
+    arg_name: data_format
+    tensors: x
+
+- op : batch_norm
+  extra_args : bool fuse_with_relu=false
+  layout_transform :
+    arg_name: data_layout
+    tensors: x
+
+- op : prelu
+  extra_args : bool is_test=false, str mkldnn_data_type="float32"
diff --git a/paddle/fluid/pir/dialect/operator/trait/onednn.h b/paddle/fluid/pir/dialect/operator/trait/onednn.h
new file mode 100644
index 00000000000000..b4f6b02ad48206
--- /dev/null
+++ b/paddle/fluid/pir/dialect/operator/trait/onednn.h
@@ -0,0 +1,37 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/pir/core/op_base.h"
+
+namespace paddle {
+namespace dialect {
+class OneDNNTrait : public pir::OpTraitBase<OneDNNTrait> {
+ public:
+  explicit OneDNNTrait(pir::Operation *op)
+      : pir::OpTraitBase<OneDNNTrait>(op) {}
+};
+
+class OneDNNOnlyTrait : public pir::OpTraitBase<OneDNNOnlyTrait> {
+ public:
+  explicit OneDNNOnlyTrait(pir::Operation *op)
+      : pir::OpTraitBase<OneDNNOnlyTrait>(op) {}
+};
+
+}  // namespace dialect
+}  // namespace paddle
+
+IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNTrait)
+IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNOnlyTrait)
diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml
index 1defdb9906bded..72af323f463220 100755
--- a/paddle/phi/api/yaml/op_compat.yaml
+++ b/paddle/phi/api/yaml/op_compat.yaml
@@ -2459,6 +2459,15 @@
   outputs :
     {q : Q, r : R}
 
+- op : quantize
+  backward : quantize_grad
+  inputs :
+    input : Input
+  outputs :
+    output : Output
+  attrs :
+    {scale : Scale, shift : Shift}
+
 - op : quantize_linear
   extra :
     attrs : [float moving_rate = 0.9]

From 7555dc2f84d85ac5a6510732b24f41cd5625871a Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Fri, 15 Dec 2023 10:45:06 +0000
Subject: [PATCH 02/49] refine

---
 paddle/fluid/pir/dialect/CMakeLists.txt       | 37 ++++-----
 .../fluid/pir/dialect/op_generator/op_gen.py  | 82 +++++++++----------
 2 files changed, 56 insertions(+), 63 deletions(-)

diff --git a/paddle/fluid/pir/dialect/CMakeLists.txt b/paddle/fluid/pir/dialect/CMakeLists.txt
index c944ae86cde429..c3c05f24c03c3a 100644
--- a/paddle/fluid/pir/dialect/CMakeLists.txt
+++ b/paddle/fluid/pir/dialect/CMakeLists.txt
@@ -35,14 +35,6 @@ set(pd_ops_onednn_extra_yaml_file
     ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
 )
 
-set(parsed_op_dir
-    ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/generated)
-
-set(op_yaml_file3 ${parsed_op_dir}/ops.parsed.yaml)
-set(op_yaml_file4 ${parsed_op_dir}/ops_backward.parsed.yaml)
-set(op_yaml_file5 ${parsed_op_dir}/update_ops.parsed.yaml)
-set(op_yaml_file6 ${parsed_op_dir}/onednn.parsed.yaml)
-
 set(pir_update_op_fwd_src_yaml
     ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/update_ops.yaml)
 set(parsed_op_dir
@@ -50,6 +42,7 @@ set(parsed_op_dir
 set(pir_op_fwd_yaml ${parsed_op_dir}/ops.parsed.yaml)
 set(pir_op_bwd_yaml ${parsed_op_dir}/ops_backward.parsed.yaml)
 set(pir_update_op_fwd_yaml ${parsed_op_dir}/update_ops.parsed.yaml)
+set(pir_op_onednn_yaml ${parsed_op_dir}/onednn.parsed.yaml)
 
 set(op_yaml_files
     ${op_fwd_yaml},${op_bwd_yaml},${fused_op_fwd_yaml},${fused_op_bwd_yaml},${pir_op_fwd_yaml},${pir_op_bwd_yaml},${pir_update_op_fwd_yaml}
@@ -109,7 +102,7 @@ execute_process(
   COMMAND ${PYTHON_EXECUTABLE} ${op_gen_parsed_yaml_file} --op_yaml_path
           ${pir_update_op_fwd_src_yaml} --output_path ${pir_update_op_fwd_yaml}
   COMMAND ${PYTHON_EXECUTABLE} ${op_gen_parsed_yaml_file} --op_yaml_path
-          ${pd_onednn_op_yaml_file} --output_path ${op_yaml_file6}
+          ${pd_onednn_op_yaml_file} --output_path ${pir_op_onednn_yaml}
   COMMAND ${PYTHON_EXECUTABLE} ${op_gen_parsed_yaml_file} --op_yaml_path
           ${pir_op_bwd_src_yaml} --output_path ${pir_op_bwd_yaml} --backward)
 
@@ -118,8 +111,8 @@ execute_process(
     ${PYTHON_EXECUTABLE} ${op_gen_file} --op_yaml_files ${op_yaml_files}
     --op_compat_yaml_file ${op_compat_yaml_file} --namespaces ${op_namespace}
     --dialect_name ${dialect_name} --op_def_h_file ${op_header_file_tmp}
-    --op_info_file ${op_info_file_tmp} --op_def_cc_file ${op_src_files_tmp}
-    --op_vjp_cc_file ${op_vjp_src_file_tmp})
+    --op_def_cc_file ${op_src_files_tmp} --op_vjp_cc_file
+    ${op_vjp_src_file_tmp})
 
 set(generated_files_pd_op
     "${op_header_file}"
@@ -133,17 +126,17 @@ set(generated_files_pd_op
     "${pir_bwd_op_source_file}"
     "${pir_update_op_source_file}")
 
-execute_process(
-  COMMAND
-    ${PYTHON_EXECUTABLE} ${op_gen_file} --op_yaml_files ${op_yaml_files}
-    --op_compat_yaml_file ${op_compat_yaml_file} --namespaces
-    ${onednn_op_namespace} --dialect_name ${onednn_dialect_name}
-    --op_def_h_file ${onednn_op_header_file_tmp} --op_def_cc_file
-    ${onednn_op_source_file_tmp} --onednn_yaml_file ${op_yaml_file6}
-    --ops_onednn_extra_yaml_file ${pd_ops_onednn_extra_yaml_file})
-
-set(generated_files_onednn_pd_op "${onednn_op_header_file}"
-                                 "${onednn_op_source_file}")
+# execute_process(
+#     COMMAND
+#         ${PYTHON_EXECUTABLE} ${op_gen_file} --op_yaml_files ${op_yaml_files}
+#         --op_compat_yaml_file ${op_compat_yaml_file} --namespaces ${onednn_op_namespace}
+#         --dialect_name ${onednn_dialect_name} --op_def_h_file ${onednn_op_header_file_tmp}
+#         --op_info_file ${op_info_file_tmp} --op_def_cc_file ${onednn_op_source_file_tmp}
+#         --onednn_yaml_file ${pir_op_onednn_yaml}
+#         --ops_onednn_extra_yaml_file ${pd_ops_onednn_extra_yaml_file})
+
+# set(generated_files_onednn_pd_op "${onednn_op_header_file}"
+#                                  "${onednn_op_source_file}")
 
 set(api_gen_yaml_files
     ${op_fwd_yaml},${op_bwd_yaml},${pir_op_fwd_yaml},${pir_op_bwd_yaml},${pir_update_op_fwd_yaml}
diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py
index bb912deb76b605..20d4be6fa35326 100644
--- a/paddle/fluid/pir/dialect/op_generator/op_gen.py
+++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py
@@ -29,7 +29,6 @@
 from op_kerneltype_gen import gen_kernel_type_for_var_str
 from op_member_func_gen import gen_op_get_inputs_outputs_str
 from op_verify_gen import gen_verify_func_str
-from ops_onednn_extra_parser import parse_extra_args, parse_layout_transform
 from parse_kernel_key_gen import gen_parse_kernel_key_str
 from reify_infer_shape_gen import gen_reify_infer_shape_str
 from vjp_interface_black_list import vjp_interface_black_list
@@ -1754,6 +1753,39 @@ def OpGenerator(
     # (2) parse yaml files
     op_compat_parser = OpCompatParser(op_compat_yaml_file)
 
+    # if dialect_name == "pd_onednn_op":
+    #     with open(ops_onednn_extra_yaml_file, "r") as f:
+    #         ops_onednn_extra = yaml.safe_load(f)
+    #         op_yaml_items_map = {}
+    #         for op in op_yaml_items:
+    #             op_yaml_items_map[op['name']] = op
+    #         op_yaml_items_onednn = []
+    #         for op in ops_onednn_extra:
+    #             op_name = op['op']
+    #             item = op_yaml_items_map[op_name]
+    #             assert (
+    #                 item is not None
+    #             ), f"OneDnn op {op_name} in {ops_onednn_extra_yaml_file} is not define in ops.yaml."
+    #             item["is_onednn_only"] = False
+    #             item["extra_args"] = parse_extra_args(op_name, op['extra_args'])
+    #             if 'layout_transform' in op:
+    #                 item["layout_transform"] = parse_layout_transform(
+    #                     op_name, op['layout_transform']
+    #                 )
+    #             else:
+    #                 item["layout_transform"] = None
+    #             item["attrs"] = item["attrs"] + parse_extra_args(
+    #                 op_name, op['extra_args']
+    #             )
+    #             op_yaml_items_onednn.append(item)
+    #         op_yaml_items = op_yaml_items_onednn
+
+    #     with open(onednn_yaml_file, "r") as f:
+    #         onednn_ops = yaml.safe_load(f)
+    #         for op in onednn_ops:
+    #             op["is_onednn_only"] = True
+    #         op_yaml_items = op_yaml_items + onednn_ops
+
     op_infos = []
     all_op_info_items = {}
     for yaml_file in op_yaml_files:
@@ -1762,39 +1794,6 @@ def OpGenerator(
             ops = yaml.safe_load(f)
             op_yaml_items = op_yaml_items + ops
 
-    if dialect_name == "pd_onednn_op":
-        with open(ops_onednn_extra_yaml_file, "r") as f:
-            ops_onednn_extra = yaml.safe_load(f)
-            op_yaml_items_map = {}
-            for op in op_yaml_items:
-                op_yaml_items_map[op['name']] = op
-            op_yaml_items_onednn = []
-            for op in ops_onednn_extra:
-                op_name = op['op']
-                item = op_yaml_items_map[op_name]
-                assert (
-                    item is not None
-                ), f"OneDnn op {op_name} in {ops_onednn_extra_yaml_file} is not define in ops.yaml."
-                item["is_onednn_only"] = False
-                item["extra_args"] = parse_extra_args(op_name, op['extra_args'])
-                if 'layout_transform' in op:
-                    item["layout_transform"] = parse_layout_transform(
-                        op_name, op['layout_transform']
-                    )
-                else:
-                    item["layout_transform"] = None
-                item["attrs"] = item["attrs"] + parse_extra_args(
-                    op_name, op['extra_args']
-                )
-                op_yaml_items_onednn.append(item)
-            op_yaml_items = op_yaml_items_onednn
-
-        with open(onednn_yaml_file, "r") as f:
-            onednn_ops = yaml.safe_load(f)
-            for op in onednn_ops:
-                op["is_onednn_only"] = True
-            op_yaml_items = op_yaml_items + onednn_ops
-
         op_info_items = {}
         for op in op_yaml_items:
             op_compat_item = None
@@ -1900,14 +1899,15 @@ def OpGenerator(
     else:
         op_to_multi_kernels_map_str = ""
 
-    op_info_str = CC_OP_INFO_FILE_TEMPLATE.format(
-        op_declare=",".join(op_list_strs).replace("\n", ""),
-        op_to_multi_kernels_map=op_to_multi_kernels_map_str,
-        h_file=op_def_h_file[:-4],
-    )
+    if op_info_file is not None:
+        op_info_str = CC_OP_INFO_FILE_TEMPLATE.format(
+            op_declare=",".join(op_list_strs).replace("\n", ""),
+            op_to_multi_kernels_map=op_to_multi_kernels_map_str,
+            h_file=op_def_h_file[:-4],
+        )
 
-    with open(op_info_file, 'w') as f:
-        f.write(op_info_str)
+        with open(op_info_file, 'w') as f:
+            f.write(op_info_str)
 
     # (6) write to files for xx_op.cc.tmp
     for id in range(len(op_def_cc_file)):

From 7c8c581e0cccfcdb8039550c743e0cf4093eb49d Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Mon, 18 Dec 2023 01:58:34 +0000
Subject: [PATCH 03/49] op_gen: merge oneDNN extra-op info while parsing yaml

---
 .../fluid/pir/dialect/op_generator/op_gen.py  | 72 ++++++++++---------
 1 file changed, 37 insertions(+), 35 deletions(-)

diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py
index 20d4be6fa35326..c38da5a3118a9c 100644
--- a/paddle/fluid/pir/dialect/op_generator/op_gen.py
+++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py
@@ -29,6 +29,7 @@
 from op_kerneltype_gen import gen_kernel_type_for_var_str
 from op_member_func_gen import gen_op_get_inputs_outputs_str
 from op_verify_gen import gen_verify_func_str
+from ops_onednn_extra_parser import parse_extra_args, parse_layout_transform
 from parse_kernel_key_gen import gen_parse_kernel_key_str
 from reify_infer_shape_gen import gen_reify_infer_shape_str
 from vjp_interface_black_list import vjp_interface_black_list
@@ -1753,41 +1754,28 @@ def OpGenerator(
     # (2) parse yaml files
     op_compat_parser = OpCompatParser(op_compat_yaml_file)
 
-    # if dialect_name == "pd_onednn_op":
-    #     with open(ops_onednn_extra_yaml_file, "r") as f:
-    #         ops_onednn_extra = yaml.safe_load(f)
-    #         op_yaml_items_map = {}
-    #         for op in op_yaml_items:
-    #             op_yaml_items_map[op['name']] = op
-    #         op_yaml_items_onednn = []
-    #         for op in ops_onednn_extra:
-    #             op_name = op['op']
-    #             item = op_yaml_items_map[op_name]
-    #             assert (
-    #                 item is not None
-    #             ), f"OneDnn op {op_name} in {ops_onednn_extra_yaml_file} is not define in ops.yaml."
-    #             item["is_onednn_only"] = False
-    #             item["extra_args"] = parse_extra_args(op_name, op['extra_args'])
-    #             if 'layout_transform' in op:
-    #                 item["layout_transform"] = parse_layout_transform(
-    #                     op_name, op['layout_transform']
-    #                 )
-    #             else:
-    #                 item["layout_transform"] = None
-    #             item["attrs"] = item["attrs"] + parse_extra_args(
-    #                 op_name, op['extra_args']
-    #             )
-    #             op_yaml_items_onednn.append(item)
-    #         op_yaml_items = op_yaml_items_onednn
-
-    #     with open(onednn_yaml_file, "r") as f:
-    #         onednn_ops = yaml.safe_load(f)
-    #         for op in onednn_ops:
-    #             op["is_onednn_only"] = True
-    #         op_yaml_items = op_yaml_items + onednn_ops
+    if dialect_name == "pd_onednn_op":
+        with open(ops_onednn_extra_yaml_file, "r") as f:
+            ops_onednn_extra = yaml.safe_load(f)
+            ops_onednn_extra_map = {}
+            for op in ops_onednn_extra:
+                op_name = op['op']
+                item = {}
+                item["is_onednn_only"] = False
+                item["extra_args"] = parse_extra_args(op_name, op['extra_args'])
+                if 'layout_transform' in op:
+                    item["layout_transform"] = parse_layout_transform(
+                        op_name, op['layout_transform']
+                    )
+                else:
+                    item["layout_transform"] = None
+                item["attrs"] = parse_extra_args(op_name, op['extra_args'])
+                ops_onednn_extra_map[op_name] = item
+        op_yaml_files.insert(0, onednn_yaml_file)
 
     op_infos = []
     all_op_info_items = {}
+    first_file = True
     for yaml_file in op_yaml_files:
         op_yaml_items = []
         with open(yaml_file, "r") as f:
@@ -1823,11 +1811,25 @@ def OpGenerator(
                 ) = op_compat_parser.parse_support_tensor(op)
                 op_compat_item['scalar'] = scalar_item
                 op_compat_item['int_array'] = int_array_item
-
-            op_info_items[op['name']] = OpInfoParser(op, op_compat_item)
-            all_op_info_items[op['name']] = OpInfoParser(op, op_compat_item)
+            if dialect_name == "pd_onednn_op":
+                if first_file:
+                    first_file = False
+                    op["is_onednn_only"] = True
+                elif op['name'] in ops_onednn_extra_map:
+                    onednn_item = ops_onednn_extra_map[op['name']]
+                    op["is_onednn_only"] = onednn_item["is_onednn_only"]
+                    op["extra_args"] = onednn_item["extra_args"]
+                    op["layout_transform"] = onednn_item["layout_transform"]
+                    op["attrs"] = op["attrs"] + onednn_item["attrs"]
+                else:
+                    continue
+            item = OpInfoParser(op, op_compat_item)
+            op_info_items[op['name']] = item
+            all_op_info_items[op['name']] = item
 
         op_infos.append(op_info_items)
+    if dialect_name == "pd_onednn_op":
+        op_infos = [all_op_info_items]
 
     # (3) auto code gen
     op_list_strs = []

From 307d5fc0f274a1707180fabcfb84f7945083cfff Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Mon, 18 Dec 2023 02:20:17 +0000
Subject: [PATCH 04/49] cmake: build oneDNN dialect only under WITH_MKLDNN

---
 paddle/fluid/pir/dialect/CMakeLists.txt | 73 ++++++++++++++-----------
 1 file changed, 40 insertions(+), 33 deletions(-)

diff --git a/paddle/fluid/pir/dialect/CMakeLists.txt b/paddle/fluid/pir/dialect/CMakeLists.txt
index c3c05f24c03c3a..02b7900d4079b4 100644
--- a/paddle/fluid/pir/dialect/CMakeLists.txt
+++ b/paddle/fluid/pir/dialect/CMakeLists.txt
@@ -28,13 +28,6 @@ set(pir_op_fwd_src_yaml
 set(pir_op_bwd_src_yaml
     ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/ops_backward.yaml)
 
-set(pd_onednn_op_yaml_file
-    ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/onednn.yaml)
-
-set(pd_ops_onednn_extra_yaml_file
-    ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
-)
-
 set(pir_update_op_fwd_src_yaml
     ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/update_ops.yaml)
 set(parsed_op_dir
@@ -42,7 +35,6 @@ set(parsed_op_dir
 set(pir_op_fwd_yaml ${parsed_op_dir}/ops.parsed.yaml)
 set(pir_op_bwd_yaml ${parsed_op_dir}/ops_backward.parsed.yaml)
 set(pir_update_op_fwd_yaml ${parsed_op_dir}/update_ops.parsed.yaml)
-set(pir_op_onednn_yaml ${parsed_op_dir}/onednn.parsed.yaml)
 
 set(op_yaml_files
     ${op_fwd_yaml},${op_bwd_yaml},${fused_op_fwd_yaml},${fused_op_bwd_yaml},${pir_op_fwd_yaml},${pir_op_bwd_yaml},${pir_update_op_fwd_yaml}
@@ -60,13 +52,6 @@ set(op_info_file_tmp ${op_info_file}.tmp)
 set(op_vjp_source_file ${PD_DIALECT_SOURCE_DIR}/pd_op_vjp.cc)
 set(op_vjp_source_file_tmp ${op_vjp_source_file}.tmp)
 
-set(onednn_op_namespace paddle,onednn,dialect)
-set(onednn_dialect_name pd_onednn_op)
-set(onednn_op_header_file ${PD_DIALECT_SOURCE_DIR}/pd_onednn_op.h)
-set(onednn_op_source_file ${PD_DIALECT_SOURCE_DIR}/pd_onednn_op.cc)
-set(onednn_op_header_file_tmp ${onednn_op_header_file}.tmp)
-set(onednn_op_source_file_tmp ${onednn_op_source_file}.tmp)
-
 set(op_source_file ${PD_DIALECT_SOURCE_DIR}/pd_op.cc)
 set(op_source_file_tmp ${op_source_file}.tmp)
 
@@ -101,8 +86,6 @@ execute_process(
           ${pir_op_fwd_src_yaml} --output_path ${pir_op_fwd_yaml}
   COMMAND ${PYTHON_EXECUTABLE} ${op_gen_parsed_yaml_file} --op_yaml_path
           ${pir_update_op_fwd_src_yaml} --output_path ${pir_update_op_fwd_yaml}
-  COMMAND ${PYTHON_EXECUTABLE} ${op_gen_parsed_yaml_file} --op_yaml_path
-          ${pd_onednn_op_yaml_file} --output_path ${pir_op_onednn_yaml}
   COMMAND ${PYTHON_EXECUTABLE} ${op_gen_parsed_yaml_file} --op_yaml_path
           ${pir_op_bwd_src_yaml} --output_path ${pir_op_bwd_yaml} --backward)
 
@@ -126,18 +109,40 @@ set(generated_files_pd_op
     "${pir_bwd_op_source_file}"
     "${pir_update_op_source_file}")
 
-# execute_process(
-#     COMMAND
-#         ${PYTHON_EXECUTABLE} ${op_gen_file} --op_yaml_files ${op_yaml_files}
-#         --op_compat_yaml_file ${op_compat_yaml_file} --namespaces ${onednn_op_namespace}
-#         --dialect_name ${onednn_dialect_name} --op_def_h_file ${onednn_op_header_file_tmp}
-#         --op_info_file ${op_info_file_tmp} --op_def_cc_file ${onednn_op_source_file_tmp}
-#         --onednn_yaml_file ${pir_op_onednn_yaml}
-#         --ops_onednn_extra_yaml_file ${pd_ops_onednn_extra_yaml_file})
-
-# set(generated_files_onednn_pd_op "${onednn_op_header_file}"
-#                                  "${onednn_op_source_file}")
-
+if(WITH_MKLDNN)
+  set(pir_op_onednn_yaml ${parsed_op_dir}/onednn.parsed.yaml)
+
+  set(pd_onednn_op_yaml_file
+      ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/onednn.yaml)
+
+  set(pd_ops_onednn_extra_yaml_file
+      ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
+  )
+
+  set(onednn_op_namespace paddle,onednn,dialect)
+  set(onednn_dialect_name pd_onednn_op)
+  set(onednn_op_header_file ${PD_DIALECT_SOURCE_DIR}/pd_onednn_op.h)
+  set(onednn_op_source_file ${PD_DIALECT_SOURCE_DIR}/pd_onednn_op.cc)
+  set(onednn_op_header_file_tmp ${onednn_op_header_file}.tmp)
+  set(onednn_op_source_file_tmp ${onednn_op_source_file}.tmp)
+
+  execute_process(
+    COMMAND ${PYTHON_EXECUTABLE} ${op_gen_parsed_yaml_file} --op_yaml_path
+            ${pd_onednn_op_yaml_file} --output_path ${pir_op_onednn_yaml})
+
+  execute_process(
+    COMMAND
+      ${PYTHON_EXECUTABLE} ${op_gen_file} --op_yaml_files ${op_yaml_files}
+      --op_compat_yaml_file ${op_compat_yaml_file} --namespaces
+      ${onednn_op_namespace} --dialect_name ${onednn_dialect_name}
+      --op_def_h_file ${onednn_op_header_file_tmp} --op_info_file
+      ${op_info_file_tmp} --op_def_cc_file ${onednn_op_source_file_tmp}
+      --onednn_yaml_file ${pir_op_onednn_yaml} --ops_onednn_extra_yaml_file
+      ${pd_ops_onednn_extra_yaml_file})
+
+  set(generated_files_onednn_pd_op "${onednn_op_header_file}"
+                                   "${onednn_op_source_file}")
+endif()
 set(api_gen_yaml_files
     ${op_fwd_yaml},${op_bwd_yaml},${pir_op_fwd_yaml},${pir_op_bwd_yaml},${pir_update_op_fwd_yaml}
 )
@@ -245,10 +250,12 @@ cc_library(
   SRCS ${op_dialect_srcs}
   DEPS ${op_dialect_deps})
 
-cc_library(
-  op_onednn_dialect
-  SRCS ${onednn_op_source_file}
-  DEPS ${op_dialect_deps})
+if(WITH_MKLDNN)
+  cc_library(
+    op_onednn_dialect
+    SRCS ${onednn_op_source_file}
+    DEPS ${op_dialect_deps})
+endif()
 
 #Note(risemeup1):compile some *.cc files which depend on primitive_vjp_experimental into op_dialect_vjp.a/lib
 set(op_decomp_source_file ${PD_DIALECT_SOURCE_DIR}/op_decomp.cc)

From 6e4f8f843247918422d7892c1009fed46fbbd864 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Mon, 18 Dec 2023 02:36:45 +0000
Subject: [PATCH 05/49] cmake: drop --op_info_file from oneDNN op_gen call

---
 paddle/fluid/pir/dialect/CMakeLists.txt | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/pir/dialect/CMakeLists.txt b/paddle/fluid/pir/dialect/CMakeLists.txt
index 02b7900d4079b4..c9c2d687286978 100644
--- a/paddle/fluid/pir/dialect/CMakeLists.txt
+++ b/paddle/fluid/pir/dialect/CMakeLists.txt
@@ -135,10 +135,9 @@ if(WITH_MKLDNN)
       ${PYTHON_EXECUTABLE} ${op_gen_file} --op_yaml_files ${op_yaml_files}
       --op_compat_yaml_file ${op_compat_yaml_file} --namespaces
       ${onednn_op_namespace} --dialect_name ${onednn_dialect_name}
-      --op_def_h_file ${onednn_op_header_file_tmp} --op_info_file
-      ${op_info_file_tmp} --op_def_cc_file ${onednn_op_source_file_tmp}
-      --onednn_yaml_file ${pir_op_onednn_yaml} --ops_onednn_extra_yaml_file
-      ${pd_ops_onednn_extra_yaml_file})
+      --op_def_h_file ${onednn_op_header_file_tmp} --op_def_cc_file
+      ${onednn_op_source_file_tmp} --onednn_yaml_file ${pir_op_onednn_yaml}
+      --ops_onednn_extra_yaml_file ${pd_ops_onednn_extra_yaml_file})
 
   set(generated_files_onednn_pd_op "${onednn_op_header_file}"
                                    "${onednn_op_source_file}")

From 1907da17959dc832acb0cacfbc8586717fdd4934 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Mon, 18 Dec 2023 02:42:37 +0000
Subject: [PATCH 06/49] cmake: pass --op_info_file to the pd_op op_gen call

---
 paddle/fluid/pir/dialect/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/pir/dialect/CMakeLists.txt b/paddle/fluid/pir/dialect/CMakeLists.txt
index c9c2d687286978..5e2d8e2de0cd88 100644
--- a/paddle/fluid/pir/dialect/CMakeLists.txt
+++ b/paddle/fluid/pir/dialect/CMakeLists.txt
@@ -94,8 +94,8 @@ execute_process(
     ${PYTHON_EXECUTABLE} ${op_gen_file} --op_yaml_files ${op_yaml_files}
     --op_compat_yaml_file ${op_compat_yaml_file} --namespaces ${op_namespace}
     --dialect_name ${dialect_name} --op_def_h_file ${op_header_file_tmp}
-    --op_def_cc_file ${op_src_files_tmp} --op_vjp_cc_file
-    ${op_vjp_src_file_tmp})
+    --op_info_file ${op_info_file_tmp} --op_def_cc_file ${op_src_files_tmp}
+    --op_vjp_cc_file ${op_vjp_src_file_tmp})
 
 set(generated_files_pd_op
     "${op_header_file}"

From 2f3f24386f86107df9dde27eaab85f4676acc9cd Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Mon, 18 Dec 2023 04:49:07 +0000
Subject: [PATCH 07/49] op_gen: use repo-relative pd_op.h include path

---
 paddle/fluid/pir/dialect/op_generator/op_gen.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py
index c38da5a3118a9c..ffa99e1a57ae64 100644
--- a/paddle/fluid/pir/dialect/op_generator/op_gen.py
+++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py
@@ -1921,7 +1921,7 @@ def OpGenerator(
 
         if dialect_name == "pd_onednn_op":
             op_def_h_file_tmp = (
-                "/data/Eager/Paddle/paddle/fluid/pir/dialect/operator/ir/pd_op.h\"\n#include \""
+                "paddle/fluid/pir/dialect/operator/ir/pd_op.h\"\n#include \""
                 + op_def_h_file
             )
         else:

From c83a0664fc17d6fcade3fc1a77b1d1c31e57ba4d Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Mon, 18 Dec 2023 11:00:19 +0000
Subject: [PATCH 08/49] pir: oneDNN op lookup, kernel key and dynamic_fallback

---
 .../ir_adaptor/translator/op_translator.cc    |  22 +++-
 .../fluid/pir/dialect/op_generator/op_gen.py  |   5 +
 .../dialect/operator/ir/ops_onednn_extra.yaml |   1 +
 .../pir/transforms/pd_op_to_kernel_pass.cc    | 103 ++++++++++++++++++
 4 files changed, 126 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/ir_adaptor/translator/op_translator.cc b/paddle/fluid/ir_adaptor/translator/op_translator.cc
index d758677d64ef5d..38965c05f7d9c5 100644
--- a/paddle/fluid/ir_adaptor/translator/op_translator.cc
+++ b/paddle/fluid/ir_adaptor/translator/op_translator.cc
@@ -76,7 +76,10 @@ using AttributeHandlerFn = std::function<pir::Attribute(
     pir::IrContext*, const OpDesc&, const OpAttributeInfo&)>;
 using DenseTensorTypeStorage = paddle::dialect::DenseTensorTypeStorage;
 constexpr char kTargetDialectPrefix[] = "pd_op.";  // NOLINT
-constexpr char kEmptyVarName[] = "@EMPTY@";        // NOLINT
+#ifdef PADDLE_WITH_DNNL
+constexpr char kOneDnnTargetDialectPrefix[] = "pd_onednn_op.";  // NOLINT
+#endif
+constexpr char kEmptyVarName[] = "@EMPTY@";  // NOLINT
 
 static const std::unordered_set<std::string> SpecialNonInplaceOps = {};
 
@@ -222,12 +225,21 @@ inline pir::Operation* InsertCreateArrayOp(pir::IrContext* ctx,
   return create_array_op.operation();
 }
 
+inline std::string GetPrefix(const OpDesc& op_desc) {
+#ifdef PADDLE_WITH_DNNL
+  return op_desc.GetAttrIfExists<bool>("use_mkldnn")
+             ? kOneDnnTargetDialectPrefix
+             : kTargetDialectPrefix;
+#else
+  return kTargetDialectPrefix;
+#endif
+}
 }  // namespace
 
 pir::OpInfo OpTranscriber::LoopkUpOpInfo(pir::IrContext* ctx,
                                          const OpDesc& op_desc) {
   std::string target_op_name =
-      kTargetDialectPrefix + OpNameCompatibleMapping(op_desc.Type());
+      GetPrefix(op_desc) + OpNameCompatibleMapping(op_desc.Type());
   if (IsInplace(op_desc) && *target_op_name.rbegin() != '_') {
     target_op_name += "_";
   }
@@ -320,7 +332,7 @@ pir::OpInfo OpTranscriber::LoopkUpOpInfo(pir::IrContext* ctx,
              op_desc.Type(),
              target_op_name);
 
-  target_op_name = kTargetDialectPrefix + target_op_name;
+  target_op_name = GetPrefix(op_desc) + target_op_name;
   if (IsInplace(op_desc) && *target_op_name.rbegin() != '_') {
     target_op_name += "_";
   }
@@ -1041,7 +1053,7 @@ struct EmbeddingGradOpTranscriber : public OpTranscriber {
   pir::OpInfo LoopkUpOpInfo(pir::IrContext* ctx,
                             const OpDesc& op_desc) override {
     std::string target_op_name =
-        kTargetDialectPrefix + OpNameCompatibleMapping(op_desc.Type());
+        GetPrefix(op_desc) + OpNameCompatibleMapping(op_desc.Type());
 
     bool is_sparse = paddle::get<bool>(op_desc.GetAttr("is_sparse"));
 
@@ -1294,7 +1306,7 @@ struct AddNOpTranscriber : public OpTranscriber {
   pir::OpInfo LoopkUpOpInfo(pir::IrContext* ctx,
                             const OpDesc& op_desc) override {
     std::string target_op_name =
-        kTargetDialectPrefix + OpNameCompatibleMapping(op_desc.Type());
+        GetPrefix(op_desc) + OpNameCompatibleMapping(op_desc.Type());
     if (IsInplace(op_desc)) {
       target_op_name += "_";
     } else {
diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py
index ffa99e1a57ae64..2d6b6a3673b54a 100644
--- a/paddle/fluid/pir/dialect/op_generator/op_gen.py
+++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py
@@ -1769,6 +1769,10 @@ def OpGenerator(
                     )
                 else:
                     item["layout_transform"] = None
+                if 'dynamic_fallback' in op:
+                    item["dynamic_fallback"] = op['dynamic_fallback']
+                else:
+                    item["dynamic_fallback"] = False
                 item["attrs"] = parse_extra_args(op_name, op['extra_args'])
                 ops_onednn_extra_map[op_name] = item
         op_yaml_files.insert(0, onednn_yaml_file)
@@ -1820,6 +1824,7 @@ def OpGenerator(
                     op["is_onednn_only"] = onednn_item["is_onednn_only"]
                     op["extra_args"] = onednn_item["extra_args"]
                     op["layout_transform"] = onednn_item["layout_transform"]
+                    op["dynamic_fallback"] = onednn_item["dynamic_fallback"]
                     op["attrs"] = op["attrs"] + onednn_item["attrs"]
                 else:
                     continue
diff --git a/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml b/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
index ec49dd548d44b8..f8062dff6bc4d9 100644
--- a/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
+++ b/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
@@ -16,6 +16,7 @@
   layout_transform :
     arg_name: data_format
     tensors: x
+  dynamic_fallback : True
 
 - op : batch_norm
   extra_args : bool fuse_with_relu=false
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index 04c4d68933140d..5fdcdc099cab02 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -387,6 +387,46 @@ static pir::Type BuildOutputType(pir::Type type,
   }
 }
 
+#ifdef PADDLE_WITH_DNNL
+template <class IrType1, class IrType2>
+static pir::Type create_type(pir::Type type,
+                             const phi::Place& place,
+                             const phi::DataLayout& layout,
+                             pir::Type out_dtype,
+                             pir::IrContext* ctx) {
+  auto input_type = type.dyn_cast<IrType1>();
+  return IrType2::get(ctx,
+                      place,
+                      out_dtype,
+                      input_type.dims(),
+                      layout,
+                      input_type.lod(),
+                      input_type.offset());
+}
+
+static pir::Type BuildOutputType(pir::Type type,
+                                 const phi::Place& place,
+                                 const phi::DataLayout& layout,
+                                 pir::IrContext* ctx) {
+  if (type.isa<DenseTensorType>()) {
+    auto out_dtype = type.dyn_cast<DenseTensorType>().dtype();
+    return create_type<DenseTensorType, AllocatedDenseTensorType>(
+        type, place, layout, out_dtype, ctx);
+  } else if (type.isa<SelectedRowsType>()) {
+    auto out_dtype = type.dyn_cast<SelectedRowsType>().dtype();
+    return create_type<SelectedRowsType, AllocatedSelectedRowsType>(
+        type, place, layout, out_dtype, ctx);
+  } else if (type.isa<DenseTensorArrayType>()) {
+    auto array_type = type.dyn_cast<DenseTensorArrayType>();
+    return AllocatedDenseTensorArrayType::get(
+        ctx, place, array_type.dtype(), layout);
+  } else {
+    PADDLE_THROW(phi::errors::Unimplemented(
+        "BuildOutputType only support DenseTensorType and SelectedRowsType"));
+  }
+}
+#endif
+
 pir::OpResult AddDtypeTransferOp(pir::Value in,
                                  pir::Block* block,
                                  const phi::KernelKey& kernel_key,
@@ -629,6 +669,37 @@ std::string GetKernelName(const OpYamlInfoParser* op_info_parser,
   return kernel_fn_str;
 }
 
+bool SupportsMKLDNN(const phi::DataType data_type) const {
+  auto phi_kernels = phi::KernelFactory::Instance().SelectKernelMap(
+      phi::TransToPhiKernelName(type_));
+  auto has_phi_kernel =
+      std::any_of(phi_kernels.begin(),
+                  phi_kernels.end(),
+                  [data_type](phi::KernelKeyMap::const_reference kern_pair) {
+                    return kern_pair.first.backend() == phi::Backend::ONEDNN &&
+                           kern_pair.first.dtype() == data_type;
+                  });
+  if (has_phi_kernel) {
+    return true;
+  } else {
+    auto op_kernel_iter = OperatorWithKernel::AllOpKernels().find(type_);
+    if (op_kernel_iter == OperatorWithKernel::AllOpKernels().end()) {
+      return false;
+    } else {
+      auto& op_kernels = op_kernel_iter->second;
+      return std::any_of(
+          op_kernels.begin(),
+          op_kernels.end(),
+          [data_type](OpKernelMap::const_reference kern_pair) {
+            return platform::is_cpu_place(kern_pair.first.place_) &&
+                   kern_pair.first.library_type_ == LibraryType::kMKLDNN &&
+                   kern_pair.first.data_type_ ==
+                       paddle::framework::TransToProtoVarType(data_type);
+          });
+    }
+  }
+}
+
 phi::KernelKey GetKernelKey(
     pir::Operation* op,
     const phi::Place& place,
@@ -857,6 +928,13 @@ phi::KernelKey GetKernelKey(
                "to GPU";
   }
 
+#ifdef PADDLE_WITH_DNNL
+  if (op->HasTrait<OneDNNTrait>() && res.backend() == phi::Backend::CPU &&
+      SupportsMKLDNN(res.dtype())) {
+    res.set_backend(phi::Backend::ONEDNN);
+    res.set_layout(phi::DataLayout::ONEDNN);
+  }
+#endif
   return res;
 }
 
@@ -1324,7 +1402,17 @@ std::vector<pir::Type> BuildOutputs(pir::Operation* op_item,
     } else if (result_type.isa<DenseTensorType>() ||
                result_type.isa<SelectedRowsType>() ||
                result_type.isa<DenseTensorArrayType>()) {
+#ifdef PADDLE_WITH_DNNL
+      if (kernel_key.backend() == phi::Backend::ONEDNN) {
+        op_output_types.push_back(BuildOutputType(
+            result_type, out_place, phi::DataLayout::ONEDNN, ctx));
+      } else {
+        op_output_types.push_back(BuildOutputType(result_type, out_place, ctx));
+      }
+#else
       op_output_types.push_back(BuildOutputType(result_type, out_place, ctx));
+#endif
+
     } else if (result_type.isa<pir::VectorType>()) {
       std::vector<pir::Type> vec_inner_types;
       auto base_types = result_type.dyn_cast<pir::VectorType>().data();
@@ -1332,8 +1420,18 @@ std::vector<pir::Type> BuildOutputs(pir::Operation* op_item,
         if (base_type) {
           if (base_type.isa<DenseTensorType>() ||
               base_type.isa<SelectedRowsType>()) {
+#ifdef PADDLE_WITH_DNNL
+            if (kernel_key.backend() == phi::Backend::ONEDNN) {
+              vec_inner_types.push_back(BuildOutputType(
+                  base_type, out_place, phi::DataLayout::ONEDNN, ctx));
+            } else {
+              vec_inner_types.push_back(
+                  BuildOutputType(base_type, out_place, ctx));
+            }
+#else
             vec_inner_types.push_back(
                 BuildOutputType(base_type, out_place, ctx));
+#endif
           } else {
             PADDLE_THROW(phi::errors::Unimplemented(
                 "only support dense tensor and selected rows in vector type "
@@ -1344,6 +1442,11 @@ std::vector<pir::Type> BuildOutputs(pir::Operation* op_item,
           pir::Type fp32_dtype = pir::Float32Type::get(ctx);
           phi::DDim dims = {};
           phi::DataLayout data_layout = phi::DataLayout::NCHW;
+#ifdef PADDLE_WITH_DNNL
+          if (kernel_key.backend() == phi::Backend::ONEDNN) {
+            data_layout = phi::DataLayout::ONEDNN;
+          }
+#endif
           phi::LoD lod = {{}};
           size_t offset = 0;
           auto dense_tensor_dtype = DenseTensorType::get(

From 5f1746140868d88098b03a0ffc8d0f175fbc5004 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Mon, 18 Dec 2023 11:39:23 +0000
Subject: [PATCH 09/49] op_gen: emit oneDNN fields in OpRunTimeInfo

---
 .../fluid/pir/dialect/op_generator/op_gen.py  | 53 +++++++++++++++++++
 .../operator/utils/op_yaml_info_util.h        | 20 ++++++-
 .../pir/transforms/pd_op_to_kernel_pass.cc    |  2 +-
 3 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py
index 2d6b6a3673b54a..d0264112a3ff7d 100644
--- a/paddle/fluid/pir/dialect/op_generator/op_gen.py
+++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py
@@ -211,6 +211,17 @@ class {op_name} : public pir::Op<{op_name}{interfaces}{traits}> {{
   return std::make_tuple(inputs, attributes, outputs, run_time_info, "{origin_op_name}");
 }}
 """
+
+OP_INFO_ONEDNN_TEMPLATE = """
+OpInfoTuple {op_name}::GetOpInfo() {{
+  std::vector<paddle::dialect::OpInputInfo> inputs = {{ {inputs} }};
+  std::vector<paddle::dialect::OpAttributeInfo> attributes = {{ {attributes} }};
+  std::vector<paddle::dialect::OpOutputInfo> outputs = {{ {outputs} }};
+  paddle::dialect::OpRunTimeInfo run_time_info = paddle::dialect::OpRunTimeInfo("{infer_meta_func}", {{"{infer_meta_param}"}}, "{kernel_func}", {{"{kernel_param}"}}, {{{kernel_key_dtype}}}, {{{kernel_key_backend}}}, {{{inplace}}}, {{{view}}}, {{{extra_args}}}, "{layout_transform_arg}", {{{layout_transform_inputs}}}, {is_onednn_only}, {dynamic_fallback});
+  return std::make_tuple(inputs, attributes, outputs, run_time_info, "{origin_op_name}");
+}}
+"""
+
 CONSTRUCT_INPUT_INFO_TEMPLATE = """paddle::dialect::OpInputInfo("{name}", "{typename}", {optional}, {no_need_buffer}, {is_mutable_attribute}, {with_grad_semantic})"""
 CONSTRUCT_OUTPUT_INFO_TEMPLATE = """paddle::dialect::OpOutputInfo("{name}", "{typename}", {optional}, {intermediate})"""
 CONSTRUCT_ATTRIBUTE_INFO_TEMPLATE = """paddle::dialect::OpAttributeInfo("{name}", "{typename}", "{data_type}")"""
@@ -465,10 +476,12 @@ def __init__(self, op_yaml_item, op_compat_item):
             self.onednn_extra_args = self.op_yaml_item["extra_args"]
             self.onednn_layout_transform = self.op_yaml_item["layout_transform"]
             self.is_onednn_only = self.op_yaml_item["is_onednn_only"]
+            self.dynamic_fallback = self.op_yaml_item["dynamic_fallback"]
         else:
             self.onednn_extra_args = []
             self.onednn_layout_transform = None
             self.is_onednn_only = False
+            self.dynamic_fallback = False
 
     def parse_op_traits(self):
         if 'traits' in self.op_yaml_item:
@@ -1566,6 +1579,46 @@ def AutoCodeGen(op_info_items, all_op_info_items, namespaces, dialect_name):
                     origin_op_name=op_info.op_yaml_item['name'],
                 )
 
+                if dialect_name == "pd_onednn_op":
+                    if len(op_info.onednn_extra_args) > 0:
+                        args_name = []
+                        for arg in op_info.onednn_extra_args:
+                            args_name.append(arg["name"])
+
+                        extra_args = '"' + '", "'.join(args_name) + '"'
+                    else:
+                        extra_args = ""
+                    if op_info.onednn_layout_transform is None:
+                        layout_transform_arg, layout_transform_inputs = "", []
+                    else:
+                        (
+                            layout_transform_arg,
+                            layout_transform_inputs,
+                        ) = op_info.onednn_layout_transform
+                        layout_transform_inputs = (
+                            '"' + '", "'.join(layout_transform_inputs) + '"'
+                        )
+
+                    op_info_func_str = OP_INFO_ONEDNN_TEMPLATE.format(
+                        op_name=op_class_name,
+                        inputs=inputs_info_str,
+                        attributes=attribute_info_str,
+                        outputs=outputs_info_str,
+                        infer_meta_func=infer_meta_func_str,
+                        infer_meta_param=infer_meta_param_str,
+                        kernel_func=kernel_func_str,
+                        kernel_param=kernel_param_str,
+                        kernel_key_dtype=kernel_key_dtype,
+                        kernel_key_backend=kernel_key_backend,
+                        inplace=inplace_str,
+                        view=view_str,
+                        origin_op_name=op_info.op_yaml_item['name'],
+                        extra_args=extra_args,
+                        layout_transform_arg=layout_transform_arg,
+                        layout_transform_inputs=layout_transform_inputs,
+                        is_onednn_only=op_info.is_onednn_only,
+                        dynamic_fallback=op_info.dynamic_fallback,
+                    )
                 # generate op verify function str
                 op_verify_str = ''
                 if not op_info.custom_verify:
diff --git a/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_util.h b/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_util.h
index 637de470675eb1..662616bce773a0 100644
--- a/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_util.h
+++ b/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_util.h
@@ -93,6 +93,12 @@ struct OpRunTimeInfo {
   std::vector<std::string> kernel_key_backend;
   std::vector<std::pair<std::string, std::string>> inplace;
   std::vector<std::pair<std::string, std::string>> view;
+  std::vector<std::string> extra_args;
+  std::string layout_transform_arg;
+  std::vector<std::string> layout_transform_inputs;
+  bool is_onednn_only;
+  bool dynamic_fallback;
+
   OpRunTimeInfo(const std::string& infer_meta_func,
                 const std::vector<std::string>& infer_meta_param,
                 const std::string& kernel_func,
@@ -100,7 +106,12 @@ struct OpRunTimeInfo {
                 const std::vector<std::string>& dtype,
                 const std::vector<std::string>& backend,
                 const std::vector<std::pair<std::string, std::string>>& inplace,
-                const std::vector<std::pair<std::string, std::string>>& view)
+                const std::vector<std::pair<std::string, std::string>>& view,
+                const std::vector<std::string>& extra_args = {},
+                const std::string& layout_transform_arg = "",
+                const std::vector<std::string>& layout_transform_inputs = {},
+                bool is_onednn_only = false,
+                bool dynamic_fallback = false)
       : infer_meta_func(infer_meta_func),
         infer_meta_param(infer_meta_param),
         kernel_func(kernel_func),
@@ -108,7 +119,12 @@ struct OpRunTimeInfo {
         kernel_key_dtype(dtype),
         kernel_key_backend(backend),
         inplace(inplace),
-        view(view) {}
+        view(view),
+        extra_args(extra_args),
+        layout_transform_arg(layout_transform_arg),
+        layout_transform_inputs(layout_transform_inputs),
+        is_onednn_only(is_onednn_only),
+        dynamic_fallback(dynamic_fallback) {}
 };
 
 }  // namespace dialect
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index 5fdcdc099cab02..1615d36d36bdb1 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -669,7 +669,7 @@ std::string GetKernelName(const OpYamlInfoParser* op_info_parser,
   return kernel_fn_str;
 }
 
-bool SupportsMKLDNN(const phi::DataType data_type) const {
+bool SupportsMKLDNN(const phi::DataType data_type) {
   auto phi_kernels = phi::KernelFactory::Instance().SelectKernelMap(
       phi::TransToPhiKernelName(type_));
   auto has_phi_kernel =

From 94a7cd7a200b3a9efec87c62d4a20d69dd193079 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Mon, 18 Dec 2023 11:43:21 +0000
Subject: [PATCH 10/49] refine

---
 paddle/fluid/pir/dialect/op_generator/op_gen.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py
index d0264112a3ff7d..4142093fbd9c21 100644
--- a/paddle/fluid/pir/dialect/op_generator/op_gen.py
+++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py
@@ -1589,7 +1589,10 @@ def AutoCodeGen(op_info_items, all_op_info_items, namespaces, dialect_name):
                     else:
                         extra_args = ""
                     if op_info.onednn_layout_transform is None:
-                        layout_transform_arg, layout_transform_inputs = "", []
+                        layout_transform_arg, layout_transform_inputs = (
+                            "",
+                            r"\{\}",
+                        )
                     else:
                         (
                             layout_transform_arg,
@@ -1616,8 +1619,12 @@ def AutoCodeGen(op_info_items, all_op_info_items, namespaces, dialect_name):
                         extra_args=extra_args,
                         layout_transform_arg=layout_transform_arg,
                         layout_transform_inputs=layout_transform_inputs,
-                        is_onednn_only=op_info.is_onednn_only,
-                        dynamic_fallback=op_info.dynamic_fallback,
+                        is_onednn_only="true"
+                        if op_info.is_onednn_only
+                        else "false",
+                        dynamic_fallback="true"
+                        if op_info.dynamic_fallback
+                        else "false",
                     )
                 # generate op verify function str
                 op_verify_str = ''

From ff94b9050fb2ec0e8d2b16bba51e200dff246204 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Mon, 18 Dec 2023 11:45:57 +0000
Subject: [PATCH 11/49] refine

---
 paddle/fluid/pir/dialect/op_generator/op_gen.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py
index 4142093fbd9c21..c3d9bb9b0b4140 100644
--- a/paddle/fluid/pir/dialect/op_generator/op_gen.py
+++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py
@@ -1591,7 +1591,7 @@ def AutoCodeGen(op_info_items, all_op_info_items, namespaces, dialect_name):
                     if op_info.onednn_layout_transform is None:
                         layout_transform_arg, layout_transform_inputs = (
                             "",
-                            r"\{\}",
+                            r"{}",
                         )
                     else:
                         (

From fd4f845265483706280f7bd195758e1f422f0bce Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Mon, 18 Dec 2023 11:48:12 +0000
Subject: [PATCH 12/49] refine

---
 paddle/fluid/pir/dialect/op_generator/op_gen.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py
index c3d9bb9b0b4140..3e38664b94f27f 100644
--- a/paddle/fluid/pir/dialect/op_generator/op_gen.py
+++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py
@@ -1591,7 +1591,7 @@ def AutoCodeGen(op_info_items, all_op_info_items, namespaces, dialect_name):
                     if op_info.onednn_layout_transform is None:
                         layout_transform_arg, layout_transform_inputs = (
                             "",
-                            r"{}",
+                            "",
                         )
                     else:
                         (

From 9987288d28b1e01b929b3582b68313e3e8dadf6e Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Tue, 19 Dec 2023 02:00:56 +0000
Subject: [PATCH 13/49] refine

---
 .../fluid/pir/dialect/op_generator/op_gen.py  |  3 +++
 .../fluid/pir/dialect/operator/trait/onednn.h |  8 ++++++
 .../pir/transforms/pd_op_to_kernel_pass.cc    | 27 +++++++++++++------
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py
index 3e38664b94f27f..f9e4d236253904 100644
--- a/paddle/fluid/pir/dialect/op_generator/op_gen.py
+++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py
@@ -1174,6 +1174,9 @@ def AutoCodeGen(op_info_items, all_op_info_items, namespaces, dialect_name):
             if op_info.is_onednn_only:
                 op_traits += ["paddle::dialect::OneDNNOnlyTrait"]
 
+            if op_info.dynamic_fallback:
+                op_traits += ["paddle::dialect::OneDNNDynamicFallbackTrait"]
+
             op_traits_str = ""
             if len(op_traits) > 0:
                 op_traits_str = "," + ",".join(op_traits)
diff --git a/paddle/fluid/pir/dialect/operator/trait/onednn.h b/paddle/fluid/pir/dialect/operator/trait/onednn.h
index b4f6b02ad48206..8de01985200401 100644
--- a/paddle/fluid/pir/dialect/operator/trait/onednn.h
+++ b/paddle/fluid/pir/dialect/operator/trait/onednn.h
@@ -30,8 +30,16 @@ class OneDNNOnlyTrait : public pir::OpTraitBase<OneDNNOnlyTrait> {
       : pir::OpTraitBase<OneDNNOnlyTrait>(op) {}
 };
 
+class OneDNNDynamicFallbackTrait
+    : public pir::OpTraitBase<OneDNNDynamicFallbackTrait> {
+ public:
+  explicit OneDNNDynamicFallbackTrait(pir::Operation *op)
+      : pir::OpTraitBase<OneDNNDynamicFallbackTrait>(op) {}
+};  // op_gen.py adds this trait to ops whose dynamic_fallback flag is set
+
 }  // namespace dialect
 }  // namespace paddle
 
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNTrait)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNOnlyTrait)
+IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNDynamicFallbackTrait)
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index 1615d36d36bdb1..e25072d93be74a 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -17,6 +17,7 @@
 #include <iostream>
 
 #include "paddle/fluid/framework/op_kernel_type.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/pir/dialect/kernel/ir/kernel_attribute.h"
 #include "paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h"
 #include "paddle/fluid/pir/dialect/kernel/ir/kernel_op.h"
@@ -669,9 +670,10 @@ std::string GetKernelName(const OpYamlInfoParser* op_info_parser,
   return kernel_fn_str;
 }
 
-bool SupportsMKLDNN(const phi::DataType data_type) {
-  auto phi_kernels = phi::KernelFactory::Instance().SelectKernelMap(
-      phi::TransToPhiKernelName(type_));
+bool SupportsMKLDNN(const std::string& kernel_name,
+                    const phi::DataType data_type) {
+  auto phi_kernels =
+      phi::KernelFactory::Instance().SelectKernelMap(kernel_name);
   auto has_phi_kernel =
       std::any_of(phi_kernels.begin(),
                   phi_kernels.end(),
@@ -682,17 +684,26 @@ bool SupportsMKLDNN(const phi::DataType data_type) {
   if (has_phi_kernel) {
     return true;
   } else {
-    auto op_kernel_iter = OperatorWithKernel::AllOpKernels().find(type_);
-    if (op_kernel_iter == OperatorWithKernel::AllOpKernels().end()) {
+    auto op_kernel_iter =
+        paddle::framework::OperatorWithKernel::AllOpKernels().find(
+            phi::TransToFluidOpName(kernel_name));
+    if (op_kernel_iter ==
+        paddle::framework::OperatorWithKernel::AllOpKernels().end()) {
       return false;
     } else {
       auto& op_kernels = op_kernel_iter->second;
       return std::any_of(
           op_kernels.begin(),
           op_kernels.end(),
-          [data_type](OpKernelMap::const_reference kern_pair) {
+          [data_type](std::unordered_map<
+                      paddle::framework::OpKernelType,
+                      std::function<void(
+                          const paddle::framework::ExecutionContext&)>,
+                      paddle::framework::OpKernelType::Hash>::const_reference
+                          kern_pair) {
             return platform::is_cpu_place(kern_pair.first.place_) &&
-                   kern_pair.first.library_type_ == LibraryType::kMKLDNN &&
+                   kern_pair.first.library_type_ ==
+                       paddle::framework::LibraryType::kMKLDNN &&
                    kern_pair.first.data_type_ ==
                        paddle::framework::TransToProtoVarType(data_type);
           });
@@ -930,7 +941,7 @@ phi::KernelKey GetKernelKey(
 
 #ifdef PADDLE_WITH_DNNL
   if (op->HasTrait<OneDNNTrait>() && res.backend() == phi::Backend::CPU &&
-      SupportsMKLDNN(res.dtype())) {
+      SupportsMKLDNN(kernel_fn_str, res.dtype())) {
     res.set_backend(phi::Backend::ONEDNN);
     res.set_layout(phi::DataLayout::ONEDNN);
   }

From d189be93be8213c0cffee3ea7ab2a04537d9a373 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Tue, 19 Dec 2023 02:37:19 +0000
Subject: [PATCH 14/49] refine

---
 paddle/fluid/pir/dialect/CMakeLists.txt             | 11 ++++-------
 paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc |  1 +
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/pir/dialect/CMakeLists.txt b/paddle/fluid/pir/dialect/CMakeLists.txt
index 5e2d8e2de0cd88..9324b0f57d6803 100644
--- a/paddle/fluid/pir/dialect/CMakeLists.txt
+++ b/paddle/fluid/pir/dialect/CMakeLists.txt
@@ -242,6 +242,10 @@ set(op_dialect_srcs
     ${pir_update_op_source_file}
     ${api_source_file})
 
+if(WITH_MKLDNN)
+  list(APPEND op_dialect_srcs ${onednn_op_source_file})
+endif()
+
 set(op_dialect_deps phi common pir type_info string_helper)
 
 cc_library(
@@ -249,13 +253,6 @@ cc_library(
   SRCS ${op_dialect_srcs}
   DEPS ${op_dialect_deps})
 
-if(WITH_MKLDNN)
-  cc_library(
-    op_onednn_dialect
-    SRCS ${onednn_op_source_file}
-    DEPS ${op_dialect_deps})
-endif()
-
 #Note(risemeup1):compile some *.cc files which depend on primitive_vjp_experimental into op_dialect_vjp.a/lib
 set(op_decomp_source_file ${PD_DIALECT_SOURCE_DIR}/op_decomp.cc)
 set(op_dialect_vjp_srcs
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index e25072d93be74a..d3eb966a8420eb 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -31,6 +31,7 @@
 #include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
 #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
 #include "paddle/fluid/pir/dialect/operator/trait/inplace.h"
+#include "paddle/fluid/pir/dialect/operator/trait/onednn.h"
 #include "paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.h"
 #include "paddle/fluid/pir/dialect/operator/utils/op_yaml_info_util.h"
 #include "paddle/fluid/pir/dialect/operator/utils/utils.h"

From 46aafd29ebda2c476384cc1098961bbda81cec59 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Tue, 19 Dec 2023 03:27:54 +0000
Subject: [PATCH 15/49] refine

---
 paddle/fluid/ir_adaptor/translator/op_translator.cc | 1 +
 paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc | 1 +
 2 files changed, 2 insertions(+)

diff --git a/paddle/fluid/ir_adaptor/translator/op_translator.cc b/paddle/fluid/ir_adaptor/translator/op_translator.cc
index 38965c05f7d9c5..2bcdd8196c25b3 100644
--- a/paddle/fluid/ir_adaptor/translator/op_translator.cc
+++ b/paddle/fluid/ir_adaptor/translator/op_translator.cc
@@ -34,6 +34,7 @@
 #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
+#include "paddle/fluid/pir/dialect/operator/ir/pd_onednn_op.h"
 #include "paddle/fluid/pir/dialect/operator/utils/utils.h"
 #include "paddle/phi/core/utils/data_type.h"
 #include "paddle/pir/core/builder.h"
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index d3eb966a8420eb..d0c7fcbc6441a2 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -29,6 +29,7 @@
 #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
+#include "paddle/fluid/pir/dialect/operator/ir/pd_onednn_op.h"
 #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
 #include "paddle/fluid/pir/dialect/operator/trait/inplace.h"
 #include "paddle/fluid/pir/dialect/operator/trait/onednn.h"

From 1145d139db2df04613786d3d18972abd79521021 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Tue, 19 Dec 2023 05:26:04 +0000
Subject: [PATCH 16/49] refine

---
 paddle/fluid/pir/dialect/operator/trait/trait.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/paddle/fluid/pir/dialect/operator/trait/trait.cc b/paddle/fluid/pir/dialect/operator/trait/trait.cc
index 2a5b7575959b9f..7444faec519bc2 100644
--- a/paddle/fluid/pir/dialect/operator/trait/trait.cc
+++ b/paddle/fluid/pir/dialect/operator/trait/trait.cc
@@ -14,6 +14,10 @@
 
 #include "paddle/fluid/pir/dialect/operator/trait/custom_vjp.h"
 #include "paddle/fluid/pir/dialect/operator/trait/inplace.h"
+#include "paddle/fluid/pir/dialect/operator/trait/onednn.h"
 
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::InplaceTrait)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomVjpTrait)
+IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNTrait)
+IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNOnlyTrait)
+IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNDynamicFallbackTrait)

From d7d9438e841232ddaf7c1287fddfe6130712aefa Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Tue, 19 Dec 2023 08:08:48 +0000
Subject: [PATCH 17/49] instruction

---
 .../new_executor/instruction/CMakeLists.txt   |  20 ++
 .../onednn_legacy_kernel_instruction.cc       | 173 ++++++++++++++++++
 .../onednn_legacy_kernel_instruction.h        |  72 ++++++++
 .../onednn_mixed_phi_kernel_instruction.cc    | 168 +++++++++++++++++
 .../onednn_mixed_phi_kernel_instruction.h     |  73 ++++++++
 .../onednn_phi_kernel_instruction.cc          | 168 +++++++++++++++++
 .../onednn_phi_kernel_instruction.h           |  73 ++++++++
 .../framework/new_executor/pir_interpreter.cc |  19 ++
 .../ir_adaptor/translator/op_translator.cc    |   4 +-
 .../pir/dialect/kernel/ir/kernel_dialect.cc   |  99 ++++++++++
 .../pir/dialect/kernel/ir/kernel_dialect.h    |  18 ++
 .../fluid/pir/dialect/kernel/ir/kernel_op.cc  | 123 +++++++++++++
 .../fluid/pir/dialect/kernel/ir/kernel_op.h   |  39 ++++
 .../fluid/pir/dialect/op_generator/op_gen.py  |   2 +-
 paddle/fluid/pir/dialect/operator/ir/ops.yaml |   9 +
 .../fluid/pir/dialect/operator/utils/utils.cc |   5 +
 .../fluid/pir/dialect/operator/utils/utils.h  |   2 +
 .../pir/transforms/pd_op_to_kernel_pass.cc    | 119 ++++++++++--
 .../cpu/onednn_to_paddle_layout_kernel.cc     |  94 ++++++++++
 .../phi/kernels/cpu/transfer_layout_kernel.h  |  44 +++++
 .../kernels/onednn_to_paddle_layout_kernel.h  |  28 +++
 21 files changed, 1338 insertions(+), 14 deletions(-)
 create mode 100644 paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.cc
 create mode 100644 paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.h
 create mode 100644 paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.cc
 create mode 100644 paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.h
 create mode 100644 paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.cc
 create mode 100644 paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.h
 create mode 100644 paddle/phi/kernels/cpu/onednn_to_paddle_layout_kernel.cc
 create mode 100644 paddle/phi/kernels/cpu/transfer_layout_kernel.h
 create mode 100644 paddle/phi/kernels/onednn_to_paddle_layout_kernel.h

diff --git a/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt b/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
index 46d907d60841b8..b9ac389c69d875 100644
--- a/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
+++ b/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
@@ -1,3 +1,23 @@
+set(instruction_base_srcs
+    instruction_base.cc
+    phi_kernel_instruction.cc
+    legacy_kernel_instruction.cc
+    if_instruction.cc
+    while_instruction.cc
+    select_input_instruction.cc
+    has_elements_instruction.cc
+    tuple_push_instruction.cc
+    tuple_pop_instruction.cc
+    builtin_combine_instruction.cc
+    instruction_util.cc)
+
+if(WITH_MKLDNN)
+  # NOTE(review): verify cc_library(instruction_base) uses this list as SRCS.
+  list(APPEND instruction_base_srcs onednn_phi_kernel_instruction.cc
+       onednn_mixed_phi_kernel_instruction.cc
+       onednn_legacy_kernel_instruction.cc)
+endif()
+
 cc_library(
   instruction_base
   SRCS instruction_base.cc
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.cc
new file mode 100644
index 00000000000000..c562dae6264f6e
--- /dev/null
+++ b/paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.cc
@@ -0,0 +1,173 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.h"
+
+#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
+#include "paddle/fluid/framework/new_executor/interpreter/interpreter_util.h"
+#include "paddle/fluid/framework/new_executor/interpreter/stream_analyzer.h"
+#include "paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/pir/dialect/operator/interface/infermeta.h"
+#include "paddle/fluid/pir/dialect/operator/interface/op_yaml_info.h"
+#include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h"
+#include "paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.h"
+
+#include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/core/meta_tensor.h"
+#include "paddle/phi/core/type_defs.h"
+
+namespace paddle {
+namespace framework {
+
+// Prepares the instruction from the kernel-dialect `op`: reads its scheduling
+// attributes, builds the infer-meta context, selects the phi kernel and
+// creates the ExecutionContext that Run() passes to that kernel.
+OneDNNLegacyKernelInstruction::OneDNNLegacyKernelInstruction(
+    size_t id,
+    const platform::Place& place,
+    pir::Operation* op,
+    const ValueExecutionInfo* value_exec_info)
+    : InstructionBase(id, place), value_exec_info_(value_exec_info) {
+  auto& op_attributes = op->attributes();
+  auto op_name =
+      op_attributes.at("op_name").dyn_cast<pir::StrAttribute>().AsString();
+  pir::OpInfo op_info =
+      pir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
+  op_ = op;
+  legacy_op_name_ = op_name;
+  VLOG(6) << "construct onednn legacy kernel instruction for: "
+          << legacy_op_name_;
+
+  if (op_attributes.count("execution_stream") != 0) {
+    SetExecutionStream(op_attributes.at("execution_stream")
+                           .dyn_cast<pir::StrAttribute>()
+                           .AsString());
+  }
+  if (op_attributes.count("stream_priority") != 0) {
+    SetStreamPriority(op_attributes.at("stream_priority")
+                          .dyn_cast<pir::Int32Attribute>()
+                          .data());
+  }
+  if (op_attributes.count("scheduling_priority") != 0) {
+    SetSchedulingPriority(op_attributes.at("scheduling_priority")
+                              .dyn_cast<pir::Int64Attribute>()
+                              .data());
+  } else if (interpreter::IsCommunicationOp(op_)) {
+    // NOTE(Ruibiao): Dispatching computation before communication improves
+    // multi-stream overlap when the time cost of communication less than
+    // that of the calculation (e.g., ResNet50_bs128_pure_fp16 N4C32
+    // training).
+    SetSchedulingPriority(1);
+  }
+  VLOG(6) << "finish process dist attributes";
+
+  SetKernelType(AnalyseOpFuncType(op, place));
+  VLOG(6) << "finish process analyse kernel type";
+
+  infer_meta_interface_ =
+      op_info.GetInterfaceImpl<paddle::dialect::InferMetaInterface>();
+  VLOG(6) << "finish process infer_meta_interface_";
+
+  auto yaml_interface =
+      op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>();
+  PADDLE_ENFORCE_NOT_NULL(
+      yaml_interface,
+      phi::errors::PreconditionNotMet(
+          "can not find OpYamlInfoInterface from [%s]", legacy_op_name_));
+  paddle::dialect::OpYamlInfoParser yaml_info_parser(
+      yaml_interface->get_op_info_(), paddle::dialect::IsLegacyOp(op_name));
+  VLOG(6) << "finish process yaml_info_parser";
+
+  if (infer_meta_interface_) {
+    BuildPhiContext<
+        phi::InferMetaContext,
+        phi::MetaTensor,
+        phi::MetaTensor,
+        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
+        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
+        false>(op, *value_exec_info_, yaml_info_parser, &infer_meta_context_);
+  }
+  VLOG(6) << "finish process infer meta context";
+
+  auto kernel_name =
+      op_attributes.at("kernel_name").dyn_cast<pir::StrAttribute>().AsString();
+  auto kernel_key = op_attributes.at("kernel_key")
+                        .dyn_cast<paddle::dialect::KernelAttribute>()
+                        .data();
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+      kernel_name, kernel_key);
+  // Owned copy of the selected kernel; released in the destructor.
+  phi_kernel_ = new phi::Kernel(kernel_result.kernel);
+  PADDLE_ENFORCE_EQ(
+      phi_kernel_->IsValid(),
+      true,
+      phi::errors::NotFound("not found kernel for [%s]", kernel_name));
+  VLOG(6) << "finish process select kernel: " << kernel_name;
+
+  const Scope* inner_scope = value_exec_info_->GetScope();
+
+  operator_base_ = BuildOperatorBase(op, *value_exec_info_, yaml_info_parser);
+
+  paddle::framework::VariableValueMap in_map;
+  paddle::framework::VariableValueMap out_map;
+  auto dev_ctx = phi::DeviceContextPool::Instance().Get(
+      phi::TransToPhiPlace(kernel_key.backend()));
+
+  runtime_context_ = std::make_shared<paddle::framework::RuntimeContext>(
+      paddle::framework::RuntimeContext(in_map, out_map));
+  BuildRuntimeContext(
+      op, *value_exec_info, yaml_info_parser, runtime_context_.get());
+
+  kernel_context_ = new paddle::framework::ExecutionContext(
+      *operator_base_, *inner_scope, *dev_ctx, *(runtime_context_.get()));
+
+  VLOG(6) << "finish process kernel context";
+  SetDeviceContext(ParseDeviceContext(
+      op, dev_ctx, place, GetExecutionStream(), GetStreamPriority()));
+  VLOG(6) << "finish process device context";
+
+  InitInputsOutputsIds(op, *value_exec_info);
+  VLOG(6) << "finish process inputs outputs index";
+
+  auto& no_need_buffer_ids = yaml_info_parser.NoNeedBufferIds();
+  std::unordered_set<pir::Value> no_need_buffer_values;
+  for (size_t i = 0; i < no_need_buffer_ids.size(); i++) {
+    no_need_buffer_values.insert(op->operand_source(no_need_buffer_ids[i]));
+  }
+  SetNoNeedBuffer(no_need_buffer_values);
+  VLOG(6) << "finish process no need buffer";
+}
+
+OneDNNLegacyKernelInstruction::~OneDNNLegacyKernelInstruction() {
+  // kernel_context_ and phi_kernel_ are both allocated with `new` in the
+  // constructor and are owned by this instruction, so they are released
+  // here. Applying `delete` to a null pointer is a no-op, which makes
+  // explicit null checks unnecessary (both members default to nullptr in
+  // the class definition).
+  delete kernel_context_;
+  delete phi_kernel_;
+}
+
+void OneDNNLegacyKernelInstruction::Run() {
+  VLOG(6) << "Run op " << legacy_op_name_ << " infer meta.";
+  if (infer_meta_interface_ != nullptr) {  // infer-meta is optional
+    infer_meta_interface_->infer_meta_(&infer_meta_context_);
+  }
+  VLOG(6) << "Run op " << legacy_op_name_ << " kernel.";
+  (*phi_kernel_)(kernel_context_);
+}
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.h b/paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.h
new file mode 100644
index 00000000000000..e5c7b0cd151765
--- /dev/null
+++ b/paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.h
@@ -0,0 +1,72 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/framework/new_executor/instruction/instruction_base.h"
+
+namespace pir {
+class Operation;
+}  // namespace pir
+
+namespace paddle {
+namespace framework {
+class Scope;
+class ValueExecutionInfo;
+
+class OneDNNLegacyKernelInstruction : public InstructionBase {
+ public:
+  OneDNNLegacyKernelInstruction(size_t id,
+                                const platform::Place& place,
+                                ::pir::Operation* op,
+                                const ValueExecutionInfo* value_exec_info);
+
+  ~OneDNNLegacyKernelInstruction();  // deletes kernel_context_, phi_kernel_
+  phi::Kernel* PhiKernel() const { return phi_kernel_; }
+
+  const phi::InferMetaContext& InferMetaContext() const {
+    return infer_meta_context_;
+  }
+
+  paddle::dialect::InferMetaInterface::Concept* InferMetaInterface() const {
+    return infer_meta_interface_;
+  }
+
+  void Run() override;  // infer-meta (when available), then the kernel
+
+  const std::string& Name() const override { return legacy_op_name_; }
+
+  ::pir::Operation* Operation() const override { return op_; }
+
+ private:
+  std::string legacy_op_name_;  // value of the op's "op_name" attribute
+
+  paddle::dialect::InferMetaInterface::Concept* infer_meta_interface_{
+      nullptr};  // not owned
+
+  phi::InferMetaContext infer_meta_context_;
+
+  paddle::framework::ExecutionContext* kernel_context_{nullptr};  // owned
+  std::shared_ptr<framework::RuntimeContext> runtime_context_;
+  std::shared_ptr<paddle::framework::OperatorBase> operator_base_;
+
+  phi::Kernel* phi_kernel_{nullptr};  // owned; deleted in the destructor
+
+  ::pir::Operation* op_{nullptr};  // not owned
+
+  const ValueExecutionInfo* value_exec_info_;  // not owned
+};
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.cc
new file mode 100644
index 00000000000000..ffdf06783df24a
--- /dev/null
+++ b/paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.cc
@@ -0,0 +1,168 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.h"
+
+#include "paddle/fluid/framework/new_executor/interpreter/interpreter_util.h"
+#include "paddle/fluid/framework/new_executor/interpreter/stream_analyzer.h"
+#include "paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/pir/dialect/operator/interface/infermeta.h"
+#include "paddle/fluid/pir/dialect/operator/interface/op_yaml_info.h"
+#include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h"
+#include "paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.h"
+#include "paddle/fluid/platform/collective_helper.h"
+#include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/core/meta_tensor.h"
+#include "paddle/phi/core/type_defs.h"
+
+#include "paddle/pir/core/builtin_attribute.h"
+#include "paddle/pir/core/operation.h"
+#include "paddle/pir/core/value.h"
+
+#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
+namespace paddle {
+namespace framework {
+// Prepares what Run() needs: InferMeta context, kernel context and the kernel.
+OneDNNMixedPhiKernelInstruction::OneDNNMixedPhiKernelInstruction(
+    size_t id,
+    const platform::Place& place,
+    pir::Operation* op,
+    const ValueExecutionInfo* value_exec_info)
+    : InstructionBase(id, place), value_exec_info_(value_exec_info) {
+  const auto& op_attributes = op->attributes();  // bind, do not copy the map
+  auto op_name =
+      op_attributes.at("op_name").dyn_cast<pir::StrAttribute>().AsString();
+  pir::OpInfo op_info =
+      pir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
+  op_ = op;
+  phi_op_name_ = op_name;
+  VLOG(6) << "construct phi kernel instruction for: " << phi_op_name_;
+  // Optional stream/scheduling hints, read from the op attributes if present.
+  if (op_attributes.count("execution_stream") != 0) {
+    SetExecutionStream(op_attributes.at("execution_stream")
+                           .dyn_cast<pir::StrAttribute>()
+                           .AsString());
+  }
+  if (op_attributes.count("stream_priority") != 0) {
+    SetStreamPriority(op_attributes.at("stream_priority")
+                          .dyn_cast<pir::Int32Attribute>()
+                          .data());
+  }
+  if (op_attributes.count("scheduling_priority") != 0) {
+    SetSchedulingPriority(op_attributes.at("scheduling_priority")
+                              .dyn_cast<pir::Int64Attribute>()
+                              .data());
+  } else {
+    if (interpreter::IsCommunicationOp(op_)) {
+      // NOTE(Ruibiao): Dispatching computation before communication improves
+      // multi-stream overlap when the time cost of communication less than
+      // that of the calculation (e.g., ResNet50_bs128_pure_fp16 N4C32
+      // training).
+      SetSchedulingPriority(1);
+    }
+  }
+  VLOG(6) << "finish process dist attributes";
+
+  SetKernelType(AnalyseOpFuncType(op, place));
+  VLOG(6) << "finish process analyse kernel type";
+
+  infer_meta_interface_ =
+      op_info.GetInterfaceImpl<paddle::dialect::InferMetaInterface>();
+  VLOG(6) << "finish process infer_meta_interface_";
+
+  auto yaml_interface =
+      op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>();
+  PADDLE_ENFORCE_NOT_NULL(
+      yaml_interface,
+      phi::errors::PreconditionNotMet(
+          "can not find OpYamlInfoInterface from [%s]", phi_op_name_));
+  paddle::dialect::OpYamlInfoParser yaml_info_parser(
+      yaml_interface->get_op_info_(), paddle::dialect::IsLegacyOp(op_name));
+  VLOG(6) << "finish process yaml_info_parser";
+
+  if (infer_meta_interface_) {
+    BuildPhiContext<
+        phi::InferMetaContext,
+        phi::MetaTensor,
+        phi::MetaTensor,
+        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
+        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
+        false>(op, *value_exec_info_, yaml_info_parser, &infer_meta_context_);
+  }
+  VLOG(6) << "finish process infer meta context";
+  // Look up the kernel by the name/key stored on the op and keep a copy.
+  auto kernel_name =
+      op_attributes.at("kernel_name").dyn_cast<pir::StrAttribute>().AsString();
+  auto kernel_key = op_attributes.at("kernel_key")
+                        .dyn_cast<paddle::dialect::KernelAttribute>()
+                        .data();
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+      kernel_name, kernel_key);
+  phi_kernel_ = new phi::Kernel(kernel_result.kernel);
+  PADDLE_ENFORCE_EQ(
+      phi_kernel_->IsValid(), true, "not found kernel for [%s]", kernel_name);
+  VLOG(6) << "finish process select kernel";
+
+  BuildPhiContext<phi::KernelContext,
+                  const phi::TensorBase*,
+                  phi::TensorBase*,
+                  paddle::small_vector<const phi::TensorBase*>,
+                  paddle::small_vector<phi::TensorBase*>,
+                  true>(
+      op, *value_exec_info_, yaml_info_parser, &kernel_context_);
+
+  kernel_context_.SetDeviceContext(phi::DeviceContextPool::Instance().Get(
+      phi::TransToPhiPlace(kernel_key.backend())));
+  VLOG(6) << "finish process kernel context";
+
+  SetDeviceContext(
+      ParseDeviceContext(op,
+                         phi::DeviceContextPool::Instance().Get(
+                             phi::TransToPhiPlace(kernel_key.backend())),
+                         place,
+                         GetExecutionStream(),
+                         GetStreamPriority()));
+  VLOG(6) << "finish process device context";
+
+  InitInputsOutputsIds(op, *value_exec_info);
+  VLOG(6) << "finish process inputs outputs index";
+  // Translate no-need-buffer operand indices into the operand values.
+  auto& no_need_buffer_ids = yaml_info_parser.NoNeedBufferIds();
+  std::unordered_set<pir::Value> no_need_buffer_values;
+  for (size_t id = 0; id < no_need_buffer_ids.size(); id++) {
+    no_need_buffer_values.insert(op->operand_source(no_need_buffer_ids[id]));
+  }
+  SetNoNeedBuffer(no_need_buffer_values);
+  VLOG(6) << "finish process no need buffer";
+}
+
+OneDNNMixedPhiKernelInstruction::~OneDNNMixedPhiKernelInstruction() {
+  // The kernel copy is heap-allocated by the constructor and released here.
+  // `delete` on a null pointer is a no-op, so no explicit guard is needed.
+  delete phi_kernel_;
+}
+
+void OneDNNMixedPhiKernelInstruction::Run() {
+  if (infer_meta_interface_ != nullptr) {  // InferMeta is optional for an op
+    infer_meta_interface_->infer_meta_(&infer_meta_context_);
+  }
+  VLOG(6) << "Run op " << phi_op_name_ << " infer meta.";
+  (*phi_kernel_)(&kernel_context_);
+  VLOG(6) << "Run op " << phi_op_name_ << " kernel.";
+}
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.h b/paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.h
new file mode 100644
index 00000000000000..bcacc13233302d
--- /dev/null
+++ b/paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.h
@@ -0,0 +1,73 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/framework/new_executor/instruction/instruction_base.h"
+
+namespace pir {
+class Operation;
+}  // namespace pir
+
+namespace paddle {
+namespace framework {
+class Scope;
+class ValueExecutionInfo;
+// Instruction built by PirInterpreter for OneDNNMixedPhiKernelOp operations.
+class OneDNNMixedPhiKernelInstruction : public InstructionBase {
+ public:
+  OneDNNMixedPhiKernelInstruction(size_t id,
+                                  const platform::Place& place,
+                                  ::pir::Operation* op,
+                                  const ValueExecutionInfo* value_exec_info);
+
+  ~OneDNNMixedPhiKernelInstruction();
+
+  phi::Kernel* PhiKernel() const { return phi_kernel_; }
+
+  const phi::KernelContext& KernelContext() const { return kernel_context_; }
+
+  const phi::InferMetaContext& InferMetaContext() const {
+    return infer_meta_context_;
+  }
+
+  paddle::dialect::InferMetaInterface::Concept* InferMetaInterface() const {
+    return infer_meta_interface_;
+  }
+
+  ::pir::Operation* Operation() const override { return op_; }
+
+  void Run() override;
+
+  const std::string& Name() const override { return phi_op_name_; }
+
+ private:
+  paddle::dialect::InferMetaInterface::Concept* infer_meta_interface_{
+      nullptr};  // not owned
+
+  phi::InferMetaContext infer_meta_context_;
+
+  phi::KernelContext kernel_context_;
+
+  phi::Kernel* phi_kernel_{nullptr};  // owned: new'd in ctor, deleted in dtor
+
+  std::string phi_op_name_;
+
+  ::pir::Operation* op_{nullptr};  // not owned
+
+  const ValueExecutionInfo* value_exec_info_;  // not owned
+};
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.cc
new file mode 100644
index 00000000000000..476e4374e0e219
--- /dev/null
+++ b/paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.cc
@@ -0,0 +1,168 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.h"
+
+#include "paddle/fluid/framework/new_executor/interpreter/interpreter_util.h"
+#include "paddle/fluid/framework/new_executor/interpreter/stream_analyzer.h"
+#include "paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/pir/dialect/operator/interface/infermeta.h"
+#include "paddle/fluid/pir/dialect/operator/interface/op_yaml_info.h"
+#include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h"
+#include "paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.h"
+#include "paddle/fluid/platform/collective_helper.h"
+#include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/core/meta_tensor.h"
+#include "paddle/phi/core/type_defs.h"
+
+#include "paddle/pir/core/builtin_attribute.h"
+#include "paddle/pir/core/operation.h"
+#include "paddle/pir/core/value.h"
+
+#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
+namespace paddle {
+namespace framework {
+// Prepares what Run() needs: InferMeta context, kernel context and the kernel.
+OneDNNPhiKernelInstruction::OneDNNPhiKernelInstruction(
+    size_t id,
+    const platform::Place& place,
+    pir::Operation* op,
+    const ValueExecutionInfo* value_exec_info)
+    : InstructionBase(id, place), value_exec_info_(value_exec_info) {
+  const auto& op_attributes = op->attributes();  // bind, do not copy the map
+  auto op_name =
+      op_attributes.at("op_name").dyn_cast<pir::StrAttribute>().AsString();
+  pir::OpInfo op_info =
+      pir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
+  op_ = op;
+  phi_op_name_ = op_name;
+  VLOG(6) << "construct phi kernel instruction for: " << phi_op_name_;
+  // Optional stream/scheduling hints, read from the op attributes if present.
+  if (op_attributes.count("execution_stream") != 0) {
+    SetExecutionStream(op_attributes.at("execution_stream")
+                           .dyn_cast<pir::StrAttribute>()
+                           .AsString());
+  }
+  if (op_attributes.count("stream_priority") != 0) {
+    SetStreamPriority(op_attributes.at("stream_priority")
+                          .dyn_cast<pir::Int32Attribute>()
+                          .data());
+  }
+  if (op_attributes.count("scheduling_priority") != 0) {
+    SetSchedulingPriority(op_attributes.at("scheduling_priority")
+                              .dyn_cast<pir::Int64Attribute>()
+                              .data());
+  } else {
+    if (interpreter::IsCommunicationOp(op_)) {
+      // NOTE(Ruibiao): Dispatching computation before communication improves
+      // multi-stream overlap when the time cost of communication less than
+      // that of the calculation (e.g., ResNet50_bs128_pure_fp16 N4C32
+      // training).
+      SetSchedulingPriority(1);
+    }
+  }
+  VLOG(6) << "finish process dist attributes";
+
+  SetKernelType(AnalyseOpFuncType(op, place));
+  VLOG(6) << "finish process analyse kernel type";
+
+  infer_meta_interface_ =
+      op_info.GetInterfaceImpl<paddle::dialect::InferMetaInterface>();
+  VLOG(6) << "finish process infer_meta_interface_";
+
+  auto yaml_interface =
+      op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>();
+  PADDLE_ENFORCE_NOT_NULL(
+      yaml_interface,
+      phi::errors::PreconditionNotMet(
+          "can not find OpYamlInfoInterface from [%s]", phi_op_name_));
+  paddle::dialect::OpYamlInfoParser yaml_info_parser(
+      yaml_interface->get_op_info_(), paddle::dialect::IsLegacyOp(op_name));
+  VLOG(6) << "finish process yaml_info_parser";
+
+  if (infer_meta_interface_) {
+    BuildPhiContext<
+        phi::InferMetaContext,
+        phi::MetaTensor,
+        phi::MetaTensor,
+        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
+        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
+        false>(op, *value_exec_info_, yaml_info_parser, &infer_meta_context_);
+  }
+  VLOG(6) << "finish process infer meta context";
+  // Look up the kernel by the name/key stored on the op and keep a copy.
+  auto kernel_name =
+      op_attributes.at("kernel_name").dyn_cast<pir::StrAttribute>().AsString();
+  auto kernel_key = op_attributes.at("kernel_key")
+                        .dyn_cast<paddle::dialect::KernelAttribute>()
+                        .data();
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+      kernel_name, kernel_key);
+  phi_kernel_ = new phi::Kernel(kernel_result.kernel);
+  PADDLE_ENFORCE_EQ(
+      phi_kernel_->IsValid(), true, "not found kernel for [%s]", kernel_name);
+  VLOG(6) << "finish process select kernel";
+
+  BuildPhiContext<phi::KernelContext,
+                  const phi::TensorBase*,
+                  phi::TensorBase*,
+                  paddle::small_vector<const phi::TensorBase*>,
+                  paddle::small_vector<phi::TensorBase*>,
+                  true>(
+      op, *value_exec_info_, yaml_info_parser, &kernel_context_);
+
+  kernel_context_.SetDeviceContext(phi::DeviceContextPool::Instance().Get(
+      phi::TransToPhiPlace(kernel_key.backend())));
+  VLOG(6) << "finish process kernel context";
+
+  SetDeviceContext(
+      ParseDeviceContext(op,
+                         phi::DeviceContextPool::Instance().Get(
+                             phi::TransToPhiPlace(kernel_key.backend())),
+                         place,
+                         GetExecutionStream(),
+                         GetStreamPriority()));
+  VLOG(6) << "finish process device context";
+
+  InitInputsOutputsIds(op, *value_exec_info);
+  VLOG(6) << "finish process inputs outputs index";
+  // Translate no-need-buffer operand indices into the operand values.
+  auto& no_need_buffer_ids = yaml_info_parser.NoNeedBufferIds();
+  std::unordered_set<pir::Value> no_need_buffer_values;
+  for (size_t id = 0; id < no_need_buffer_ids.size(); id++) {
+    no_need_buffer_values.insert(op->operand_source(no_need_buffer_ids[id]));
+  }
+  SetNoNeedBuffer(no_need_buffer_values);
+  VLOG(6) << "finish process no need buffer";
+}
+
+OneDNNPhiKernelInstruction::~OneDNNPhiKernelInstruction() {
+  // The kernel copy is heap-allocated by the constructor and released here.
+  // `delete` on a null pointer is a no-op, so no explicit guard is needed.
+  delete phi_kernel_;
+}
+
+void OneDNNPhiKernelInstruction::Run() {
+  if (infer_meta_interface_ != nullptr) {  // InferMeta is optional for an op
+    infer_meta_interface_->infer_meta_(&infer_meta_context_);
+  }
+  VLOG(6) << "Run op " << phi_op_name_ << " infer meta.";
+  (*phi_kernel_)(&kernel_context_);
+  VLOG(6) << "Run op " << phi_op_name_ << " kernel.";
+}
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.h b/paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.h
new file mode 100644
index 00000000000000..da84b8bb0370f3
--- /dev/null
+++ b/paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.h
@@ -0,0 +1,73 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/framework/new_executor/instruction/instruction_base.h"
+
+namespace pir {
+class Operation;
+}  // namespace pir
+
+namespace paddle {
+namespace framework {
+class Scope;
+class ValueExecutionInfo;
+// Instruction built by PirInterpreter for OneDNNPhiKernelOp operations.
+class OneDNNPhiKernelInstruction : public InstructionBase {
+ public:
+  OneDNNPhiKernelInstruction(size_t id,
+                             const platform::Place& place,
+                             ::pir::Operation* op,
+                             const ValueExecutionInfo* value_exec_info);
+
+  ~OneDNNPhiKernelInstruction();
+
+  phi::Kernel* PhiKernel() const { return phi_kernel_; }
+
+  const phi::KernelContext& KernelContext() const { return kernel_context_; }
+
+  const phi::InferMetaContext& InferMetaContext() const {
+    return infer_meta_context_;
+  }
+
+  paddle::dialect::InferMetaInterface::Concept* InferMetaInterface() const {
+    return infer_meta_interface_;
+  }
+
+  ::pir::Operation* Operation() const override { return op_; }
+
+  void Run() override;
+
+  const std::string& Name() const override { return phi_op_name_; }
+
+ private:
+  paddle::dialect::InferMetaInterface::Concept* infer_meta_interface_{
+      nullptr};  // not owned
+
+  phi::InferMetaContext infer_meta_context_;
+
+  phi::KernelContext kernel_context_;
+
+  phi::Kernel* phi_kernel_{nullptr};  // owned: new'd in ctor, deleted in dtor
+
+  std::string phi_op_name_;
+
+  ::pir::Operation* op_{nullptr};  // not owned
+
+  const ValueExecutionInfo* value_exec_info_;  // not owned
+};
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc
index 5ce3186e58f7a0..397a7ade7285b7 100644
--- a/paddle/fluid/framework/new_executor/pir_interpreter.cc
+++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc
@@ -34,6 +34,9 @@
 #include "paddle/phi/core/sparse_csr_tensor.h"
 
 #ifdef PADDLE_WITH_DNNL
+#include "paddle/fluid/framework/new_executor/instruction/onddnn_legacy_kernel_instruction.h"
+#include "paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.h"
+#include "paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
 
@@ -714,6 +717,22 @@ void PirInterpreter::BuildInstruction() {
       } else {
         CREATE_INSTR(PhiKernelInstruction);
       }
+#ifdef PADDLE_WITH_DNNL
+    } else if (op.dialect()->name() == "pd_onednn_kernel") {
+      auto op_name = op.attributes()
+                         .at("op_name")
+                         .dyn_cast<::pir::StrAttribute>()
+                         .AsString();
+      VLOG(6) << "process " << op_name;
+
+      if (op.isa<paddle::dialect::OneDNNPhiKernelOp>()) {
+        CREATE_INSTR(OneDNNPhiKernelInstruction);
+      } else if (op.isa<paddle::dialect::OneDNNMixedPhiKernelOp>()) {
+        CREATE_INSTR(OneDNNMixedPhiKernelInstruction);
+      } else {
+        CREATE_INSTR(OneDNNLegacyKernelInstruction);
+      }
+#endif
 #ifdef PADDLE_WITH_CINN
     } else if (op.dialect()->name() == "cinn_runtime") {
       CREATE_INSTR(CinnJitInstruction);
diff --git a/paddle/fluid/ir_adaptor/translator/op_translator.cc b/paddle/fluid/ir_adaptor/translator/op_translator.cc
index 8c2d42d4442ac0..f1f820dbb393b1 100644
--- a/paddle/fluid/ir_adaptor/translator/op_translator.cc
+++ b/paddle/fluid/ir_adaptor/translator/op_translator.cc
@@ -79,7 +79,7 @@ using AttributeHandlerFn = std::function<pir::Attribute(
 using DenseTensorTypeStorage = paddle::dialect::DenseTensorTypeStorage;
 constexpr char kTargetDialectPrefix[] = "pd_op.";  // NOLINT
 #ifdef PADDLE_WITH_DNNL
-constexpr char kOneDnnTargetDialectPrefix[] = "pd_onednn_op.";  // NOLINT
+constexpr char kOneDNNTargetDialectPrefix[] = "pd_onednn_op.";  // NOLINT
 #endif
 constexpr char kEmptyVarName[] = "@EMPTY@";  // NOLINT
 
@@ -230,7 +230,7 @@ inline pir::Operation* InsertCreateArrayOp(pir::IrContext* ctx,
 inline std::string GetPrefix(const OpDesc& op_desc) {
 #ifdef PADDLE_WITH_DNNL
   return op_desc.GetAttrIfExists<bool>("use_mkldnn")
-             ? kOneDnnTargetDialectPrefix
+             ? kOneDNNTargetDialectPrefix
              : kTargetDialectPrefix;
 #else
   return kTargetDialectPrefix;
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc
index 95e77ff6169c68..e04c267178d191 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc
@@ -122,7 +122,106 @@ void KernelDialect::PrintOperation(pir::Operation *op,
   }
 }
 
+OneDNNKernelDialect::OneDNNKernelDialect(pir::IrContext *context)
+    : pir::Dialect(name(), context, pir::TypeId::get<OneDNNKernelDialect>()) {
+  initialize();  // register this dialect's types, ops and attributes
+}
+// Registers the allocated types, OneDNN kernel ops and KernelAttribute.
+void OneDNNKernelDialect::initialize() {
+  RegisterTypes<paddle::dialect::AllocatedDenseTensorType,
+                paddle::dialect::AllocatedSelectedRowsType,
+                paddle::dialect::AllocatedDenseTensorArrayType>();
+  RegisterOps<dialect::OneDNNPhiKernelOp,
+              dialect::OneDNNMixedPhiKernelOp,
+              dialect::OneDNNLegacyKernelOp>();
+  RegisterAttributes<paddle::dialect::KernelAttribute>();
+}
+// Prints allocated types as "<place>_tensor<d0xd1x...dtype>" / tensor_array.
+void OneDNNKernelDialect::PrintType(pir::Type type, std::ostream &os) const {
+  if (type.isa<AllocatedDenseTensorType>()) {
+    AllocatedDenseTensorType tensor_type =
+        type.dyn_cast<AllocatedDenseTensorType>();
+
+    os << phi::AllocationTypeStr(tensor_type.place().GetType()) << "_";
+    os << "tensor<";
+    for (auto d : common::vectorize(tensor_type.dims())) {
+      os << d;
+      os << "x";
+    }
+    tensor_type.dtype().Print(os);
+    os << ">";
+  } else if (type.isa<AllocatedSelectedRowsType>()) {
+    AllocatedSelectedRowsType tensor_type =
+        type.dyn_cast<AllocatedSelectedRowsType>();
+    // NOTE(review): selected rows print with the same "tensor<" tag as dense.
+    os << phi::AllocationTypeStr(tensor_type.place().GetType()) << "_";
+    os << "tensor<";
+    for (auto d : common::vectorize(tensor_type.dims())) {
+      os << d;
+      os << "x";
+    }
+    tensor_type.dtype().Print(os);
+    os << ">";
+  } else if (type.isa<AllocatedDenseTensorArrayType>()) {
+    AllocatedDenseTensorArrayType tensor_array_type =
+        type.dyn_cast<AllocatedDenseTensorArrayType>();
+
+    os << phi::AllocationTypeStr(tensor_array_type.place().GetType()) << "_";
+    os << "tensor_array<";
+    tensor_array_type.dtype().Print(os);
+    os << ">";
+  }
+}
+// Prints a KernelAttribute as "<backend:..|layout:..|dtype:..>".
+void OneDNNKernelDialect::PrintAttribute(pir::Attribute attr,
+                                         std::ostream &os) const {
+  phi::KernelKey kernel = attr.dyn_cast<KernelAttribute>().data();
+  // NOTE(review): unchecked cast above; assumes attr is a KernelAttribute.
+  os << "<backend:" << kernel.backend() << "|layout:" << kernel.layout()
+     << "|dtype:" << kernel.dtype() << ">";
+}
+
+void OneDNNKernelDialect::PrintOperation(pir::Operation *op,
+                                         pir::IrPrinter &printer) const {
+  // This dialect registers the OneDNN* kernel ops (see initialize()), so those
+  // are the types matched here; any other op uses the generic printer.
+  const char *kind = nullptr;
+  if (op->isa<OneDNNPhiKernelOp>()) {
+    kind = "(phi_kernel)";
+  } else if (op->isa<OneDNNMixedPhiKernelOp>()) {
+    kind = "(phi_mixed_kernel)";
+  } else if (op->isa<OneDNNLegacyKernelOp>()) {
+    kind = "(legacy_kernel)";
+  } else {
+    printer.PrintGeneralOperation(op);
+    return;
+  }
+
+  // All three op kinds carry a string "kernel_name" attribute (see VerifySig).
+  const auto &attrs = op->attributes();
+  std::string kernel_name =
+      attrs.at("kernel_name").dyn_cast<pir::StrAttribute>().AsString();
+  // In-place kernels are shown with a trailing underscore.
+  if (attrs.count("is_inplace") != 0 &&
+      attrs.at("is_inplace").dyn_cast<pir::BoolAttribute>().data()) {
+    kernel_name += "_";
+  }
+
+  // Layout: <results> = "<kernel><kind>" <operands> <attrs> : <in> -> <out>
+  auto &os = printer.os;
+  printer.PrintOpResult(op);
+  os << " =";
+  os << " \"" << kernel_name << kind << "\"";
+  printer.PrintOpOperands(op);
+  printer.PrintAttributeMap(op);
+  os << " :";
+  printer.PrintOperandsType(op);
+  os << " -> ";
+  printer.PrintOpReturnType(op);
+}
+
 }  // namespace dialect
 }  // namespace paddle
 
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::KernelDialect)
+IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNKernelDialect)
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h
index d2fbcadaf8cf2a..c73aa8baed10b5 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h
@@ -36,7 +36,25 @@ class KernelDialect : public pir::Dialect {
   void initialize();
 };
 
+class OneDNNKernelDialect : public pir::Dialect {
+ public:
+  explicit OneDNNKernelDialect(pir::IrContext* context);
+  // Dialect name; PirInterpreter::BuildInstruction dispatches on this string.
+  static const char* name() { return "pd_onednn_kernel"; }
+
+  void PrintType(pir::Type type, std::ostream& os) const override;
+
+  void PrintAttribute(pir::Attribute attr, std::ostream& os) const override;
+
+  void PrintOperation(pir::Operation* op,
+                      pir::IrPrinter& printer) const override;  // NOLINT
+
+ private:
+  void initialize();  // registers types, ops and attributes (ctor calls it)
+};
+
 }  // namespace dialect
 }  // namespace paddle
 
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::KernelDialect)
+IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNKernelDialect)
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc
index 8ad46bc8906adb..01510b9e430938 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc
@@ -98,8 +98,131 @@ phi::KernelKey LegacyKernelOp::kernel_key() {
   return attributes().at("kernel_key").dyn_cast<KernelAttribute>().data();
 }
 
+const char* OneDNNPhiKernelOp::attributes_name[attributes_num] = {  // NOLINT
+    "op_name",
+    "kernel_name",
+    "kernel_key"};
+// Checks that op_name, kernel_name and kernel_key exist with the right types.
+void OneDNNPhiKernelOp::VerifySig() {
+  VLOG(4) << "Verifying inputs, outputs and attributes for: OneDNNPhiKernelOp.";
+
+  auto& attributes = this->attributes();
+  // A missing or mistyped attribute is reported as PreconditionNotMet.
+  PADDLE_ENFORCE(attributes.count("op_name") > 0 &&
+                     attributes.at("op_name").isa<pir::StrAttribute>(),
+                 phi::errors::PreconditionNotMet(
+                     "Type of attribute: op_name is not right."));
+
+  PADDLE_ENFORCE(attributes.count("kernel_name") > 0 &&
+                     attributes.at("kernel_name").isa<pir::StrAttribute>(),
+                 phi::errors::PreconditionNotMet(
+                     "Type of attribute: kernel_name is not right."));
+
+  PADDLE_ENFORCE(attributes.count("kernel_key") > 0 &&
+                     attributes.at("kernel_key").isa<KernelAttribute>(),
+                 phi::errors::PreconditionNotMet(
+                     "Type of attribute: kernel_key is not right."));
+}
+// Typed accessors for the three attributes validated by VerifySig().
+std::string OneDNNPhiKernelOp::op_name() {
+  return attributes().at("op_name").dyn_cast<pir::StrAttribute>().AsString();
+}
+std::string OneDNNPhiKernelOp::kernel_name() {
+  return attributes()
+      .at("kernel_name")
+      .dyn_cast<pir::StrAttribute>()
+      .AsString();
+}
+phi::KernelKey OneDNNPhiKernelOp::kernel_key() {
+  return attributes().at("kernel_key").dyn_cast<KernelAttribute>().data();
+}
+
+const char* OneDNNMixedPhiKernelOp::attributes_name[attributes_num] =
+    {  // NOLINT
+        "op_name",
+        "kernel_name",
+        "kernel_key"};
+// Checks that op_name, kernel_name and kernel_key exist with the right types.
+void OneDNNMixedPhiKernelOp::VerifySig() {
+  VLOG(4) << "Verifying inputs, outputs and attributes for: "
+             "OneDNNMixedPhiKernelOp.";
+
+  auto& attributes = this->attributes();
+  // A missing or mistyped attribute is reported as PreconditionNotMet.
+  PADDLE_ENFORCE(attributes.count("op_name") > 0 &&
+                     attributes.at("op_name").isa<pir::StrAttribute>(),
+                 phi::errors::PreconditionNotMet(
+                     "Type of attribute: op_name is not right."));
+
+  PADDLE_ENFORCE(attributes.count("kernel_name") > 0 &&
+                     attributes.at("kernel_name").isa<pir::StrAttribute>(),
+                 phi::errors::PreconditionNotMet(
+                     "Type of attribute: kernel_name is not right."));
+
+  PADDLE_ENFORCE(attributes.count("kernel_key") > 0 &&
+                     attributes.at("kernel_key").isa<KernelAttribute>(),
+                 phi::errors::PreconditionNotMet(
+                     "Type of attribute: kernel_key is not right."));
+}
+// Typed accessors for the three attributes validated by VerifySig().
+std::string OneDNNMixedPhiKernelOp::op_name() {
+  return attributes().at("op_name").dyn_cast<pir::StrAttribute>().AsString();
+}
+std::string OneDNNMixedPhiKernelOp::kernel_name() {
+  return attributes()
+      .at("kernel_name")
+      .dyn_cast<pir::StrAttribute>()
+      .AsString();
+}
+phi::KernelKey OneDNNMixedPhiKernelOp::kernel_key() {
+  return attributes().at("kernel_key").dyn_cast<KernelAttribute>().data();
+}
+
+const char* OneDNNLegacyKernelOp::attributes_name[attributes_num] = {  // NOLINT
+    "op_name",
+    "kernel_name",
+    "kernel_key"};
+// Checks that op_name, kernel_name and kernel_key exist with the right types.
+void OneDNNLegacyKernelOp::VerifySig() {
+  VLOG(4)
+      << "Verifying inputs, outputs and attributes for: OneDNNLegacyKernelOp.";
+
+  auto& attributes = this->attributes();
+  // A missing or mistyped attribute is reported as PreconditionNotMet.
+  PADDLE_ENFORCE(attributes.count("op_name") > 0 &&
+                     attributes.at("op_name").isa<pir::StrAttribute>(),
+                 phi::errors::PreconditionNotMet(
+                     "Type of attribute: op_name is not right."));
+
+  PADDLE_ENFORCE(attributes.count("kernel_name") > 0 &&
+                     attributes.at("kernel_name").isa<pir::StrAttribute>(),
+                 phi::errors::PreconditionNotMet(
+                     "Type of attribute: kernel_name is not right."));
+
+  PADDLE_ENFORCE(attributes.count("kernel_key") > 0 &&
+                     attributes.at("kernel_key").isa<KernelAttribute>(),
+                 phi::errors::PreconditionNotMet(
+                     "Type of attribute: kernel_key is not right."));
+}
+// Typed accessors for the three attributes validated by VerifySig().
+std::string OneDNNLegacyKernelOp::op_name() {
+  return attributes().at("op_name").dyn_cast<pir::StrAttribute>().AsString();
+}
+std::string OneDNNLegacyKernelOp::kernel_name() {
+  return attributes()
+      .at("kernel_name")
+      .dyn_cast<pir::StrAttribute>()
+      .AsString();
+}
+phi::KernelKey OneDNNLegacyKernelOp::kernel_key() {
+  return attributes().at("kernel_key").dyn_cast<KernelAttribute>().data();
+}
+
 }  // namespace dialect
 }  // namespace paddle
 
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::PhiKernelOp)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::LegacyKernelOp)
+IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNPhiKernelOp)
+IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNMixedPhiKernelOp)
+IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNLegacyKernelOp)
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h
index a96aa5732d5806..83133ee61fb66a 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h
@@ -44,8 +44,56 @@ class LegacyKernelOp : public pir::Op<LegacyKernelOp> {
   void VerifySig();
 };
 
+// Kernel op created by BuildKernelOp for ops that carry OneDNNTrait.
+// name() must be unique among registered ops: the pass resolves the OpInfo
+// through IrContext::GetRegisteredOpInfo(name()).
+class OneDNNPhiKernelOp : public pir::Op<OneDNNPhiKernelOp> {
+ public:
+  using Op::Op;
+  static const char *name() { return "pd_kernel.onednn_phi_kernel"; }
+  static constexpr uint32_t attributes_num = 3;
+  static const char *attributes_name[attributes_num];
+  std::string op_name();
+  std::string kernel_name();
+  phi::KernelKey kernel_key();
+  void VerifySig();
+};
+
+// Variant of OneDNNPhiKernelOp used for ops that also carry
+// OneDNNDynamicFallbackTrait. It needs its own registered name so that it
+// does not resolve to the OpInfo of another kernel op.
+class OneDNNMixedPhiKernelOp : public pir::Op<OneDNNMixedPhiKernelOp> {
+ public:
+  using Op::Op;
+  static const char *name() { return "pd_kernel.onednn_mixed_phi_kernel"; }
+  static constexpr uint32_t attributes_num = 3;
+  static const char *attributes_name[attributes_num];
+  std::string op_name();
+  std::string kernel_name();
+  phi::KernelKey kernel_key();
+  void VerifySig();
+};
+
+// Kernel op used for OneDNNTrait ops that IsOneDNNLegacyOp() reports as
+// legacy. It follows the same onednn_ naming scheme as the two ops above so
+// that its name is distinct as well.
+class OneDNNLegacyKernelOp : public pir::Op<OneDNNLegacyKernelOp> {
+ public:
+  using Op::Op;
+  static const char *name() { return "pd_kernel.onednn_legacy_kernel"; }
+  static constexpr uint32_t attributes_num = 3;
+  static const char *attributes_name[attributes_num];
+  std::string op_name();
+  std::string kernel_name();
+  phi::KernelKey kernel_key();
+  void VerifySig();
+};
+
 }  // namespace dialect
 }  // namespace paddle
 
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::PhiKernelOp)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::LegacyKernelOp)
+IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNPhiKernelOp)
+IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNMixedPhiKernelOp)
+IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNLegacyKernelOp)
diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py
index f9e4d236253904..54ae571e182712 100644
--- a/paddle/fluid/pir/dialect/op_generator/op_gen.py
+++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py
@@ -471,7 +471,7 @@ def __init__(self, op_yaml_item, op_compat_item):
         # parse interfaces list
         self.interfaces_list = self.parse_op_interfaces()
 
-        # OneDnn info
+        # OneDNN info
         if "extra_args" in self.op_yaml_item:
             self.onednn_extra_args = self.op_yaml_item["extra_args"]
             self.onednn_layout_transform = self.op_yaml_item["layout_transform"]
diff --git a/paddle/fluid/pir/dialect/operator/ir/ops.yaml b/paddle/fluid/pir/dialect/operator/ir/ops.yaml
index 6c2dd1d4764ca8..48d282f0ae4a27 100644
--- a/paddle/fluid/pir/dialect/operator/ir/ops.yaml
+++ b/paddle/fluid/pir/dialect/operator/ir/ops.yaml
@@ -1437,6 +1437,15 @@
      func: number_count
      data_type: numbers
 
+- op: onednn_to_paddle_layout
+  args: (Tensor x, int dst_layout)
+  output: Tensor(out)
+  infer_meta:
+    func : UnchangedInferMeta
+    param : [x]
+  kernel:
+    func: onednn_to_paddle_layout
+
 - op: sparse_momentum
   args: (Tensor param, Tensor grad, Tensor velocity, Tensor index, Tensor learning_rate, Tensor master_param,float mu, Scalar axis=0, bool use_nesterov=false,str regularization_method="", float regularization_coeff=0.0f, bool multi_precision=false, float rescale_grad=1.0f)
   output: Tensor(param_out), Tensor(velocity_out), Tensor(master_param_out)
diff --git a/paddle/fluid/pir/dialect/operator/utils/utils.cc b/paddle/fluid/pir/dialect/operator/utils/utils.cc
index 696d4ee34dcde4..9e67b51047477e 100644
--- a/paddle/fluid/pir/dialect/operator/utils/utils.cc
+++ b/paddle/fluid/pir/dialect/operator/utils/utils.cc
@@ -58,6 +58,7 @@ const std::unordered_set<std::string> LegacyOpList = {
     SoftReluOp::name(),
     SoftReluGradOp::name()};
 
+const std::unordered_set<std::string> OneDNNLegacyOpList = {};
 enum class AttrType {
   UNDEFINED = 0,
   BOOL,
@@ -218,6 +219,10 @@ VariantType GetAttributeData(const pir::Attribute& attr) {
 
 bool IsLegacyOp(const std::string& name) { return LegacyOpList.count(name); }
 
+bool IsOneDNNLegacyOp(const std::string& name) {
+  return OneDNNLegacyOpList.count(name);
+}
+
 bool IsEmptyValue(const pir::Value& value) {
   return !value.impl() || !value.type();
 }
diff --git a/paddle/fluid/pir/dialect/operator/utils/utils.h b/paddle/fluid/pir/dialect/operator/utils/utils.h
index 1ebe7d244affdd..addaabed9ec4e4 100644
--- a/paddle/fluid/pir/dialect/operator/utils/utils.h
+++ b/paddle/fluid/pir/dialect/operator/utils/utils.h
@@ -132,6 +132,8 @@ VariantType GetAttributeData(const pir::Attribute& attr);
 
 bool IsLegacyOp(const std::string& name);
 
+bool IsOneDNNLegacyOp(const std::string& name);
+
 bool IsEmptyValue(const pir::Value& value);
 
 std::vector<int64_t> GetInt64Vector(const pir::Attribute& attr);
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index d0c7fcbc6441a2..87bd2d4b64c576 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -303,6 +303,49 @@ static pir::OpResult AddPlaceTransferOp(pir::Value in,
   return new_in;
 }
 
+// Appends to `block` a phi kernel op that runs onednn_to_paddle_layout on
+// `in`; returns its result, typed like `in` but with layout `dst_layout`.
+static pir::OpResult AddOneDNN2PaddleLayoutTransferOp(
+    pir::Value in, const phi::DataLayout& dst_layout, pir::Block* block) {
+  pir::IrContext* ctx = pir::IrContext::Instance();
+  auto in_alloc_type = in.type().dyn_cast<AllocatedDenseTensorType>();
+
+  phi::KernelKey kernel_key;
+  kernel_key.set_backend(phi::Backend::CPU);
+  kernel_key.set_layout(phi::DataLayout::ANY);
+  kernel_key.set_dtype(dialect::TransToPhiDataType(in_alloc_type.dtype()));
+
+  // Names follow the `onednn_to_paddle_layout` op and its registered kernel.
+  std::unordered_map<std::string, pir::Attribute> op_attribute;
+  op_attribute = {
+      {"op_name",
+       pir::StrAttribute::get(ctx, "pd_op.onednn_to_paddle_layout")},
+      {"kernel_name", pir::StrAttribute::get(ctx, "onednn_to_paddle_layout")},
+      {"kernel_key", KernelAttribute::get(ctx, kernel_key)},
+      {"dst_layout",
+       pir::Int32Attribute::get(ctx, static_cast<int>(dst_layout))}};
+
+  auto out_type = AllocatedDenseTensorType::get(ctx,
+                                                in_alloc_type.place(),
+                                                in_alloc_type.dtype(),
+                                                in_alloc_type.dims(),
+                                                dst_layout,
+                                                in_alloc_type.lod(),
+                                                in_alloc_type.offset());
+
+  pir::OpInfo kernel_op_info = ctx->GetRegisteredOpInfo(PhiKernelOp::name());
+  pir::Operation* op =
+      pir::Operation::Create({in}, op_attribute, {out_type}, kernel_op_info);
+
+  auto in_op = in.dyn_cast<pir::OpResult>().owner();
+  if (in_op && in_op->HasAttribute(kAttrIsPersisable)) {
+    op->set_attribute(kAttrIsPersisable, in_op->attribute(kAttrIsPersisable));
+  }
+
+  block->push_back(op);
+  return op->result(0);
+}
+
 static bool NeedTransformDataType(const phi::DataType& l,
                                   const phi::DataType& r) {
   return l != phi::DataType::ALL_DTYPE && r != phi::DataType::ALL_DTYPE &&
@@ -1759,6 +1802,22 @@ std::vector<pir::Value> BuildInputs(
         }
       }
     }
+
+    // 3. layout transfer(only for onednn)
+#ifdef PADDLE_WITH_DNNL
+    if (kernel_key.backend() == phi::Backend::CPU &&
+        cur_in.dyn_cast<pir::OpResult>().owner()->HasTrait<OneDNNTrait>()) {
+      auto new_in_type = new_in.type();
+      if (new_in_type.isa<AllocatedDenseTensorType>()) {
+        new_in = AddOneDNN2PaddleLayoutTransferOp(
+            new_in, phi::DataLayout::ANY, block);
+      } else {
+        PADDLE_THROW(
+            phi::errors::Unimplemented("PIR layout transfer only support "
+                                       "allocated dense tensor type for now"));
+      }
+    }
+#endif
     vec_inputs.push_back(new_in);
   }
   return vec_inputs;
@@ -1837,19 +1896,57 @@ pir::Operation* BuildKernelOp(
   if (op_item->HasTrait<InplaceTrait>()) {
     op_attribute.emplace("is_inplace", pir::BoolAttribute::get(ctx, true));
   }
-
-  pir::OpInfo phi_kernel_op_info =
-      ctx->GetRegisteredOpInfo(PhiKernelOp::name());
-
-  pir::OpInfo legacy_kernel_op_info =
-      ctx->GetRegisteredOpInfo(LegacyKernelOp::name());
+#ifdef PADDLE_WITH_DNNL
+#endif
   pir::Operation* op = nullptr;
-  if (IsLegacyOp(op_item->name())) {
-    op = pir::Operation::Create(
-        vec_inputs, op_attribute, op_output_types, legacy_kernel_op_info);
+  if (op_item->HasTrait<OneDNNTrait>()) {
+    if (IsOneDNNLegacyOp(op_item->name())) {
+      pir::OpInfo legacy_kernel_op_info =
+          ctx->GetRegisteredOpInfo(OneDNNLegacyKernelOp::name());
+      op = pir::Operation::Create(
+          vec_inputs, op_attribute, op_output_types, legacy_kernel_op_info);
+    } else {
+      auto op_info_parser = GetOpYamlInfoParser(op_item);
+      op_attribute.emplace("extra_args",
+                           op_info_parser->OpRuntimeInfo().extra_args);
+      op_attribute.emplace(
+          "layout_transform_arg",
+          op_info_parser->OpRuntimeInfo().layout_transform_arg);
+      op_attribute.emplace(
+          "layout_transform_inputs",
+          op_info_parser->OpRuntimeInfo().layout_transform_inputs);
+      op_attribute.emplace("is_onednn_only",
+                           op_info_parser->OpRuntimeInfo().is_onednn_only);
+      op_attribute.emplace("dynamic_fallback",
+                           op_info_parser->OpRuntimeInfo().dynamic_fallback);
+      if (op_item->HasTrait<OneDNNDynamicFallbackTrait>()) {
+        pir::OpInfo phi_kernel_op_info =
+            ctx->GetRegisteredOpInfo(OneDNNMixedPhiKernelOp::name());
+
+        op = pir::Operation::Create(
+            vec_inputs, op_attribute, op_output_types, phi_kernel_op_info);
+      } else {
+        pir::OpInfo phi_kernel_op_info =
+            ctx->GetRegisteredOpInfo(OneDNNPhiKernelOp::name());
+
+        op = pir::Operation::Create(
+            vec_inputs, op_attribute, op_output_types, phi_kernel_op_info);
+      }
+    }
   } else {
-    op = pir::Operation::Create(
-        vec_inputs, op_attribute, op_output_types, phi_kernel_op_info);
+    if (IsLegacyOp(op_item->name())) {
+      pir::OpInfo legacy_kernel_op_info =
+          ctx->GetRegisteredOpInfo(LegacyKernelOp::name());
+
+      op = pir::Operation::Create(
+          vec_inputs, op_attribute, op_output_types, legacy_kernel_op_info);
+    } else {
+      pir::OpInfo phi_kernel_op_info =
+          ctx->GetRegisteredOpInfo(PhiKernelOp::name());
+
+      op = pir::Operation::Create(
+          vec_inputs, op_attribute, op_output_types, phi_kernel_op_info);
+    }
   }
 
   (*map_op_pair)[op_item] = op;
diff --git a/paddle/phi/kernels/cpu/onednn_to_paddle_layout_kernel.cc b/paddle/phi/kernels/cpu/onednn_to_paddle_layout_kernel.cc
new file mode 100644
index 00000000000000..301125ef5ca298
--- /dev/null
+++ b/paddle/phi/kernels/cpu/onednn_to_paddle_layout_kernel.cc
@@ -0,0 +1,94 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/kernels/onednn_to_paddle_layout_kernel.h"
+
+#include <sstream>
+#include <string>
+
+#include "glog/logging.h"
+
+#include "paddle/phi/backends/all_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/visit_type.h"
+#include "paddle/phi/kernels/funcs/data_layout_transform.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/memcpy_kernel.h"
+#ifdef PADDLE_WITH_DNNL
+#include "paddle/phi/backends/onednn/onednn_helper.h"
+#endif
+namespace phi {
+
+// Converts `x` from the oneDNN layout to the paddle layout `dst_layout`;
+// DataLayout::ANY means the layout reported by OneDNNContext::tls().
+// Inputs that are not in oneDNN layout only share their buffer with `out`.
+// NOTE(review): without PADDLE_WITH_DNNL this is a no-op on `out`.
+template <typename Context>
+void OneDNN2PaddleLayout(const Context& dev_ctx,
+                         const DenseTensor& x,
+                         int dst_layout,
+                         DenseTensor* out) {
+#ifdef PADDLE_WITH_DNNL
+  DataLayout src_layout = x.layout();
+  VLOG(10) << "TransDataLayout from " << static_cast<DataLayout>(src_layout)
+           << " -> " << static_cast<DataLayout>(dst_layout);
+
+  auto print_tensor_meta = [](const DenseTensor& x) {
+    std::ostringstream oss;
+
+    oss << "[";
+    oss << "layout:" << x.layout() << " ,";
+    oss << "dims:" << x.dims() << " ,";
+    if (x.IsInitialized()) oss << "place:" << x.place();
+    oss << "]";
+
+    return oss.str();
+  };
+  VLOG(10) << " x: " << print_tensor_meta(x);
+  VLOG(10) << " out: " << print_tensor_meta(*out) << " " << out;
+
+  if (src_layout != DataLayout::ONEDNN) {
+    out->ShareDataWith(x);
+    out->ShareInplaceVersionCounterWith(x);
+    out->set_layout(static_cast<DataLayout>(dst_layout));
+    return;
+  }
+
+  DataLayout tmp_layout = static_cast<DataLayout>(dst_layout);
+  if (tmp_layout == DataLayout::ANY) {
+    tmp_layout = phi::OneDNNContext::tls().get_cur_paddle_data_layout();
+  }
+
+  // NOTE(zhiqiu): to handle the special case in ApplyDataTransform() in
+  // data_transfer.cc
+  if (!x.IsInitialized() && src_layout == DataLayout::ONEDNN &&
+      tmp_layout == DataLayout::NHWC) {
+    VLOG(4) << src_layout << "->" << tmp_layout << " " << x.layout();
+    out->Resize(x.dims());
+    out->set_layout(tmp_layout);
+    funcs::MatchShapeToLayout(out, src_layout, tmp_layout);
+    return;
+  }
+
+  funcs::TransDataLayoutFromOneDNN(
+      src_layout, tmp_layout, x, out, dev_ctx.GetPlace());
+#endif
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL_FOR_ALL_DTYPE(onednn_to_paddle_layout,
+                                 CPU,
+                                 ALL_LAYOUT,
+                                 phi::OneDNN2PaddleLayout<phi::CPUContext>) {}
diff --git a/paddle/phi/kernels/cpu/transfer_layout_kernel.h b/paddle/phi/kernels/cpu/transfer_layout_kernel.h
new file mode 100644
index 00000000000000..73e12927d7ffe5
--- /dev/null
+++ b/paddle/phi/kernels/cpu/transfer_layout_kernel.h
@@ -0,0 +1,44 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/infermeta/unary.h"
+#include "paddle/phi/kernels/empty_kernel.h"
+
+namespace phi {
+
+template <typename Context>
+void TransferLayoutKernel(const Context& dev_ctx,
+                          const DenseTensor& x,
+                          int src_layout,
+                          int dst_layout,
+                          DenseTensor* out);
+
+template <typename Context>
+DenseTensor TransferLayout(const Context& dev_ctx,
+                           const DenseTensor& x,
+                           DataLayout dst_layout) {
+  phi::DenseTensor dense_out =
+      phi::Empty(dev_ctx, {x.dtype(), x.dims(), dst_layout});
+  TransferLayoutKernel<Context>(dev_ctx,
+                                x,
+                                static_cast<int>(x.layout()),
+                                static_cast<int>(dst_layout),
+                                &dense_out);
+  return dense_out;
+}
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/onednn_to_paddle_layout_kernel.h b/paddle/phi/kernels/onednn_to_paddle_layout_kernel.h
new file mode 100644
index 00000000000000..8dc86b48901ca1
--- /dev/null
+++ b/paddle/phi/kernels/onednn_to_paddle_layout_kernel.h
@@ -0,0 +1,28 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/infermeta/unary.h"
+#include "paddle/phi/kernels/empty_kernel.h"
+
+namespace phi {
+
+template <typename Context>
+void OneDNN2PaddleLayout(const Context& dev_ctx,
+                         const DenseTensor& x,
+                         int dst_layout,
+                         DenseTensor* out);
+}  // namespace phi

From 43a3f5972d614018ef2c35c7356e9b3901a79da4 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Tue, 19 Dec 2023 09:03:58 +0000
Subject: [PATCH 18/49] refine

---
 .../new_executor/instruction/CMakeLists.txt   | 12 +------
 .../framework/new_executor/pir_interpreter.cc |  2 +-
 .../pir/dialect/op_generator/ops_api_gen.py   |  1 +
 .../pir/transforms/pd_op_to_kernel_pass.cc    | 33 ++++++++++++++-----
 4 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt b/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
index b9ac389c69d875..1836d85e7cd995 100644
--- a/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
+++ b/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
@@ -20,17 +20,7 @@ endif()
 
 cc_library(
   instruction_base
-  SRCS instruction_base.cc
-       phi_kernel_instruction.cc
-       legacy_kernel_instruction.cc
-       if_instruction.cc
-       while_instruction.cc
-       select_input_instruction.cc
-       has_elements_instruction.cc
-       tuple_push_instruction.cc
-       tuple_pop_instruction.cc
-       builtin_combine_instruction.cc
-       instruction_util.cc
+  SRCS ${instruction_base_srcs}
   DEPS pir_adaptor phi common framework_proto)
 
 if(WITH_CINN AND NOT CINN_ONLY)
diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc
index 397a7ade7285b7..64cd167f814e51 100644
--- a/paddle/fluid/framework/new_executor/pir_interpreter.cc
+++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc
@@ -34,7 +34,7 @@
 #include "paddle/phi/core/sparse_csr_tensor.h"
 
 #ifdef PADDLE_WITH_DNNL
-#include "paddle/fluid/framework/new_executor/instruction/onddnn_legacy_kernel_instruction.h"
+#include "paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.h"
 #include "paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.h"
 #include "paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
diff --git a/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py b/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py
index 86abd12c82dfd7..af075537f60b88 100644
--- a/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py
+++ b/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py
@@ -103,6 +103,7 @@
     'sequence_mask',
     'number_count',
     'assign_value',
+    'onednn_to_paddle_layout',
 ]
 
 NO_NEED_GEN_STATIC_ONLY_APIS = [
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index 87bd2d4b64c576..9308c02db59961 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -1907,18 +1907,33 @@ pir::Operation* BuildKernelOp(
           vec_inputs, op_attribute, op_output_types, legacy_kernel_op_info);
     } else {
       auto op_info_parser = GetOpYamlInfoParser(op_item);
-      op_attribute.emplace("extra_args",
-                           op_info_parser->OpRuntimeInfo().extra_args);
+      std::vector<pir::Attribute> extra_args;
+      for (auto& arg : op_info_parser->OpRuntimeInfo().extra_args) {
+        extra_args.push_back(pir::StrAttribute::get(ctx, arg));
+      }
+      op_attribute.emplace(
+          "extra_args",
+          pir::ArrayAttribute::get(pir::IrContext::Instance(), extra_args));
       op_attribute.emplace(
           "layout_transform_arg",
-          op_info_parser->OpRuntimeInfo().layout_transform_arg);
+          pir::StrAttribute::get(
+              ctx, op_info_parser->OpRuntimeInfo().layout_transform_arg));
+      std::vector<pir::Attribute> layout_transform_inputs;
+      for (auto& input :
+           op_info_parser->OpRuntimeInfo().layout_transform_inputs) {
+        layout_transform_inputs.push_back(pir::StrAttribute::get(ctx, input));
+      }
+      op_attribute.emplace("layout_transform_inputs",
+                           pir::ArrayAttribute::get(pir::IrContext::Instance(),
+                                                    layout_transform_inputs));
+      op_attribute.emplace(
+          "is_onednn_only",
+          pir::BoolAttribute::get(
+              ctx, op_info_parser->OpRuntimeInfo().is_onednn_only));
       op_attribute.emplace(
-          "layout_transform_inputs",
-          op_info_parser->OpRuntimeInfo().layout_transform_inputs);
-      op_attribute.emplace("is_onednn_only",
-                           op_info_parser->OpRuntimeInfo().is_onednn_only);
-      op_attribute.emplace("dynamic_fallback",
-                           op_info_parser->OpRuntimeInfo().dynamic_fallback);
+          "dynamic_fallback",
+          pir::BoolAttribute::get(
+              ctx, op_info_parser->OpRuntimeInfo().dynamic_fallback));
       if (op_item->HasTrait<OneDNNDynamicFallbackTrait>()) {
         pir::OpInfo phi_kernel_op_info =
             ctx->GetRegisteredOpInfo(OneDNNMixedPhiKernelOp::name());

From 53d0f0ddc64e2ddc07761fc9f1c22d385fcb3335 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Tue, 19 Dec 2023 09:29:32 +0000
Subject: [PATCH 19/49] refine

---
 paddle/fluid/pir/dialect/CMakeLists.txt       |  17 +-
 .../dialect/operator/ir/op_onednn_dialect.cc  | 179 ++++++++++++++++++
 .../dialect/operator/ir/op_onednn_dialect.h   |  44 +++++
 3 files changed, 234 insertions(+), 6 deletions(-)
 create mode 100644 paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc
 create mode 100644 paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.h

diff --git a/paddle/fluid/pir/dialect/CMakeLists.txt b/paddle/fluid/pir/dialect/CMakeLists.txt
index 9324b0f57d6803..e0f6a269d0da15 100644
--- a/paddle/fluid/pir/dialect/CMakeLists.txt
+++ b/paddle/fluid/pir/dialect/CMakeLists.txt
@@ -119,6 +119,9 @@ if(WITH_MKLDNN)
       ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
   )
 
+  set(op_onednn_info_file ${PD_DIALECT_SOURCE_DIR}/pd_onednn_op_info.cc)
+  set(op_onednn_info_file_tmp ${op_onednn_info_file}.tmp)
+
   set(onednn_op_namespace paddle,onednn,dialect)
   set(onednn_dialect_name pd_onednn_op)
   set(onednn_op_header_file ${PD_DIALECT_SOURCE_DIR}/pd_onednn_op.h)
@@ -135,12 +138,14 @@ if(WITH_MKLDNN)
       ${PYTHON_EXECUTABLE} ${op_gen_file} --op_yaml_files ${op_yaml_files}
       --op_compat_yaml_file ${op_compat_yaml_file} --namespaces
       ${onednn_op_namespace} --dialect_name ${onednn_dialect_name}
-      --op_def_h_file ${onednn_op_header_file_tmp} --op_def_cc_file
-      ${onednn_op_source_file_tmp} --onednn_yaml_file ${pir_op_onednn_yaml}
-      --ops_onednn_extra_yaml_file ${pd_ops_onednn_extra_yaml_file})
-
-  set(generated_files_onednn_pd_op "${onednn_op_header_file}"
-                                   "${onednn_op_source_file}")
+      --op_def_h_file ${onednn_op_header_file_tmp} --op_info_file
+      ${op_onednn_info_file_tmp} --op_def_cc_file ${onednn_op_source_file_tmp}
+      --onednn_yaml_file ${pir_op_onednn_yaml} --ops_onednn_extra_yaml_file
+      ${pd_ops_onednn_extra_yaml_file})
+
+  set(generated_files_onednn_pd_op
+      "${onednn_op_header_file}" "${onednn_op_source_file}"
+      "${op_onednn_info_file}")
 endif()
 set(api_gen_yaml_files
     ${op_fwd_yaml},${op_bwd_yaml},${pir_op_fwd_yaml},${pir_op_bwd_yaml},${pir_update_op_fwd_yaml}
diff --git a/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc b/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc
new file mode 100644
index 00000000000000..8e1e91967074c7
--- /dev/null
+++ b/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc
@@ -0,0 +1,179 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.h"
+#include "paddle/fluid/pir/dialect/operator/ir/control_flow_op.h"
+#include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h"
+#include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
+#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
+#include "paddle/fluid/pir/dialect/operator/ir/type_storage.h"
+#include "paddle/fluid/pir/dialect/operator/transforms/param_to_variable.h"
+#include "paddle/pir/core/builtin_type_interfaces.h"
+#include "paddle/pir/core/interface_value.h"
+#include "paddle/pir/core/ir_printer.h"
+#include "paddle/pir/core/utils.h"
+#include "paddle/pir/dialect/control_flow/ir/cf_dialect.h"
+#include "paddle/pir/dialect/control_flow/ir/cf_op.h"
+
+namespace paddle {
+namespace dialect {
+
+OneDNNOperatorDialect::OneDNNOperatorDialect(pir::IrContext *ctx)
+    : pir::Dialect(name(), ctx, pir::TypeId::get<OneDNNOperatorDialect>()) {
+  initialize();
+  ctx->GetOrRegisterDialect<::pir::ControlFlowDialect>();
+  auto info = ctx->GetRegisteredOpInfo(pir::TuplePushOp::name());
+  info.AttachInterface(std::move(
+      pir::InterfaceValue::
+          Get<pir::TuplePushOp, VjpInterface, TuplePushOpVjpInterfaceModel>()));
+}
+
+void OneDNNOperatorDialect::initialize() {
+  RegisterTypes<paddle::dialect::DenseTensorType,
+                paddle::dialect::SelectedRowsType,
+                paddle::dialect::DenseTensorArrayType>();
+
+  RegisterAttributes<paddle::dialect::IntArrayAttribute,
+                     paddle::dialect::DataTypeAttribute,
+                     paddle::dialect::PlaceAttribute,
+                     paddle::dialect::DataLayoutAttribute>();
+
+  // NOTE(zhangbo9674): GET_OP_LIST selects the op list in the op info file
+  // included below, which is generated by op_gen.py; see details in
+  // paddle/fluid/pir/dialect/CMakeLists.txt.
+  // NOTE(Ruting): GET_MANUAL_OP_LIST is defined in manual_op.h.
+  // use RegisterOps when list has more than two ops.
+  RegisterOps<
+#define GET_OP_LIST
+#include "paddle/fluid/pir/dialect/operator/ir/pd_op_info.cc"  // NOLINT
+      >();
+  RegisterInterfaces<ParameterConvertInterface>();
+}
+
+void OneDNNOperatorDialect::PrintType(pir::Type type, std::ostream &os) const {
+  os << type.dialect().name();
+  os << '.';
+  if (auto tensor_type = type.dyn_cast<DenseTensorType>()) {
+    os << "tensor<";
+    for (auto d : common::vectorize(tensor_type.dims())) {
+      os << d;
+      os << "x";
+    }
+    tensor_type.dtype().Print(os);
+    os << ">";
+  } else if (auto selected_rows_type = type.dyn_cast<SelectedRowsType>()) {
+    os << "selectedrows<";
+    for (auto d : common::vectorize(selected_rows_type.dims())) {
+      os << d;
+      os << "x";
+    }
+    selected_rows_type.dtype().Print(os);
+    os << ">";
+  } else if (auto tensor_array_type = type.dyn_cast<DenseTensorArrayType>()) {
+    os << "tensor_array<";
+    tensor_array_type.dtype().Print(os);
+    os << ">";
+  }
+}
+
+void OneDNNOperatorDialect::PrintAttribute(pir::Attribute attr,
+                                           std::ostream &os) const {
+  os << "(" << attr.dialect().name();
+  os << '.';
+  if (auto int_array_attr = attr.dyn_cast<IntArrayAttribute>()) {
+    phi::IntArray data = int_array_attr.data();
+    os << "IntArray)"
+       << "[";
+    const auto &inner_data = data.GetData();
+    pir::PrintInterleave(
+        inner_data.begin(),
+        inner_data.end(),
+        [&os](int64_t i) { os << i; },
+        [&os]() { os << ","; });
+    os << "]";
+  } else if (auto data_type_attr = attr.dyn_cast<DataTypeAttribute>()) {
+    os << "DataType)" << data_type_attr.data();
+  } else if (auto place_type_attr = attr.dyn_cast<PlaceAttribute>()) {
+    os << "Place)" << place_type_attr.data();
+  } else if (auto data_layout_attr = attr.dyn_cast<DataLayoutAttribute>()) {
+    os << "DataLayout)" << data_layout_attr.data();
+  } else {
+    os << "<#AttrNotImplemented>";
+  }
+}
+
+pir::Type OneDNNOperatorDialect::ParseType(pir::IrParser &parser) {  // NOLINT
+  parser.ConsumeAToken("pd_op.tensor");
+  parser.ConsumeAToken("<");
+  std::vector<int> dim{};
+  Token dim_token = parser.PeekToken();
+  while (dim_token.token_type_ == DIGIT) {
+    dim_token = parser.ConsumeToken();
+    dim.push_back(atoi(dim_token.val_.c_str()));
+    std::string peek_token_val = parser.PeekToken().val_;
+    if (peek_token_val[0] != 'x') {
+      break;
+    }
+    parser.ConsumeToken();
+    parser.lexer->Unget(static_cast<int>(peek_token_val.size() - 1));
+    if (parser.PeekToken().token_type_ != DIGIT) {
+      break;
+    }
+  }
+  phi::DDim ddim = common::make_ddim(dim);
+  pir::Type dtype = parser.ParseType();
+  std::vector<std::vector<size_t>> lod;
+  std::vector<size_t> lodv;
+  lodv.push_back(0);
+  lod.push_back(lodv);
+  parser.ConsumeAToken(">");
+  return DenseTensorType::get(
+      parser.ctx, dtype, ddim, phi::DataLayout::UNDEFINED, lod, 0);
+}
+
+pir::Attribute OneDNNOperatorDialect::ParseAttribute(
+    pir::IrParser &parser) {  // NOLINT
+  std::string type_name = parser.ConsumeToken().val_;
+  std::string attribute_name =
+      type_name.substr(type_name.find('.') + 1, std::string::npos);
+  parser.ConsumeAToken(")");
+  if (attribute_name == "IntArray") {
+    return IntArrayAttribute::Parse(parser);
+  } else if (attribute_name == "DataType") {
+    return DataTypeAttribute::Parse(parser);
+  } else if (attribute_name == "Place") {
+    return PlaceAttribute::Parse(parser);
+  } else if (attribute_name == "DataLayout") {
+    return DataLayoutAttribute::Parse(parser);
+  } else {
+    IR_THROW("No function to parse " + attribute_name + " exists!" +
+             parser.GetErrorLocationInfo());
+  }
+}
+
+void OneDNNOperatorDialect::PrintOperation(pir::Operation *op,
+                                           pir::IrPrinter &printer) const {
+  if (auto if_op = op->dyn_cast<IfOp>()) {
+    if_op.Print(printer);
+  } else if (auto while_op = op->dyn_cast<WhileOp>()) {
+    while_op.Print(printer);
+  } else {
+    printer.PrintGeneralOperation(op);
+  }
+}
+
+}  // namespace dialect
+}  // namespace paddle
+
+IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNOperatorDialect)
diff --git a/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.h b/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.h
new file mode 100644
index 00000000000000..ac6483d4d53ecb
--- /dev/null
+++ b/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.h
@@ -0,0 +1,44 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/pir/core/dialect.h"
+
+namespace paddle {
+namespace dialect {
+
+class OneDNNOperatorDialect : public pir::Dialect {
+ public:
+  explicit OneDNNOperatorDialect(pir::IrContext* context);
+
+  static const char* name() { return "pd_onednn_op"; }
+
+  pir::Type ParseType(pir::IrParser& parser) override;            // NOLINT
+  pir::Attribute ParseAttribute(pir::IrParser& parser) override;  // NOLINT
+
+  void PrintType(pir::Type type, std::ostream& os) const override;
+  void PrintAttribute(pir::Attribute type, std::ostream& os) const override;
+
+  void PrintOperation(pir::Operation* op,
+                      pir::IrPrinter& printer) const override;  // NOLINT
+
+ private:
+  void initialize();
+};
+
+}  // namespace dialect
+}  // namespace paddle
+
+IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNOperatorDialect)

From fe4ee218d6a489564e1ad577b0abbbe3827e52b6 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Tue, 19 Dec 2023 09:35:22 +0000
Subject: [PATCH 20/49] refine

---
 .gitignore                                                | 1 +
 paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 12d8d93ac03bf3..c4046a8d6b6e38 100644
--- a/.gitignore
+++ b/.gitignore
@@ -109,6 +109,7 @@ paddle/fluid/pir/dialect/operator/ir/op_decomp.cc
 paddle/fluid/pir/dialect/operator/ir/pd_op_vjp.cc
 paddle/fluid/pir/dialect/operator/ir/pd_op.*
 paddle/fluid/pir/dialect/operator/ir/pd_onednn_op.*
+paddle/fluid/pir/dialect/operator/ir/pd_onednn_op_info.*
 paddle/fluid/pir/dialect/operator/ir/pd_op_bwd.*
 paddle/fluid/pir/dialect/operator/ir/pd_op_fused.*
 paddle/fluid/pir/dialect/operator/ir/pd_op_fused_bwd.*
diff --git a/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc b/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc
index 8e1e91967074c7..b101f45c4dcfbc 100644
--- a/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc
+++ b/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc
@@ -56,7 +56,7 @@ void OneDNNOperatorDialect::initialize() {
   // use RegisterOps when list has more than two ops.
   RegisterOps<
 #define GET_OP_LIST
-#include "paddle/fluid/pir/dialect/operator/ir/pd_op_info.cc"  // NOLINT
+#include "paddle/fluid/pir/dialect/operator/ir/pd_onednn_op_info.cc"  // NOLINT
       >();
   RegisterInterfaces<ParameterConvertInterface>();
 }

From 50d2638e50afedb08dff7cd981189efd9c8804f0 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Tue, 19 Dec 2023 09:41:45 +0000
Subject: [PATCH 21/49] refine

---
 paddle/fluid/ir_adaptor/translator/translate.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/paddle/fluid/ir_adaptor/translator/translate.cc b/paddle/fluid/ir_adaptor/translator/translate.cc
index 7a7081fe1acbf2..dbab7e2e6fd1dc 100644
--- a/paddle/fluid/ir_adaptor/translator/translate.cc
+++ b/paddle/fluid/ir_adaptor/translator/translate.cc
@@ -19,6 +19,7 @@
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/ir_adaptor/translator/program_translator.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h"
+#include "paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.h"
 #include "paddle/pir/core/builtin_dialect.h"
 #include "paddle/pir/core/program.h"
 
@@ -31,6 +32,7 @@ std::unique_ptr<Program> TranslateLegacyProgramToProgram(
     const LegacyProgramDesc& legacy_program) {
   pir::IrContext* ctx = pir::IrContext::Instance();
   ctx->GetOrRegisterDialect<dialect::OperatorDialect>();
+  ctx->GetOrRegisterDialect<dialect::OneDNNOperatorDialect>();
   auto program = std::make_unique<Program>(ctx);
   translator::ProgramTranslator program_translator(&legacy_program,
                                                    program.get());

From 7711601093bac22e89061a67677c9f147a390061 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Tue, 19 Dec 2023 09:44:49 +0000
Subject: [PATCH 22/49] refine

---
 paddle/fluid/pir/dialect/CMakeLists.txt | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/paddle/fluid/pir/dialect/CMakeLists.txt b/paddle/fluid/pir/dialect/CMakeLists.txt
index e0f6a269d0da15..337841b2274971 100644
--- a/paddle/fluid/pir/dialect/CMakeLists.txt
+++ b/paddle/fluid/pir/dialect/CMakeLists.txt
@@ -267,6 +267,13 @@ set(op_dialect_vjp_srcs
     ${op_decomp_source_file}
     ${op_vjp_source_file}
     ${PADDLE_SOURCE_DIR}/paddle/fluid/primitive/base/decomp_trans.cc)
+
+if(WITH_MKLDNN)
+  # Add the oneDNN operator dialect source only when WITH_MKLDNN is enabled.
+  list(APPEND op_dialect_vjp_srcs
+       ${CMAKE_CURRENT_SOURCE_DIR}/operator/ir/op_onednn_dialect.cc)
+endif()
+
 set(op_dialect_vjp_deps primitive_vjp_experimental op_dialect)
 
 cc_library(

From 7383d3af290ffc694df1be060e7084669599d456 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Wed, 20 Dec 2023 04:46:26 +0000
Subject: [PATCH 23/49] refine

---
 paddle/fluid/ir_adaptor/translator/utils.cc      |  2 ++
 paddle/fluid/pir/dialect/kernel/ir/kernel_op.h   |  6 +++---
 paddle/fluid/pir/dialect/op_generator/op_gen.py  |  1 +
 .../pir/dialect/operator/ir/op_onednn_dialect.cc | 16 +---------------
 paddle/fluid/pir/transforms/inplace_pass.cc      | 16 ++++++++++++----
 .../fluid/pir/transforms/pd_op_to_kernel_pass.cc | 12 ++++++++----
 paddle/phi/api/lib/data_transform.h              |  4 +++-
 7 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/paddle/fluid/ir_adaptor/translator/utils.cc b/paddle/fluid/ir_adaptor/translator/utils.cc
index ebba4428220f70..8ebf090a5df1b8 100644
--- a/paddle/fluid/ir_adaptor/translator/utils.cc
+++ b/paddle/fluid/ir_adaptor/translator/utils.cc
@@ -19,6 +19,7 @@
 #include "paddle/common/enforce.h"
 #include "paddle/fluid/ir_adaptor/translator/op_translator.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h"
+#include "paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.h"
 #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
 #include "paddle/pir/core/builtin_attribute.h"
 #include "paddle/pir/core/builtin_type.h"
@@ -94,6 +95,7 @@ std::vector<std::string> CheckUnregisteredOperationInBlock(
 std::vector<std::string> CheckUnregisteredOperation(
     pir::IrContext* ctx, const framework::ProgramDesc& legacy_program) {
   ctx->GetOrRegisterDialect<dialect::OperatorDialect>();
+  ctx->GetOrRegisterDialect<dialect::OneDNNOperatorDialect>();
 
   std::vector<std::string> unregistered_ops;
   for (size_t block_idx = 0; block_idx < legacy_program.Size(); block_idx++) {
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h
index 83133ee61fb66a..57babb00a6570e 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h
@@ -47,7 +47,7 @@ class LegacyKernelOp : public pir::Op<LegacyKernelOp> {
 class OneDNNPhiKernelOp : public pir::Op<OneDNNPhiKernelOp> {
  public:
   using Op::Op;
-  static const char *name() { return "pd_kernel.phi_kernel"; }
+  static const char *name() { return "pd_onednn_kernel.phi_kernel"; }
   static constexpr uint32_t attributes_num = 3;
   static const char *attributes_name[attributes_num];
   std::string op_name();
@@ -59,7 +59,7 @@ class OneDNNPhiKernelOp : public pir::Op<OneDNNPhiKernelOp> {
 class OneDNNMixedPhiKernelOp : public pir::Op<OneDNNMixedPhiKernelOp> {
  public:
   using Op::Op;
-  static const char *name() { return "pd_kernel.phi_kernel"; }
+  static const char *name() { return "pd_onednn_kernel.phi_mixed_kernel"; }
   static constexpr uint32_t attributes_num = 3;
   static const char *attributes_name[attributes_num];
   std::string op_name();
@@ -71,7 +71,7 @@ class OneDNNMixedPhiKernelOp : public pir::Op<OneDNNMixedPhiKernelOp> {
 class OneDNNLegacyKernelOp : public pir::Op<OneDNNLegacyKernelOp> {
  public:
   using Op::Op;
-  static const char *name() { return "pd_kernel.legacy_kernel"; }
+  static const char *name() { return "pd_onednn_kernel.legacy_kernel"; }
   static constexpr uint32_t attributes_num = 3;
   static const char *attributes_name[attributes_num];
   std::string op_name();
diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py
index 54ae571e182712..57e99923284803 100644
--- a/paddle/fluid/pir/dialect/op_generator/op_gen.py
+++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py
@@ -1192,6 +1192,7 @@ def AutoCodeGen(op_info_items, all_op_info_items, namespaces, dialect_name):
                 if (
                     op_name in decomp_interface_declare_gen_op_list
                     and kernel_func_name in decomp_interface_declare_gen_op_list
+                    and dialect_name != "pd_onednn_op"
                 ):
                     op_interfaces = op_interfaces + [
                         "paddle::dialect::DecompInterface"
diff --git a/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc b/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc
index b101f45c4dcfbc..a61d1c6ee0fe75 100644
--- a/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc
+++ b/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc
@@ -16,6 +16,7 @@
 #include "paddle/fluid/pir/dialect/operator/ir/control_flow_op.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
+#include "paddle/fluid/pir/dialect/operator/ir/pd_onednn_op.h"
 #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
 #include "paddle/fluid/pir/dialect/operator/ir/type_storage.h"
 #include "paddle/fluid/pir/dialect/operator/transforms/param_to_variable.h"
@@ -32,23 +33,9 @@ namespace dialect {
 OneDNNOperatorDialect::OneDNNOperatorDialect(pir::IrContext *ctx)
     : pir::Dialect(name(), ctx, pir::TypeId::get<OneDNNOperatorDialect>()) {
   initialize();
-  ctx->GetOrRegisterDialect<::pir::ControlFlowDialect>();
-  auto info = ctx->GetRegisteredOpInfo(pir::TuplePushOp::name());
-  info.AttachInterface(std::move(
-      pir::InterfaceValue::
-          Get<pir::TuplePushOp, VjpInterface, TuplePushOpVjpInterfaceModel>()));
 }
 
 void OneDNNOperatorDialect::initialize() {
-  RegisterTypes<paddle::dialect::DenseTensorType,
-                paddle::dialect::SelectedRowsType,
-                paddle::dialect::DenseTensorArrayType>();
-
-  RegisterAttributes<paddle::dialect::IntArrayAttribute,
-                     paddle::dialect::DataTypeAttribute,
-                     paddle::dialect::PlaceAttribute,
-                     paddle::dialect::DataLayoutAttribute>();
-
   // NOTE(zhangbo9674): GET_OP_LIST is defined in pd_op.h which is
   // generated by op_gen.py, see details in
   // paddle/fluid/pir/dialect/CMakeLists.txt.
@@ -58,7 +45,6 @@ void OneDNNOperatorDialect::initialize() {
 #define GET_OP_LIST
 #include "paddle/fluid/pir/dialect/operator/ir/pd_onednn_op_info.cc"  // NOLINT
       >();
-  RegisterInterfaces<ParameterConvertInterface>();
 }
 
 void OneDNNOperatorDialect::PrintType(pir::Type type, std::ostream &os) const {
diff --git a/paddle/fluid/pir/transforms/inplace_pass.cc b/paddle/fluid/pir/transforms/inplace_pass.cc
index 153c39172d5fae..2aa512b8348ff5 100644
--- a/paddle/fluid/pir/transforms/inplace_pass.cc
+++ b/paddle/fluid/pir/transforms/inplace_pass.cc
@@ -156,7 +156,9 @@ static bool CanDoInplace(const std::unordered_set<pir::Value>& eager_dels,
 
 static bool IsNoNeedBuffer(pir::Operation* op, pir::Value value) {
   if (op->dialect()->name().compare(paddle::dialect::KernelDialect::name()) !=
-      0) {
+          0 &&
+      op->dialect()->name().compare(
+          paddle::dialect::OneDNNKernelDialect::name()) != 0) {
     VLOG(8) << op->name()
             << "is not a kernel_dialect op, no need buffer is false";
     return false;
@@ -188,7 +190,9 @@ static std::unordered_set<pir::Value> GetSkipDeletionValues(pir::Block* block) {
   std::unordered_set<pir::Value> skip_dels;
   for (auto& op : *block) {
     if (op.dialect()->name().compare(paddle::dialect::KernelDialect::name()) !=
-        0) {
+            0 &&
+        op.dialect()->name().compare(
+            paddle::dialect::OneDNNKernelDialect::name()) != 0) {
       continue;
     }
     IR_ENFORCE(op.attributes().count("op_name") > 0,
@@ -220,7 +224,9 @@ static void GetEagerDelValueOfOp(
   for (auto& op : *block) {
     std::string upper_op_name = op.name();
     if (op.dialect()->name().compare(paddle::dialect::KernelDialect::name()) ==
-        0) {
+            0 ||
+        op.dialect()->name().compare(
+            paddle::dialect::OneDNNKernelDialect::name()) == 0) {
       IR_ENFORCE(op.attributes().count("op_name") > 0,
                  "kernel_dialect op should own an 'op_name' attribute.");
       upper_op_name = op.attributes()
@@ -291,7 +297,9 @@ static std::unordered_map<pir::Operation*, std::string> GetInplaceOps(
     }
 
     if (op.dialect()->name().compare(paddle::dialect::KernelDialect::name()) !=
-        0) {
+            0 &&
+        op.dialect()->name().compare(
+            paddle::dialect::OneDNNKernelDialect::name()) != 0) {
       VLOG(6) << op.name()
               << "is not a kernel_dialect op, inplace only support "
                  "kernel_dialect operators";
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index 9308c02db59961..f98fe25a809440 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -28,6 +28,7 @@
 #include "paddle/fluid/pir/dialect/operator/ir/manual_op.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h"
+#include "paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
 #include "paddle/fluid/pir/dialect/operator/ir/pd_onednn_op.h"
 #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
@@ -315,10 +316,8 @@ static pir::OpResult AddOneDNN2PaddleLayoutTransferOp(
 
   std::unordered_map<std::string, pir::Attribute> op_attribute;
   op_attribute = {
-      {"op_name",
-       pir::StrAttribute::get(ctx, "pd_op.onednn_to_paddle_layout_kernel")},
-      {"kernel_name",
-       pir::StrAttribute::get(ctx, "onednn_to_paddle_layout_kernel")},
+      {"op_name", pir::StrAttribute::get(ctx, "pd_op.onednn_to_paddle_layout")},
+      {"kernel_name", pir::StrAttribute::get(ctx, "onednn_to_paddle_layout")},
       {"kernel_key", KernelAttribute::get(ctx, kernel_key)},
       {"dst_layout",
        pir::Int32Attribute::get(ctx, static_cast<int>(dst_layout))}};
@@ -1901,6 +1900,7 @@ pir::Operation* BuildKernelOp(
   pir::Operation* op = nullptr;
   if (op_item->HasTrait<OneDNNTrait>()) {
     if (IsOneDNNLegacyOp(op_item->name())) {
+      VLOG(4) << "choose OneDNNLegacyKernelOp";
       pir::OpInfo legacy_kernel_op_info =
           ctx->GetRegisteredOpInfo(OneDNNLegacyKernelOp::name());
       op = pir::Operation::Create(
@@ -1935,12 +1935,14 @@ pir::Operation* BuildKernelOp(
           pir::BoolAttribute::get(
               ctx, op_info_parser->OpRuntimeInfo().dynamic_fallback));
       if (op_item->HasTrait<OneDNNDynamicFallbackTrait>()) {
+        VLOG(4) << "choose OneDNNMixedPhiKernelOp";
         pir::OpInfo phi_kernel_op_info =
             ctx->GetRegisteredOpInfo(OneDNNMixedPhiKernelOp::name());
 
         op = pir::Operation::Create(
             vec_inputs, op_attribute, op_output_types, phi_kernel_op_info);
       } else {
+        VLOG(4) << "choose OneDNNPhiKernelOp";
         pir::OpInfo phi_kernel_op_info =
             ctx->GetRegisteredOpInfo(OneDNNPhiKernelOp::name());
 
@@ -2054,6 +2056,8 @@ std::unique_ptr<pir::Program> PdOpLowerToKernelPass(pir::Program* prog,
   pir::IrContext* ctx = pir::IrContext::Instance();
   ctx->GetOrRegisterDialect<OperatorDialect>();
   ctx->GetOrRegisterDialect<KernelDialect>();
+  ctx->GetOrRegisterDialect<OneDNNOperatorDialect>();
+  ctx->GetOrRegisterDialect<OneDNNKernelDialect>();
 
   std::unordered_map<pir::Operation*, pir::Operation*> map_op_pair;
   std::unordered_map<pir::Value, pir::Value> map_value_pair;
diff --git a/paddle/phi/api/lib/data_transform.h b/paddle/phi/api/lib/data_transform.h
index dd3166f05c3ef9..7ad1d0fe12eade 100644
--- a/paddle/phi/api/lib/data_transform.h
+++ b/paddle/phi/api/lib/data_transform.h
@@ -177,7 +177,9 @@ inline bool NeedTransformPlace(const phi::Place& src_place,
   bool ret = src_place.GetType() == AllocationType::GPUPINNED ||
              (target != Backend::ALL_BACKEND &&
               phi::TransToPhiBackend(src_place) !=
-                  (target != Backend::GPUDNN ? target : Backend::GPU));
+                  (target != Backend::GPUDNN ? target : Backend::GPU) &&
+              !(src_place.GetType() == AllocationType::CPU &&
+                target == Backend::ONEDNN));
   return ret;
 }
 

From e497a50e7c7339a0a2df604ff2989c09fe7fe135 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Wed, 20 Dec 2023 07:03:53 +0000
Subject: [PATCH 24/49] refine

---
 .../instruction/instruction_util.cc           |   4 +-
 .../onednn_legacy_kernel_instruction.cc       | 131 +-----------------
 .../onednn_mixed_phi_kernel_instruction.cc    | 121 +---------------
 .../onednn_phi_kernel_instruction.cc          | 121 +---------------
 4 files changed, 18 insertions(+), 359 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/instruction_util.cc b/paddle/fluid/framework/new_executor/instruction/instruction_util.cc
index 55dc035b6e0638..4dab1cc419fb76 100644
--- a/paddle/fluid/framework/new_executor/instruction/instruction_util.cc
+++ b/paddle/fluid/framework/new_executor/instruction/instruction_util.cc
@@ -154,7 +154,9 @@ OpFuncType AnalyseOpFuncType(pir::Operation* op, const platform::Place& place) {
   auto& op_attributes = op->attributes();
 
   if ((op->dialect()->name().compare(paddle::dialect::KernelDialect::name()) ==
-       0) &&
+           0 ||
+       op->dialect()->name().compare(
+           paddle::dialect::OneDNNKernelDialect::name()) == 0) &&
       (op_attributes.count("kernel_key") > 0)) {
     auto kernel_key = op_attributes.at("kernel_key")
                           .dyn_cast<dialect::KernelAttribute>()
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.cc
index c562dae6264f6e..5cf89af6a9f200 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.cc
@@ -38,136 +38,15 @@ OneDNNLegacyKernelInstruction::OneDNNLegacyKernelInstruction(
     pir::Operation* op,
     const ValueExecutionInfo* value_exec_info)
     : InstructionBase(id, place), value_exec_info_(value_exec_info) {
-  auto& op_attributes = op->attributes();
-  auto op_name =
-      op_attributes.at("op_name").dyn_cast<pir::StrAttribute>().AsString();
-  pir::OpInfo op_info =
-      pir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
-  op_ = op;
-  legacy_op_name_ = op_name;
-  VLOG(6) << "construct phi kernel instruction for: " << legacy_op_name_;
-
-  if (op_attributes.count("execution_stream") != 0) {
-    SetExecutionStream(op_attributes.at("execution_stream")
-                           .dyn_cast<pir::StrAttribute>()
-                           .AsString());
-  }
-  if (op_attributes.count("stream_priority") != 0) {
-    SetStreamPriority(op_attributes.at("stream_priority")
-                          .dyn_cast<pir::Int32Attribute>()
-                          .data());
-  }
-  if (op_attributes.count("scheduling_priority") != 0) {
-    SetSchedulingPriority(op_attributes.at("scheduling_priority")
-                              .dyn_cast<pir::Int64Attribute>()
-                              .data());
-  } else {
-    if (interpreter::IsCommunicationOp(op_)) {
-      // NOTE(Ruibiao): Dispatching computation before communication improves
-      // multi-stream overlap when the time cost of communication less than
-      // that of the calculation (e.g., ResNet50_bs128_pure_fp16 N4C32
-      // training).
-      SetSchedulingPriority(1);
-    }
-  }
-  VLOG(6) << "finish process dist attributes";
-
-  SetKernelType(AnalyseOpFuncType(op, place));
-  VLOG(6) << "finish process analyse kernel type";
-
-  infer_meta_interface_ =
-      op_info.GetInterfaceImpl<paddle::dialect::InferMetaInterface>();
-  VLOG(6) << "finish process infer_meta_interface_";
-
-  auto yaml_interface =
-      op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>();
-  PADDLE_ENFORCE_NOT_NULL(
-      yaml_interface,
-      phi::errors::PreconditionNotMet(
-          "can not find OpYamlInfoInterface from [%s]", legacy_op_name_));
-  paddle::dialect::OpYamlInfoParser yaml_info_parser(
-      yaml_interface->get_op_info_(), paddle::dialect::IsLegacyOp(op_name));
-  VLOG(6) << "finish process yaml_info_parser";
-
-  if (infer_meta_interface_) {
-    BuildPhiContext<
-        phi::InferMetaContext,
-        phi::MetaTensor,
-        phi::MetaTensor,
-        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
-        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
-        false>(op, *value_exec_info_, yaml_info_parser, &infer_meta_context_);
-  }
-  VLOG(6) << "finish process infer meta context";
-
-  auto kernel_name =
-      op_attributes.at("kernel_name").dyn_cast<pir::StrAttribute>().AsString();
-  auto kernel_key = op_attributes.at("kernel_key")
-                        .dyn_cast<paddle::dialect::KernelAttribute>()
-                        .data();
-  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      kernel_name, kernel_key);
-  phi_kernel_ = new phi::Kernel(kernel_result.kernel);
-  PADDLE_ENFORCE_EQ(
-      phi_kernel_->IsValid(), true, "not found kernel for [%s]", kernel_name);
-  VLOG(6) << "finish process select kernel: " << kernel_name;
-
-  const Scope* inner_scope = value_exec_info_->GetScope();
-
-  operator_base_ = BuildOperatorBase(op, *value_exec_info_, yaml_info_parser);
-
-  paddle::framework::VariableValueMap in_map;
-  paddle::framework::VariableValueMap out_map;
-  auto dev_ctx = phi::DeviceContextPool::Instance().Get(
-      phi::TransToPhiPlace(kernel_key.backend()));
-
-  runtime_context_ = std::make_shared<paddle::framework::RuntimeContext>(
-      paddle::framework::RuntimeContext(in_map, out_map));
-  BuildRuntimeContext(
-      op, *value_exec_info, yaml_info_parser, runtime_context_.get());
-
-  kernel_context_ = new paddle::framework::ExecutionContext(
-      *operator_base_, *inner_scope, *dev_ctx, *(runtime_context_.get()));
-
-  VLOG(6) << "finish process kernel context";
-  SetDeviceContext(
-      ParseDeviceContext(op,
-                         phi::DeviceContextPool::Instance().Get(
-                             phi::TransToPhiPlace(kernel_key.backend())),
-                         place,
-                         GetExecutionStream(),
-                         GetStreamPriority()));
-  VLOG(6) << "finish process device context";
-
-  InitInputsOutputsIds(op, *value_exec_info);
-  VLOG(6) << "finish process inputs outputs index";
-
-  auto& no_need_buffer_ids = yaml_info_parser.NoNeedBufferIds();
-  std::unordered_set<pir::Value> no_need_buffer_values;
-  for (size_t id = 0; id < no_need_buffer_ids.size(); id++) {
-    no_need_buffer_values.insert(op->operand_source(no_need_buffer_ids[id]));
-  }
-  SetNoNeedBuffer(no_need_buffer_values);
-  VLOG(6) << "finish process no need buffer";
+  PADDLE_THROW(platform::errors::Unimplemented(
+      "OneDNNLegacyKernelInstruction not defined now."));
 }
 
-OneDNNLegacyKernelInstruction::~OneDNNLegacyKernelInstruction() {
-  if (kernel_context_ != nullptr) {
-    delete kernel_context_;
-  }
-
-  if (phi_kernel_ != nullptr) {
-    delete phi_kernel_;
-  }
-}
+OneDNNLegacyKernelInstruction::~OneDNNLegacyKernelInstruction() {}
 
 void OneDNNLegacyKernelInstruction::Run() {
-  VLOG(6) << "Run op " << legacy_op_name_ << " infer meta.";
-  if (infer_meta_interface_) {
-    infer_meta_interface_->infer_meta_(&(infer_meta_context_));
-  }
-  VLOG(6) << "Run op " << legacy_op_name_ << " kernel.";
-  (*(phi_kernel_))((kernel_context_));
+  PADDLE_THROW(platform::errors::Unimplemented(
+      "OneDNNLegacyKernelInstruction not defined now."));
 }
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.cc
index ffdf06783df24a..7e577710f62222 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.cc
@@ -42,126 +42,15 @@ OneDNNMixedPhiKernelInstruction::OneDNNMixedPhiKernelInstruction(
     pir::Operation* op,
     const ValueExecutionInfo* value_exec_info)
     : InstructionBase(id, place), value_exec_info_(value_exec_info) {
-  auto op_attributes = op->attributes();
-  auto op_name =
-      op_attributes.at("op_name").dyn_cast<pir::StrAttribute>().AsString();
-  pir::OpInfo op_info =
-      pir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
-  op_ = op;
-  phi_op_name_ = op_name;
-  VLOG(6) << "construct phi kernel instruction for: " << phi_op_name_;
-
-  if (op_attributes.count("execution_stream") != 0) {
-    SetExecutionStream(op_attributes.at("execution_stream")
-                           .dyn_cast<pir::StrAttribute>()
-                           .AsString());
-  }
-  if (op_attributes.count("stream_priority") != 0) {
-    SetStreamPriority(op_attributes.at("stream_priority")
-                          .dyn_cast<pir::Int32Attribute>()
-                          .data());
-  }
-  if (op_attributes.count("scheduling_priority") != 0) {
-    SetSchedulingPriority(op_attributes.at("scheduling_priority")
-                              .dyn_cast<pir::Int64Attribute>()
-                              .data());
-  } else {
-    if (interpreter::IsCommunicationOp(op_)) {
-      // NOTE(Ruibiao): Dispatching computation before communication improves
-      // multi-stream overlap when the time cost of communication less than
-      // that of the calculation (e.g., ResNet50_bs128_pure_fp16 N4C32
-      // training).
-      SetSchedulingPriority(1);
-    }
-  }
-  VLOG(6) << "finish process dist attributes";
-
-  SetKernelType(AnalyseOpFuncType(op, place));
-  VLOG(6) << "finish process analyse kernel type";
-
-  infer_meta_interface_ =
-      op_info.GetInterfaceImpl<paddle::dialect::InferMetaInterface>();
-  VLOG(6) << "finish process infer_meta_interface_";
-
-  auto yaml_interface =
-      op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>();
-  PADDLE_ENFORCE_NOT_NULL(
-      yaml_interface,
-      phi::errors::PreconditionNotMet(
-          "can not find OpYamlInfoInterface from [%s]", phi_op_name_));
-  paddle::dialect::OpYamlInfoParser yaml_info_parser(
-      yaml_interface->get_op_info_(), paddle::dialect::IsLegacyOp(op_name));
-  VLOG(6) << "finish process yaml_info_parser";
-
-  if (infer_meta_interface_) {
-    BuildPhiContext<
-        phi::InferMetaContext,
-        phi::MetaTensor,
-        phi::MetaTensor,
-        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
-        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
-        false>(op, *value_exec_info_, yaml_info_parser, &infer_meta_context_);
-  }
-  VLOG(6) << "finish process infer meta context";
-
-  auto kernel_name =
-      op_attributes.at("kernel_name").dyn_cast<pir::StrAttribute>().AsString();
-  auto kernel_key = op_attributes.at("kernel_key")
-                        .dyn_cast<paddle::dialect::KernelAttribute>()
-                        .data();
-  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      kernel_name, kernel_key);
-  phi_kernel_ = new phi::Kernel(kernel_result.kernel);
-  PADDLE_ENFORCE_EQ(
-      phi_kernel_->IsValid(), true, "not found kernel for [%s]", kernel_name);
-  VLOG(6) << "finish process select kernel";
-
-  BuildPhiContext<phi::KernelContext,
-                  const phi::TensorBase*,
-                  phi::TensorBase*,
-                  paddle::small_vector<const phi::TensorBase*>,
-                  paddle::small_vector<phi::TensorBase*>,
-                  true>(
-      op, *value_exec_info_, yaml_info_parser, &kernel_context_);
-
-  kernel_context_.SetDeviceContext(phi::DeviceContextPool::Instance().Get(
-      phi::TransToPhiPlace(kernel_key.backend())));
-  VLOG(6) << "finish process kernel context";
-
-  SetDeviceContext(
-      ParseDeviceContext(op,
-                         phi::DeviceContextPool::Instance().Get(
-                             phi::TransToPhiPlace(kernel_key.backend())),
-                         place,
-                         GetExecutionStream(),
-                         GetStreamPriority()));
-  VLOG(6) << "finish process device context";
-
-  InitInputsOutputsIds(op, *value_exec_info);
-  VLOG(6) << "finish process inputs outputs index";
-
-  auto& no_need_buffer_ids = yaml_info_parser.NoNeedBufferIds();
-  std::unordered_set<pir::Value> no_need_buffer_values;
-  for (size_t id = 0; id < no_need_buffer_ids.size(); id++) {
-    no_need_buffer_values.insert(op->operand_source(no_need_buffer_ids[id]));
-  }
-  SetNoNeedBuffer(no_need_buffer_values);
-  VLOG(6) << "finish process no need buffer";
+  PADDLE_THROW(platform::errors::Unimplemented(
+      "OneDNNMixedPhiKernelInstruction not defined now."));
 }
 
-OneDNNMixedPhiKernelInstruction::~OneDNNMixedPhiKernelInstruction() {
-  if (phi_kernel_ != nullptr) {
-    delete phi_kernel_;
-  }
-}
+OneDNNMixedPhiKernelInstruction::~OneDNNMixedPhiKernelInstruction() {}
 
 void OneDNNMixedPhiKernelInstruction::Run() {
-  if (infer_meta_interface_) {
-    infer_meta_interface_->infer_meta_(&(infer_meta_context_));
-  }
-  VLOG(6) << "Run op " << phi_op_name_ << " infer meta.";
-  (*(phi_kernel_))(&(kernel_context_));
-  VLOG(6) << "Run op " << phi_op_name_ << " kernel.";
+  PADDLE_THROW(platform::errors::Unimplemented(
+      "OneDNNMixedPhiKernelInstruction not defined now."));
 }
 
 }  // namespace framework
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.cc
index 476e4374e0e219..5e64579d3e9cb8 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.cc
@@ -42,126 +42,15 @@ OneDNNPhiKernelInstruction::OneDNNPhiKernelInstruction(
     pir::Operation* op,
     const ValueExecutionInfo* value_exec_info)
     : InstructionBase(id, place), value_exec_info_(value_exec_info) {
-  auto op_attributes = op->attributes();
-  auto op_name =
-      op_attributes.at("op_name").dyn_cast<pir::StrAttribute>().AsString();
-  pir::OpInfo op_info =
-      pir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
-  op_ = op;
-  phi_op_name_ = op_name;
-  VLOG(6) << "construct phi kernel instruction for: " << phi_op_name_;
-
-  if (op_attributes.count("execution_stream") != 0) {
-    SetExecutionStream(op_attributes.at("execution_stream")
-                           .dyn_cast<pir::StrAttribute>()
-                           .AsString());
-  }
-  if (op_attributes.count("stream_priority") != 0) {
-    SetStreamPriority(op_attributes.at("stream_priority")
-                          .dyn_cast<pir::Int32Attribute>()
-                          .data());
-  }
-  if (op_attributes.count("scheduling_priority") != 0) {
-    SetSchedulingPriority(op_attributes.at("scheduling_priority")
-                              .dyn_cast<pir::Int64Attribute>()
-                              .data());
-  } else {
-    if (interpreter::IsCommunicationOp(op_)) {
-      // NOTE(Ruibiao): Dispatching computation before communication improves
-      // multi-stream overlap when the time cost of communication less than
-      // that of the calculation (e.g., ResNet50_bs128_pure_fp16 N4C32
-      // training).
-      SetSchedulingPriority(1);
-    }
-  }
-  VLOG(6) << "finish process dist attributes";
-
-  SetKernelType(AnalyseOpFuncType(op, place));
-  VLOG(6) << "finish process analyse kernel type";
-
-  infer_meta_interface_ =
-      op_info.GetInterfaceImpl<paddle::dialect::InferMetaInterface>();
-  VLOG(6) << "finish process infer_meta_interface_";
-
-  auto yaml_interface =
-      op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>();
-  PADDLE_ENFORCE_NOT_NULL(
-      yaml_interface,
-      phi::errors::PreconditionNotMet(
-          "can not find OpYamlInfoInterface from [%s]", phi_op_name_));
-  paddle::dialect::OpYamlInfoParser yaml_info_parser(
-      yaml_interface->get_op_info_(), paddle::dialect::IsLegacyOp(op_name));
-  VLOG(6) << "finish process yaml_info_parser";
-
-  if (infer_meta_interface_) {
-    BuildPhiContext<
-        phi::InferMetaContext,
-        phi::MetaTensor,
-        phi::MetaTensor,
-        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
-        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
-        false>(op, *value_exec_info_, yaml_info_parser, &infer_meta_context_);
-  }
-  VLOG(6) << "finish process infer meta context";
-
-  auto kernel_name =
-      op_attributes.at("kernel_name").dyn_cast<pir::StrAttribute>().AsString();
-  auto kernel_key = op_attributes.at("kernel_key")
-                        .dyn_cast<paddle::dialect::KernelAttribute>()
-                        .data();
-  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      kernel_name, kernel_key);
-  phi_kernel_ = new phi::Kernel(kernel_result.kernel);
-  PADDLE_ENFORCE_EQ(
-      phi_kernel_->IsValid(), true, "not found kernel for [%s]", kernel_name);
-  VLOG(6) << "finish process select kernel";
-
-  BuildPhiContext<phi::KernelContext,
-                  const phi::TensorBase*,
-                  phi::TensorBase*,
-                  paddle::small_vector<const phi::TensorBase*>,
-                  paddle::small_vector<phi::TensorBase*>,
-                  true>(
-      op, *value_exec_info_, yaml_info_parser, &kernel_context_);
-
-  kernel_context_.SetDeviceContext(phi::DeviceContextPool::Instance().Get(
-      phi::TransToPhiPlace(kernel_key.backend())));
-  VLOG(6) << "finish process kernel context";
-
-  SetDeviceContext(
-      ParseDeviceContext(op,
-                         phi::DeviceContextPool::Instance().Get(
-                             phi::TransToPhiPlace(kernel_key.backend())),
-                         place,
-                         GetExecutionStream(),
-                         GetStreamPriority()));
-  VLOG(6) << "finish process device context";
-
-  InitInputsOutputsIds(op, *value_exec_info);
-  VLOG(6) << "finish process inputs outputs index";
-
-  auto& no_need_buffer_ids = yaml_info_parser.NoNeedBufferIds();
-  std::unordered_set<pir::Value> no_need_buffer_values;
-  for (size_t id = 0; id < no_need_buffer_ids.size(); id++) {
-    no_need_buffer_values.insert(op->operand_source(no_need_buffer_ids[id]));
-  }
-  SetNoNeedBuffer(no_need_buffer_values);
-  VLOG(6) << "finish process no need buffer";
+  PADDLE_THROW(platform::errors::Unimplemented(
+      "OneDNNPhiKernelInstruction not defined now."));
 }
 
-OneDNNPhiKernelInstruction::~OneDNNPhiKernelInstruction() {
-  if (phi_kernel_ != nullptr) {
-    delete phi_kernel_;
-  }
-}
+OneDNNPhiKernelInstruction::~OneDNNPhiKernelInstruction() {}
 
 void OneDNNPhiKernelInstruction::Run() {
-  if (infer_meta_interface_) {
-    infer_meta_interface_->infer_meta_(&(infer_meta_context_));
-  }
-  VLOG(6) << "Run op " << phi_op_name_ << " infer meta.";
-  (*(phi_kernel_))(&(kernel_context_));
-  VLOG(6) << "Run op " << phi_op_name_ << " kernel.";
+  PADDLE_THROW(platform::errors::Unimplemented(
+      "OneDNNPhiKernelInstruction not defined now."));
 }
 
 }  // namespace framework

From 98043905f4f65d94dbd04b7d1e95933269c55696 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Wed, 20 Dec 2023 08:02:03 +0000
Subject: [PATCH 25/49] Guard pd_onednn_op.h includes with PADDLE_WITH_DNNL

---
 paddle/fluid/ir_adaptor/translator/op_translator.cc       | 4 +++-
 paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc | 5 ++++-
 paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc       | 5 ++++-
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/ir_adaptor/translator/op_translator.cc b/paddle/fluid/ir_adaptor/translator/op_translator.cc
index f1f820dbb393b1..69d488186deff2 100644
--- a/paddle/fluid/ir_adaptor/translator/op_translator.cc
+++ b/paddle/fluid/ir_adaptor/translator/op_translator.cc
@@ -35,7 +35,6 @@
 #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
-#include "paddle/fluid/pir/dialect/operator/ir/pd_onednn_op.h"
 #include "paddle/fluid/pir/dialect/operator/utils/utils.h"
 #include "paddle/phi/core/utils/data_type.h"
 #include "paddle/pir/core/builder.h"
@@ -45,6 +44,9 @@
 #include "paddle/pir/core/operation.h"
 #include "paddle/pir/core/value.h"
 
+#ifdef PADDLE_WITH_DNNL
+#include "paddle/fluid/pir/dialect/operator/ir/pd_onednn_op.h"
+#endif
 // NOTE(zhangbo9674): File pd_op.h is generated by op_gen.py, see details in
 // paddle/fluid/pir/dialect/CMakeLists.txt.
 #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
diff --git a/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc b/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc
index a61d1c6ee0fe75..0d65389cc4922b 100644
--- a/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc
+++ b/paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.cc
@@ -16,7 +16,6 @@
 #include "paddle/fluid/pir/dialect/operator/ir/control_flow_op.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
-#include "paddle/fluid/pir/dialect/operator/ir/pd_onednn_op.h"
 #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
 #include "paddle/fluid/pir/dialect/operator/ir/type_storage.h"
 #include "paddle/fluid/pir/dialect/operator/transforms/param_to_variable.h"
@@ -27,6 +26,10 @@
 #include "paddle/pir/dialect/control_flow/ir/cf_dialect.h"
 #include "paddle/pir/dialect/control_flow/ir/cf_op.h"
 
+#ifdef PADDLE_WITH_DNNL
+#include "paddle/fluid/pir/dialect/operator/ir/pd_onednn_op.h"
+#endif
+
 namespace paddle {
 namespace dialect {
 
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index f98fe25a809440..5c42382df9656d 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -30,7 +30,6 @@
 #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
-#include "paddle/fluid/pir/dialect/operator/ir/pd_onednn_op.h"
 #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
 #include "paddle/fluid/pir/dialect/operator/trait/inplace.h"
 #include "paddle/fluid/pir/dialect/operator/trait/onednn.h"
@@ -48,6 +47,10 @@
 #include "paddle/pir/dialect/control_flow/ir/cf_op.h"
 #include "paddle/utils/flags.h"
 
+#ifdef PADDLE_WITH_DNNL
+#include "paddle/fluid/pir/dialect/operator/ir/pd_onednn_op.h"
+#endif
+
 PHI_DECLARE_bool(print_ir);
 namespace paddle {
 namespace dialect {

From 387b5c0520f5237a32198253902d2ef4565b6a28 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Thu, 21 Dec 2023 01:34:03 +0000
Subject: [PATCH 26/49] Compile OneDNN PIR dialect code only under PADDLE_WITH_DNNL

---
 .../new_executor/instruction/CMakeLists.txt   |  2 +-
 .../instruction/instruction_util.cc           | 10 +++--
 .../fluid/ir_adaptor/translator/translate.cc  |  6 ++-
 paddle/fluid/ir_adaptor/translator/utils.cc   |  6 ++-
 .../pir/dialect/kernel/ir/kernel_dialect.cc   |  4 ++
 .../pir/dialect/kernel/ir/kernel_dialect.h    |  4 ++
 .../fluid/pir/dialect/kernel/ir/kernel_op.cc  |  4 ++
 .../fluid/pir/dialect/kernel/ir/kernel_op.h   |  4 ++
 .../fluid/pir/dialect/operator/trait/trait.cc |  6 ++-
 .../fluid/pir/dialect/operator/utils/utils.cc |  2 +
 .../fluid/pir/dialect/operator/utils/utils.h  |  2 +
 paddle/fluid/pir/transforms/inplace_pass.cc   | 37 ++++++++++------
 .../pir/transforms/pd_op_to_kernel_pass.cc    | 19 +++++---
 .../phi/kernels/cpu/transfer_layout_kernel.h  | 44 -------------------
 14 files changed, 80 insertions(+), 70 deletions(-)
 delete mode 100644 paddle/phi/kernels/cpu/transfer_layout_kernel.h

diff --git a/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt b/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
index 8996f216c72193..69b4f472ebd9a3 100644
--- a/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
+++ b/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
@@ -21,7 +21,7 @@ endif()
 cc_library(
   instruction_base
   SRCS ${instruction_base_srcs}
-  DEPS framework_proto)
+  DEPS pir_adaptor phi common framework_proto)
 if(WITH_MKLDNN)
   add_dependencies(instruction_base mkldnn)
 endif()
diff --git a/paddle/fluid/framework/new_executor/instruction/instruction_util.cc b/paddle/fluid/framework/new_executor/instruction/instruction_util.cc
index 4dab1cc419fb76..8d6d25d0a32664 100644
--- a/paddle/fluid/framework/new_executor/instruction/instruction_util.cc
+++ b/paddle/fluid/framework/new_executor/instruction/instruction_util.cc
@@ -154,9 +154,13 @@ OpFuncType AnalyseOpFuncType(pir::Operation* op, const platform::Place& place) {
   auto& op_attributes = op->attributes();
 
   if ((op->dialect()->name().compare(paddle::dialect::KernelDialect::name()) ==
-           0 ||
-       op->dialect()->name().compare(
-           paddle::dialect::OneDNNKernelDialect::name()) == 0) &&
+           0
+#ifdef PADDLE_WITH_DNNL
+       || op->dialect()->name().compare(
+              paddle::dialect::OneDNNKernelDialect::name()) == 0
+#endif
+
+       ) &&
       (op_attributes.count("kernel_key") > 0)) {
     auto kernel_key = op_attributes.at("kernel_key")
                           .dyn_cast<dialect::KernelAttribute>()
diff --git a/paddle/fluid/ir_adaptor/translator/translate.cc b/paddle/fluid/ir_adaptor/translator/translate.cc
index dbab7e2e6fd1dc..04ddf1d13a5a8a 100644
--- a/paddle/fluid/ir_adaptor/translator/translate.cc
+++ b/paddle/fluid/ir_adaptor/translator/translate.cc
@@ -19,10 +19,12 @@
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/ir_adaptor/translator/program_translator.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h"
-#include "paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.h"
 #include "paddle/pir/core/builtin_dialect.h"
 #include "paddle/pir/core/program.h"
 
+#ifdef PADDLE_WITH_DNNL
+#include "paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.h"
+#endif
 namespace paddle {
 
 using LegacyProgramDesc = ::paddle::framework::ProgramDesc;
@@ -32,7 +34,9 @@ std::unique_ptr<Program> TranslateLegacyProgramToProgram(
     const LegacyProgramDesc& legacy_program) {
   pir::IrContext* ctx = pir::IrContext::Instance();
   ctx->GetOrRegisterDialect<dialect::OperatorDialect>();
+#ifdef PADDLE_WITH_DNNL
   ctx->GetOrRegisterDialect<dialect::OneDNNOperatorDialect>();
+#endif
   auto program = std::make_unique<Program>(ctx);
   translator::ProgramTranslator program_translator(&legacy_program,
                                                    program.get());
diff --git a/paddle/fluid/ir_adaptor/translator/utils.cc b/paddle/fluid/ir_adaptor/translator/utils.cc
index 8ebf090a5df1b8..dbd85292974bf0 100644
--- a/paddle/fluid/ir_adaptor/translator/utils.cc
+++ b/paddle/fluid/ir_adaptor/translator/utils.cc
@@ -19,11 +19,13 @@
 #include "paddle/common/enforce.h"
 #include "paddle/fluid/ir_adaptor/translator/op_translator.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h"
-#include "paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.h"
 #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
 #include "paddle/pir/core/builtin_attribute.h"
 #include "paddle/pir/core/builtin_type.h"
 #include "paddle/pir/core/utils.h"
+#ifdef PADDLE_WITH_DNNL
+#include "paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.h"
+#endif
 
 namespace paddle {
 namespace dialect {
@@ -95,7 +97,9 @@ std::vector<std::string> CheckUnregisteredOperationInBlock(
 std::vector<std::string> CheckUnregisteredOperation(
     pir::IrContext* ctx, const framework::ProgramDesc& legacy_program) {
   ctx->GetOrRegisterDialect<dialect::OperatorDialect>();
+#ifdef PADDLE_WITH_DNNL
   ctx->GetOrRegisterDialect<dialect::OneDNNOperatorDialect>();
+#endif
 
   std::vector<std::string> unregistered_ops;
   for (size_t block_idx = 0; block_idx < legacy_program.Size(); block_idx++) {
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc
index e04c267178d191..ecf04d4411397b 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc
@@ -122,6 +122,7 @@ void KernelDialect::PrintOperation(pir::Operation *op,
   }
 }
 
+#ifdef PADDLE_WITH_DNNL
 OneDNNKernelDialect::OneDNNKernelDialect(pir::IrContext *context)
     : pir::Dialect(name(), context, pir::TypeId::get<OneDNNKernelDialect>()) {
   initialize();
@@ -219,9 +220,12 @@ void OneDNNKernelDialect::PrintOperation(pir::Operation *op,
     printer.PrintGeneralOperation(op);
   }
 }
+#endif
 
 }  // namespace dialect
 }  // namespace paddle
 
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::KernelDialect)
+#ifdef PADDLE_WITH_DNNL
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNKernelDialect)
+#endif
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h
index c73aa8baed10b5..fbdb53a40b183d 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h
@@ -36,6 +36,7 @@ class KernelDialect : public pir::Dialect {
   void initialize();
 };
 
+#ifdef PADDLE_WITH_DNNL
 class OneDNNKernelDialect : public pir::Dialect {
  public:
   explicit OneDNNKernelDialect(pir::IrContext* context);
@@ -52,9 +53,12 @@ class OneDNNKernelDialect : public pir::Dialect {
  private:
   void initialize();
 };
+#endif
 
 }  // namespace dialect
 }  // namespace paddle
 
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::KernelDialect)
+#ifdef PADDLE_WITH_DNNL
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNKernelDialect)
+#endif
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc
index 01510b9e430938..45f0a848fc174d 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.cc
@@ -98,6 +98,7 @@ phi::KernelKey LegacyKernelOp::kernel_key() {
   return attributes().at("kernel_key").dyn_cast<KernelAttribute>().data();
 }
 
+#ifdef PADDLE_WITH_DNNL
 const char* OneDNNPhiKernelOp::attributes_name[attributes_num] = {  // NOLINT
     "op_name",
     "kernel_name",
@@ -217,12 +218,15 @@ std::string OneDNNLegacyKernelOp::kernel_name() {
 phi::KernelKey OneDNNLegacyKernelOp::kernel_key() {
   return attributes().at("kernel_key").dyn_cast<KernelAttribute>().data();
 }
+#endif
 
 }  // namespace dialect
 }  // namespace paddle
 
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::PhiKernelOp)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::LegacyKernelOp)
+#ifdef PADDLE_WITH_DNNL
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNPhiKernelOp)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNMixedPhiKernelOp)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNLegacyKernelOp)
+#endif
diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h
index 57babb00a6570e..df723158702085 100644
--- a/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h
+++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_op.h
@@ -44,6 +44,7 @@ class LegacyKernelOp : public pir::Op<LegacyKernelOp> {
   void VerifySig();
 };
 
+#ifdef PADDLE_WITH_DNNL
 class OneDNNPhiKernelOp : public pir::Op<OneDNNPhiKernelOp> {
  public:
   using Op::Op;
@@ -79,12 +80,15 @@ class OneDNNLegacyKernelOp : public pir::Op<OneDNNLegacyKernelOp> {
   phi::KernelKey kernel_key();
   void VerifySig();
 };
+#endif
 
 }  // namespace dialect
 }  // namespace paddle
 
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::PhiKernelOp)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::LegacyKernelOp)
+#ifdef PADDLE_WITH_DNNL
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNPhiKernelOp)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNMixedPhiKernelOp)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNLegacyKernelOp)
+#endif
diff --git a/paddle/fluid/pir/dialect/operator/trait/trait.cc b/paddle/fluid/pir/dialect/operator/trait/trait.cc
index 7444faec519bc2..9d828570d389aa 100644
--- a/paddle/fluid/pir/dialect/operator/trait/trait.cc
+++ b/paddle/fluid/pir/dialect/operator/trait/trait.cc
@@ -14,10 +14,14 @@
 
 #include "paddle/fluid/pir/dialect/operator/trait/custom_vjp.h"
 #include "paddle/fluid/pir/dialect/operator/trait/inplace.h"
+#ifdef PADDLE_WITH_DNNL
 #include "paddle/fluid/pir/dialect/operator/trait/onednn.h"
-
+#endif
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::InplaceTrait)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::CustomVjpTrait)
+
+#ifdef PADDLE_WITH_DNNL
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNTrait)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNOnlyTrait)
 IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNDynamicFallbackTrait)
+#endif
diff --git a/paddle/fluid/pir/dialect/operator/utils/utils.cc b/paddle/fluid/pir/dialect/operator/utils/utils.cc
index 9e67b51047477e..746aeb2145a851 100644
--- a/paddle/fluid/pir/dialect/operator/utils/utils.cc
+++ b/paddle/fluid/pir/dialect/operator/utils/utils.cc
@@ -219,9 +219,11 @@ VariantType GetAttributeData(const pir::Attribute& attr) {
 
 bool IsLegacyOp(const std::string& name) { return LegacyOpList.count(name); }
 
+#ifdef PADDLE_WITH_DNNL
 bool IsOneDNNLegacyOp(const std::string& name) {
   return OneDNNLegacyOpList.count(name);
 }
+#endif
 
 bool IsEmptyValue(const pir::Value& value) {
   return !value.impl() || !value.type();
diff --git a/paddle/fluid/pir/dialect/operator/utils/utils.h b/paddle/fluid/pir/dialect/operator/utils/utils.h
index addaabed9ec4e4..0e14077bb8559d 100644
--- a/paddle/fluid/pir/dialect/operator/utils/utils.h
+++ b/paddle/fluid/pir/dialect/operator/utils/utils.h
@@ -132,7 +132,9 @@ VariantType GetAttributeData(const pir::Attribute& attr);
 
 bool IsLegacyOp(const std::string& name);
 
+#ifdef PADDLE_WITH_DNNL
 bool IsOneDNNLegacyOp(const std::string& name);
+#endif
 
 bool IsEmptyValue(const pir::Value& value);
 
diff --git a/paddle/fluid/pir/transforms/inplace_pass.cc b/paddle/fluid/pir/transforms/inplace_pass.cc
index 7eac9f79e234e2..01ebd454acefac 100644
--- a/paddle/fluid/pir/transforms/inplace_pass.cc
+++ b/paddle/fluid/pir/transforms/inplace_pass.cc
@@ -165,10 +165,12 @@ static bool CanDoInplace(const std::unordered_set<pir::Value>& eager_dels,
 }
 
 static bool IsNoNeedBuffer(pir::Operation* op, pir::Value value) {
-  if (op->dialect()->name().compare(paddle::dialect::KernelDialect::name()) !=
-          0 ||
-      op->dialect()->name().compare(
-          paddle::dialect::OneDNNKernelDialect::name()) != 0) {
+  if (op->dialect()->name().compare(paddle::dialect::KernelDialect::name()) != 0
+#ifdef PADDLE_WITH_DNNL
+      || op->dialect()->name().compare(
+             paddle::dialect::OneDNNKernelDialect::name()) != 0
+#endif
+  ) {
     VLOG(8) << op->name()
             << "is not a kernel_dialect op, no need buffer is false";
     return false;
@@ -200,9 +202,12 @@ static std::unordered_set<pir::Value> GetSkipDeletionValues(pir::Block* block) {
   std::unordered_set<pir::Value> skip_dels;
   for (auto& op : *block) {
     if (op.dialect()->name().compare(paddle::dialect::KernelDialect::name()) !=
-            0 ||
-        op.dialect()->name().compare(
-            paddle::dialect::OneDNNKernelDialect::name()) != 0) {
+            0
+#ifdef PADDLE_WITH_DNNL
+        || op.dialect()->name().compare(
+               paddle::dialect::OneDNNKernelDialect::name()) != 0
+#endif
+    ) {
       continue;
     }
     IR_ENFORCE(op.attributes().count("op_name") > 0,
@@ -234,9 +239,12 @@ static void GetEagerDelValueOfOp(
   for (auto& op : *block) {
     std::string upper_op_name = op.name();
     if (op.dialect()->name().compare(paddle::dialect::KernelDialect::name()) ==
-            0 ||
-        op.dialect()->name().compare(
-            paddle::dialect::OneDNNKernelDialect::name()) != 0) {
+            0
+#ifdef PADDLE_WITH_DNNL
+        || op.dialect()->name().compare(
+               paddle::dialect::OneDNNKernelDialect::name()) != 0
+#endif
+    ) {
       IR_ENFORCE(op.attributes().count("op_name") > 0,
                  "kernel_dialect op should own an 'op_name' attribute.");
       upper_op_name = op.attributes()
@@ -307,9 +315,12 @@ static std::unordered_map<pir::Operation*, std::string> GetInplaceOps(
     }
 
     if (op.dialect()->name().compare(paddle::dialect::KernelDialect::name()) !=
-            0 ||
-        op.dialect()->name().compare(
-            paddle::dialect::OneDNNKernelDialect::name()) != 0) {
+            0
+#ifdef PADDLE_WITH_DNNL
+        || op.dialect()->name().compare(
+               paddle::dialect::OneDNNKernelDialect::name()) != 0
+#endif
+    ) {
       VLOG(6) << op.name()
               << "is not a kernel_dialect op, inplace only support "
                  "kernel_dialect operators";
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index 4650c0510a9de6..938c89cd7af163 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -28,11 +28,9 @@
 #include "paddle/fluid/pir/dialect/operator/ir/manual_op.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h"
-#include "paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
 #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
 #include "paddle/fluid/pir/dialect/operator/trait/inplace.h"
-#include "paddle/fluid/pir/dialect/operator/trait/onednn.h"
 #include "paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.h"
 #include "paddle/fluid/pir/dialect/operator/utils/op_yaml_info_util.h"
 #include "paddle/fluid/pir/dialect/operator/utils/utils.h"
@@ -48,7 +46,9 @@
 #include "paddle/utils/flags.h"
 
 #ifdef PADDLE_WITH_DNNL
+#include "paddle/fluid/pir/dialect/operator/ir/op_onednn_dialect.h"
 #include "paddle/fluid/pir/dialect/operator/ir/pd_onednn_op.h"
+#include "paddle/fluid/pir/dialect/operator/trait/onednn.h"
 #endif
 
 PHI_DECLARE_bool(print_ir);
@@ -308,6 +308,7 @@ static pir::OpResult AddPlaceTransferOp(pir::Value in,
   return new_in;
 }
 
+#ifdef PADDLE_WITH_DNNL
 static pir::OpResult AddOneDNN2PaddleLayoutTransferOp(
     pir::Value in, const phi::DataLayout& dst_layout, pir::Block* block) {
   pir::IrContext* ctx = pir::IrContext::Instance();
@@ -348,6 +349,7 @@ static pir::OpResult AddOneDNN2PaddleLayoutTransferOp(
 
   return new_in;
 }
+#endif
 
 static bool NeedTransformDataType(const phi::DataType& l,
                                   const phi::DataType& r) {
@@ -718,6 +720,7 @@ std::string GetKernelName(const OpYamlInfoParser* op_info_parser,
   return kernel_fn_str;
 }
 
+#ifdef PADDLE_WITH_DNNL
 bool SupportsMKLDNN(const std::string& kernel_name,
                     const phi::DataType data_type) {
   auto phi_kernels =
@@ -758,6 +761,7 @@ bool SupportsMKLDNN(const std::string& kernel_name,
     }
   }
 }
+#endif
 
 phi::KernelKey GetKernelKey(
     pir::Operation* op,
@@ -1908,9 +1912,9 @@ pir::Operation* BuildKernelOp(
   if (op_item->HasTrait<InplaceTrait>()) {
     op_attribute.emplace("is_inplace", pir::BoolAttribute::get(ctx, true));
   }
-#ifdef PADDLE_WITH_DNNL
-#endif
+
   pir::Operation* op = nullptr;
+#ifdef PADDLE_WITH_DNNL
   if (op_item->HasTrait<OneDNNTrait>()) {
     if (IsOneDNNLegacyOp(op_item->name())) {
       VLOG(4) << "choose OneDNNLegacyKernelOp";
@@ -1963,7 +1967,9 @@ pir::Operation* BuildKernelOp(
             vec_inputs, op_attribute, op_output_types, phi_kernel_op_info);
       }
     }
-  } else {
+  } else  // NOLINT
+#endif
+  {
     if (IsLegacyOp(op_item->name())) {
       pir::OpInfo legacy_kernel_op_info =
           ctx->GetRegisteredOpInfo(LegacyKernelOp::name());
@@ -2069,9 +2075,10 @@ std::unique_ptr<pir::Program> PdOpLowerToKernelPass(pir::Program* prog,
   pir::IrContext* ctx = pir::IrContext::Instance();
   ctx->GetOrRegisterDialect<OperatorDialect>();
   ctx->GetOrRegisterDialect<KernelDialect>();
+#ifdef PADDLE_WITH_DNNL
   ctx->GetOrRegisterDialect<OneDNNOperatorDialect>();
   ctx->GetOrRegisterDialect<OneDNNKernelDialect>();
-
+#endif
   std::unordered_map<pir::Operation*, pir::Operation*> map_op_pair;
   std::unordered_map<pir::Value, pir::Value> map_value_pair;
 
diff --git a/paddle/phi/kernels/cpu/transfer_layout_kernel.h b/paddle/phi/kernels/cpu/transfer_layout_kernel.h
deleted file mode 100644
index 73e12927d7ffe5..00000000000000
--- a/paddle/phi/kernels/cpu/transfer_layout_kernel.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include "paddle/phi/core/dense_tensor.h"
-#include "paddle/phi/infermeta/unary.h"
-#include "paddle/phi/kernels/empty_kernel.h"
-
-namespace phi {
-
-template <typename Context>
-void TransferLayoutKernel(const Context& dev_ctx,
-                          const DenseTensor& x,
-                          int src_layout,
-                          int dst_layout,
-                          DenseTensor* out);
-
-template <typename Context>
-DenseTensor TransferLayout(const Context& dev_ctx,
-                           const DenseTensor& x,
-                           DataLayout dst_layout) {
-  phi::DenseTensor dense_out =
-      phi::Empty(dev_ctx, {x.dtype(), x.dims(), dst_layout});
-  TransferLayoutKernel<Context>(dev_ctx,
-                                x,
-                                static_cast<int>(x.layout()),
-                                static_cast<int>(dst_layout),
-                                &dense_out);
-  return dense_out;
-}
-
-}  // namespace phi

From 2560f7fd64551329be3594037abe78c477e62460 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Thu, 21 Dec 2023 02:01:43 +0000
Subject: [PATCH 27/49] Fix mangled instruction_base_srcs list in CMakeLists.txt

---
 paddle/fluid/framework/new_executor/instruction/CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt b/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
index 69b4f472ebd9a3..4221e24f982366 100644
--- a/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
+++ b/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
@@ -1,4 +1,5 @@
-set(nstruction_base.cc
+set(instruction_base_srcs
+    instruction_base.cc
     phi_kernel_instruction.cc
     legacy_kernel_instruction.cc
     if_instruction.cc

From dc2fecdfd584fe0d5ce84a7fd9ba74ae61085ee9 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Thu, 21 Dec 2023 02:02:37 +0000
Subject: [PATCH 28/49] Restore instruction_base DEPS to framework_proto only

---
 paddle/fluid/framework/new_executor/instruction/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt b/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
index 4221e24f982366..5073e4713d395d 100644
--- a/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
+++ b/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
@@ -22,7 +22,7 @@ endif()
 cc_library(
   instruction_base
   SRCS ${instruction_base_srcs}
-  DEPS pir_adaptor phi common framework_proto)
+  DEPS framework_proto)
 if(WITH_MKLDNN)
   add_dependencies(instruction_base mkldnn)
 endif()

From 24b55d1a3816e2d53fd6b58883220a3ac0683058 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Thu, 21 Dec 2023 02:26:09 +0000
Subject: [PATCH 29/49] Update copyright year to 2023 in onednn_to_paddle_layout kernel files

---
 paddle/phi/kernels/cpu/onednn_to_paddle_layout_kernel.cc | 2 +-
 paddle/phi/kernels/onednn_to_paddle_layout_kernel.h      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/phi/kernels/cpu/onednn_to_paddle_layout_kernel.cc b/paddle/phi/kernels/cpu/onednn_to_paddle_layout_kernel.cc
index 301125ef5ca298..eba8b2b61f4d27 100644
--- a/paddle/phi/kernels/cpu/onednn_to_paddle_layout_kernel.cc
+++ b/paddle/phi/kernels/cpu/onednn_to_paddle_layout_kernel.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
diff --git a/paddle/phi/kernels/onednn_to_paddle_layout_kernel.h b/paddle/phi/kernels/onednn_to_paddle_layout_kernel.h
index 8dc86b48901ca1..a6ddc280c4e3c8 100644
--- a/paddle/phi/kernels/onednn_to_paddle_layout_kernel.h
+++ b/paddle/phi/kernels/onednn_to_paddle_layout_kernel.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.

From b2466eb414e521010fe7a4321379030b60231ede Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Thu, 21 Dec 2023 04:55:40 +0000
Subject: [PATCH 30/49] Remove OneDNNKernelDialect checks from instruction_util and inplace_pass

---
 .../instruction/instruction_util.cc           |  8 +----
 paddle/fluid/pir/transforms/inplace_pass.cc   | 29 ++++---------------
 2 files changed, 6 insertions(+), 31 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/instruction_util.cc b/paddle/fluid/framework/new_executor/instruction/instruction_util.cc
index 8d6d25d0a32664..55dc035b6e0638 100644
--- a/paddle/fluid/framework/new_executor/instruction/instruction_util.cc
+++ b/paddle/fluid/framework/new_executor/instruction/instruction_util.cc
@@ -154,13 +154,7 @@ OpFuncType AnalyseOpFuncType(pir::Operation* op, const platform::Place& place) {
   auto& op_attributes = op->attributes();
 
   if ((op->dialect()->name().compare(paddle::dialect::KernelDialect::name()) ==
-           0
-#ifdef PADDLE_WITH_DNNL
-       || op->dialect()->name().compare(
-              paddle::dialect::OneDNNKernelDialect::name()) == 0
-#endif
-
-       ) &&
+       0) &&
       (op_attributes.count("kernel_key") > 0)) {
     auto kernel_key = op_attributes.at("kernel_key")
                           .dyn_cast<dialect::KernelAttribute>()
diff --git a/paddle/fluid/pir/transforms/inplace_pass.cc b/paddle/fluid/pir/transforms/inplace_pass.cc
index 01ebd454acefac..eaaaeba7b28b64 100644
--- a/paddle/fluid/pir/transforms/inplace_pass.cc
+++ b/paddle/fluid/pir/transforms/inplace_pass.cc
@@ -165,12 +165,8 @@ static bool CanDoInplace(const std::unordered_set<pir::Value>& eager_dels,
 }
 
 static bool IsNoNeedBuffer(pir::Operation* op, pir::Value value) {
-  if (op->dialect()->name().compare(paddle::dialect::KernelDialect::name()) != 0
-#ifdef PADDLE_WITH_DNNL
-      || op->dialect()->name().compare(
-             paddle::dialect::OneDNNKernelDialect::name()) != 0
-#endif
-  ) {
+  if (op->dialect()->name().compare(paddle::dialect::KernelDialect::name()) !=
+      0) {
     VLOG(8) << op->name()
             << "is not a kernel_dialect op, no need buffer is false";
     return false;
@@ -202,12 +198,7 @@ static std::unordered_set<pir::Value> GetSkipDeletionValues(pir::Block* block) {
   std::unordered_set<pir::Value> skip_dels;
   for (auto& op : *block) {
     if (op.dialect()->name().compare(paddle::dialect::KernelDialect::name()) !=
-            0
-#ifdef PADDLE_WITH_DNNL
-        || op.dialect()->name().compare(
-               paddle::dialect::OneDNNKernelDialect::name()) != 0
-#endif
-    ) {
+        0) {
       continue;
     }
     IR_ENFORCE(op.attributes().count("op_name") > 0,
@@ -239,12 +230,7 @@ static void GetEagerDelValueOfOp(
   for (auto& op : *block) {
     std::string upper_op_name = op.name();
     if (op.dialect()->name().compare(paddle::dialect::KernelDialect::name()) ==
-            0
-#ifdef PADDLE_WITH_DNNL
-        || op.dialect()->name().compare(
-               paddle::dialect::OneDNNKernelDialect::name()) != 0
-#endif
-    ) {
+        0) {
       IR_ENFORCE(op.attributes().count("op_name") > 0,
                  "kernel_dialect op should own an 'op_name' attribute.");
       upper_op_name = op.attributes()
@@ -315,12 +301,7 @@ static std::unordered_map<pir::Operation*, std::string> GetInplaceOps(
     }
 
     if (op.dialect()->name().compare(paddle::dialect::KernelDialect::name()) !=
-            0
-#ifdef PADDLE_WITH_DNNL
-        || op.dialect()->name().compare(
-               paddle::dialect::OneDNNKernelDialect::name()) != 0
-#endif
-    ) {
+        0) {
       VLOG(6) << op.name()
               << "is not a kernel_dialect op, inplace only support "
                  "kernel_dialect operators";

From 328484827249514a5f0850d94e301037097df396 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Thu, 21 Dec 2023 07:04:06 +0000
Subject: [PATCH 31/49] refine

---
 paddle/phi/api/lib/data_transform.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/paddle/phi/api/lib/data_transform.h b/paddle/phi/api/lib/data_transform.h
index 7ad1d0fe12eade..e0509fa8582ae2 100644
--- a/paddle/phi/api/lib/data_transform.h
+++ b/paddle/phi/api/lib/data_transform.h
@@ -177,9 +177,12 @@ inline bool NeedTransformPlace(const phi::Place& src_place,
   bool ret = src_place.GetType() == AllocationType::GPUPINNED ||
              (target != Backend::ALL_BACKEND &&
               phi::TransToPhiBackend(src_place) !=
-                  (target != Backend::GPUDNN ? target : Backend::GPU) &&
-              (src_place.GetType() != AllocationType::CPU &&
-               target == Backend::ONEDNN));
+                  (target != Backend::GPUDNN ? target : Backend::GPU));
+#ifdef PADDLE_WITH_DNNL
+  if (target == Backend::ONEDNN) {
+    ret = src_place.GetType() != AllocationType::CPU;
+  }
+#endif
   return ret;
 }
 

From 716894edef2e0a29f922e7c1e84cf71842c0d9a5 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Thu, 21 Dec 2023 07:12:47 +0000
Subject: [PATCH 32/49] refine

---
 .../fluid/framework/new_executor/instruction/CMakeLists.txt | 6 +++---
 .../{ => onednn}/onednn_legacy_kernel_instruction.cc        | 2 +-
 .../{ => onednn}/onednn_legacy_kernel_instruction.h         | 0
 .../{ => onednn}/onednn_mixed_phi_kernel_instruction.cc     | 2 +-
 .../{ => onednn}/onednn_mixed_phi_kernel_instruction.h      | 0
 .../{ => onednn}/onednn_phi_kernel_instruction.cc           | 2 +-
 .../{ => onednn}/onednn_phi_kernel_instruction.h            | 0
 paddle/fluid/framework/new_executor/pir_interpreter.cc      | 6 +++---
 8 files changed, 9 insertions(+), 9 deletions(-)
 rename paddle/fluid/framework/new_executor/instruction/{ => onednn}/onednn_legacy_kernel_instruction.cc (98%)
 rename paddle/fluid/framework/new_executor/instruction/{ => onednn}/onednn_legacy_kernel_instruction.h (100%)
 rename paddle/fluid/framework/new_executor/instruction/{ => onednn}/onednn_mixed_phi_kernel_instruction.cc (98%)
 rename paddle/fluid/framework/new_executor/instruction/{ => onednn}/onednn_mixed_phi_kernel_instruction.h (100%)
 rename paddle/fluid/framework/new_executor/instruction/{ => onednn}/onednn_phi_kernel_instruction.cc (98%)
 rename paddle/fluid/framework/new_executor/instruction/{ => onednn}/onednn_phi_kernel_instruction.h (100%)

diff --git a/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt b/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
index 5073e4713d395d..14585a68dcbdd4 100644
--- a/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
+++ b/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt
@@ -14,9 +14,9 @@ set(instruction_base_srcs
 
 if(WITH_MKLDNN)
   set(instruction_base_srcs
-      ${instruction_base_srcs} onednn_phi_kernel_instruction.cc
-      onednn_mixed_phi_kernel_instruction.cc
-      onednn_legacy_kernel_instruction.cc)
+      ${instruction_base_srcs} onednn/onednn_phi_kernel_instruction.cc
+      onednn/onednn_mixed_phi_kernel_instruction.cc
+      onednn/onednn_legacy_kernel_instruction.cc)
 endif()
 
 cc_library(
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_legacy_kernel_instruction.cc
similarity index 98%
rename from paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.cc
rename to paddle/fluid/framework/new_executor/instruction/onednn/onednn_legacy_kernel_instruction.cc
index 5cf89af6a9f200..6d1944219a2dc9 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_legacy_kernel_instruction.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.h"
+#include "paddle/fluid/framework/new_executor/instruction/onednn/onednn_legacy_kernel_instruction.h"
 
 #include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
 #include "paddle/fluid/framework/new_executor/interpreter/interpreter_util.h"
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.h b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_legacy_kernel_instruction.h
similarity index 100%
rename from paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.h
rename to paddle/fluid/framework/new_executor/instruction/onednn/onednn_legacy_kernel_instruction.h
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.cc
similarity index 98%
rename from paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.cc
rename to paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.cc
index 7e577710f62222..3c7a3f368d2a67 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.h"
+#include "paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.h"
 
 #include "paddle/fluid/framework/new_executor/interpreter/interpreter_util.h"
 #include "paddle/fluid/framework/new_executor/interpreter/stream_analyzer.h"
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.h b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.h
similarity index 100%
rename from paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.h
rename to paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.h
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
similarity index 98%
rename from paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.cc
rename to paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
index 5e64579d3e9cb8..afe366348999d7 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.h"
+#include "paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.h"
 
 #include "paddle/fluid/framework/new_executor/interpreter/interpreter_util.h"
 #include "paddle/fluid/framework/new_executor/interpreter/stream_analyzer.h"
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.h b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.h
similarity index 100%
rename from paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.h
rename to paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.h
diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc
index dfcf1e4eb6b1d5..9cdd35a9971a12 100644
--- a/paddle/fluid/framework/new_executor/pir_interpreter.cc
+++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc
@@ -34,9 +34,9 @@
 #include "paddle/phi/core/sparse_csr_tensor.h"
 
 #ifdef PADDLE_WITH_DNNL
-#include "paddle/fluid/framework/new_executor/instruction/onednn_legacy_kernel_instruction.h"
-#include "paddle/fluid/framework/new_executor/instruction/onednn_mixed_phi_kernel_instruction.h"
-#include "paddle/fluid/framework/new_executor/instruction/onednn_phi_kernel_instruction.h"
+#include "paddle/fluid/framework/new_executor/instruction/onednn/onednn_legacy_kernel_instruction.h"
+#include "paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.h"
+#include "paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
 

From f3a8bd829c3098ee54c2062d202237eff6f9573b Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Fri, 22 Dec 2023 06:31:12 +0000
Subject: [PATCH 33/49] refine

---
 .../onednn/onednn_phi_kernel_instruction.cc   | 340 +++++++++++++++++-
 .../onednn/onednn_phi_kernel_instruction.h    |   9 +
 2 files changed, 344 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
index afe366348999d7..6bab57d6f3b79c 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
@@ -32,25 +32,355 @@
 #include "paddle/pir/core/operation.h"
 #include "paddle/pir/core/value.h"
 
+#include "dnnl.hpp"  // NOLINT
 #include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
+#include "paddle/fluid/framework/type_defs.h"
+#include "paddle/fluid/ir_adaptor/translator/op_compat_info.h"
+#include "paddle/phi/backends/onednn/onednn_context.h"
+#include "paddle/phi/backends/onednn/onednn_helper.h"
+#include "paddle/phi/kernels/funcs/data_layout_transform.h"
+
 namespace paddle {
 namespace framework {
 
+static RuntimeAttribute ConvertPIRAttribute2RuntimeAttribute(
+    PIRAttribute attr,
+    const std::string& attr_name,
+    const paddle::dialect::OpYamlInfoParser& op_yaml_info) {
+  auto& attr_type_name = op_yaml_info.AttrTypeName(attr_name);
+  if (attr_type_name == "pir::Int32Attribute") {
+    return attr.dyn_cast<pir::Int32Attribute>().data();
+  } else if (attr_type_name == "pir::FloatAttribute") {
+    return attr.dyn_cast<pir::FloatAttribute>().data();
+  } else if (attr_type_name == "pir::BoolAttribute") {
+    return attr.dyn_cast<pir::BoolAttribute>().data();
+  } else if (attr_type_name == "pir::StrAttribute") {
+    return attr.dyn_cast<pir::StrAttribute>().AsString();
+  } else if (attr_type_name == "pir::ArrayAttribute<pir::Int32Attribute>") {
+    auto array_list = attr.dyn_cast<pir::ArrayAttribute>().AsVector();
+    std::vector<int32_t> vec_res;
+    if (array_list.size() > 0) {
+      PADDLE_ENFORCE_EQ(array_list[0].isa<pir::Int32Attribute>(),
+                        true,
+                        phi::errors::Unimplemented(
+                            "the 0th elementwise MUST be pir::Int32Attribute"));
+      for (size_t i = 0; i < array_list.size(); ++i) {
+        vec_res.push_back(array_list[i].dyn_cast<pir::Int32Attribute>().data());
+      }
+    }
+    return vec_res;
+  } else if (attr_type_name == "pir::ArrayAttribute<pir::FloatAttribute>") {
+    auto array_list = attr.dyn_cast<pir::ArrayAttribute>().AsVector();
+    std::vector<float> vec_res;
+    if (array_list.size() > 0) {
+      if (array_list[0].isa<pir::FloatAttribute>()) {
+        for (size_t i = 0; i < array_list.size(); ++i) {
+          vec_res.push_back(
+              array_list[i].dyn_cast<pir::FloatAttribute>().data());
+        }
+
+      } else {
+        PADDLE_THROW(phi::errors::Unimplemented(
+            "ConvertPIRAttribute2RuntimeAttribute not support [%s] ",
+            attr_type_name));
+      }
+    }
+    return vec_res;
+  } else {
+    PADDLE_THROW(phi::errors::Unimplemented(
+        "ConvertPIRAttribute2RuntimeAttribute not support [%s] ",
+        attr_type_name));
+  }
+}
+
+void TensorNameMap(pir::Operation* op,
+                   const ValueExecutionInfo& value_exec_info,
+                   const paddle::dialect::OpYamlInfoParser& op_yaml_info,
+                   std::map<std::string, std::vector<std::string>>&
+                       inputs_tensor_name_map,  // NOLINT
+                   std::map<std::string, std::vector<std::string>>&
+                       outputs_tensor_name_map) {  // NOLINT
+  const Scope* inner_scope = value_exec_info.GetScope();
+  VLOG(6) << "TensorNameMap in scope[" << inner_scope << "]";
+
+  auto& vec_kernel_fn_tensor_params = op_yaml_info.TensorParams(true);
+
+  auto& name2id = op_yaml_info.InputName2Id();
+
+  std::string fluid_op_name = op_yaml_info.GetOriginOpName();
+
+  auto& op_normalizer = paddle::translator::OpNameNormalizer::instance();
+
+  for (auto& name : vec_kernel_fn_tensor_params) {
+    PADDLE_ENFORCE_EQ(
+        name2id.count(name),
+        true,
+        phi::errors::NotFound("param [%s] MUST in name2id map", name));
+    auto index = name2id.at(name);
+    pir::Value ptr = op->operand_source(index);
+
+    if (!IsInvalid(ptr)) {
+      continue;
+    }
+
+    auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
+    auto in_var_name = value_exec_info.GetVarName(ptr);
+    PADDLE_ENFORCE_NOT_NULL(inner_scope->FindVar(in_var_name),
+                            phi::errors::PreconditionNotMet(
+                                "can not find var[%s] in scope", in_var_name));
+
+    auto type = ptr.type();
+    if (type.isa<paddle::dialect::AllocatedDenseTensorType>() ||
+        type.isa<paddle::dialect::AllocatedSelectedRowsType>()) {
+      inputs_tensor_name_map[legacy_arg_name] = {in_var_name};
+    } else if (type.isa<pir::VectorType>()) {
+      auto var = inner_scope->FindVar(in_var_name);
+      auto var_ref = var->Get<VariableRefArray>();
+      std::vector<std::string> vec_tmp;
+      vec_tmp.reserve(var_ref.size());
+      for (size_t k = 0; k < var_ref.size(); ++k) {
+        vec_tmp.push_back(value_exec_info.GetVarName(var_ref[k]));
+      }
+      inputs_tensor_name_map[legacy_arg_name] = vec_tmp;
+    } else {
+      PADDLE_THROW(phi::errors::Unimplemented(
+          "only support AllocatedDenseTensor, AllocatedSelectedRowsType  and "
+          "pir::vector type"));
+    }
+  }
+
+  auto& output_name_list = op_yaml_info.OutputNames();
+  for (size_t i = 0; i < output_name_list.size(); ++i) {
+    auto name = output_name_list[i];
+    pir::Value ptr = op->result(i);
+    auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
+
+    if (!IsInvalid(ptr)) {
+      continue;
+    }
+
+    auto out_var_name = value_exec_info.GetVarName(ptr);
+
+    PADDLE_ENFORCE_NOT_NULL(inner_scope->FindVar(out_var_name),
+                            phi::errors::PreconditionNotMet(
+                                "can not find var[%s] in scope", out_var_name));
+
+    auto type = ptr.type();
+    if (type.isa<paddle::dialect::AllocatedDenseTensorType>() ||
+        type.isa<paddle::dialect::AllocatedSelectedRowsType>()) {
+      outputs_tensor_name_map[legacy_arg_name] = {out_var_name};
+    } else if (type.isa<pir::VectorType>()) {
+      auto var = inner_scope->FindVar(out_var_name);
+      auto var_ref = var->Get<VariableRefArray>();
+      std::vector<std::string> vec_tmp;
+      vec_tmp.reserve(var_ref.size());
+      for (size_t k = 0; k < var_ref.size(); ++k) {
+        vec_tmp.push_back(value_exec_info.GetVarName(var_ref[k]));
+      }
+      outputs_tensor_name_map[legacy_arg_name] = vec_tmp;
+    } else {
+      PADDLE_THROW(phi::errors::Unimplemented(
+          "only support AllocatedDenseTensor, AllocatedSelectedRowsType  and "
+          "pir::vector type"));
+    }
+  }
+}
+
 OneDNNPhiKernelInstruction::OneDNNPhiKernelInstruction(
     size_t id,
     const platform::Place& place,
     pir::Operation* op,
     const ValueExecutionInfo* value_exec_info)
     : InstructionBase(id, place), value_exec_info_(value_exec_info) {
-  PADDLE_THROW(platform::errors::Unimplemented(
-      "OneDNNPhiKernelInstruction not defined now."));
+  // Step1: build phi kernel instruction as PhiKernelInstruction
+  auto op_attributes = op->attributes();
+  auto op_name =
+      op_attributes.at("op_name").dyn_cast<pir::StrAttribute>().AsString();
+  pir::OpInfo op_info =
+      pir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
+  op_ = op;
+  phi_op_name_ = op_name;
+  VLOG(6) << "construct phi kernel instruction for: " << phi_op_name_;
+
+  SetKernelType(AnalyseOpFuncType(op, place));
+  VLOG(6) << "finish process analyse kernel type";
+
+  infer_meta_interface_ =
+      op_info.GetInterfaceImpl<paddle::dialect::InferMetaInterface>();
+  VLOG(6) << "finish process infer_meta_interface_";
+
+  auto yaml_interface =
+      op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>();
+  PADDLE_ENFORCE_NOT_NULL(
+      yaml_interface,
+      phi::errors::PreconditionNotMet(
+          "can not find OpYamlInfoInterface from [%s]", phi_op_name_));
+  paddle::dialect::OpYamlInfoParser yaml_info_parser(
+      yaml_interface->get_op_info_(),
+      paddle::dialect::IsOneDNNLegacyOp(op_name));
+  VLOG(6) << "finish process yaml_info_parser";
+
+  if (infer_meta_interface_) {
+    BuildPhiContext<
+        phi::InferMetaContext,
+        phi::MetaTensor,
+        phi::MetaTensor,
+        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
+        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
+        false>(op, *value_exec_info_, yaml_info_parser, &infer_meta_context_);
+  }
+  VLOG(6) << "finish process infer meta context";
+
+  auto kernel_name =
+      op_attributes.at("kernel_name").dyn_cast<pir::StrAttribute>().AsString();
+  auto kernel_key = op_attributes.at("kernel_key")
+                        .dyn_cast<paddle::dialect::KernelAttribute>()
+                        .data();
+  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+      kernel_name, kernel_key);
+  phi_kernel_ = new phi::Kernel(kernel_result.kernel);
+  PADDLE_ENFORCE_EQ(
+      phi_kernel_->IsValid(), true, "not found kernel for [%s]", kernel_name);
+  VLOG(6) << "finish process select kernel";
+
+  BuildPhiContext<phi::KernelContext,
+                  const phi::TensorBase*,
+                  phi::TensorBase*,
+                  paddle::small_vector<const phi::TensorBase*>,
+                  paddle::small_vector<phi::TensorBase*>,
+                  true>(
+      op, *value_exec_info_, yaml_info_parser, &kernel_context_);
+
+  kernel_context_.SetDeviceContext(phi::DeviceContextPool::Instance().Get(
+      phi::TransToPhiPlace(kernel_key.backend())));
+  VLOG(6) << "finish process kernel context";
+
+  SetDeviceContext(
+      ParseDeviceContext(op,
+                         phi::DeviceContextPool::Instance().Get(
+                             phi::TransToPhiPlace(kernel_key.backend())),
+                         place,
+                         GetExecutionStream(),
+                         GetStreamPriority()));
+  VLOG(6) << "finish process device context";
+
+  InitInputsOutputsIds(op, *value_exec_info);
+  VLOG(6) << "finish process inputs outputs index";
+
+  auto& no_need_buffer_ids = yaml_info_parser.NoNeedBufferIds();
+  std::unordered_set<pir::Value> no_need_buffer_values;
+  for (size_t id = 0; id < no_need_buffer_ids.size(); id++) {
+    no_need_buffer_values.insert(op->operand_source(no_need_buffer_ids[id]));
+  }
+  SetNoNeedBuffer(no_need_buffer_values);
+  VLOG(6) << "finish process no need buffer";
+
+  // Step2: build layout_transform information
+  if (op_attributes.count("layout_transform_arg")) {
+    auto layout_transform_arg = op_attributes.at("layout_transform_arg")
+                                    .dyn_cast<pir::StrAttribute>()
+                                    .AsString();
+    auto data_layout = op_attributes.at(layout_transform_arg)
+                           .dyn_cast<pir::StrAttribute>()
+                           .AsString();
+    input_layout_ = common::StringToDataLayout(data_layout);
+    std::vector<pir::Attribute> layout_transform_inputs_attr =
+        op->attributes()
+            .at("layout_transform_inputs")
+            .dyn_cast<pir::ArrayAttribute>()
+            .AsVector();
+    std::vector<std::string> layout_transform_inputs;
+    for (auto& attr : layout_transform_inputs_attr) {
+      auto pair = kernel_context_.InputRangeAt(value_exec_info_->GetIdByName(
+          attr.dyn_cast<pir::StrAttribute>().AsString()));
+      for (int i = pair.first; i < pair.second; ++i) {
+        layout_transform_inputs_.insert(i);
+      }
+    }
+  }
+
+  // Step3: build extra attr information
+  if (op_attributes.count("extra_args")) {
+    std::vector<pir::Attribute> extra_args_attr =
+        op->attributes()
+            .at("extra_args")
+            .dyn_cast<pir::ArrayAttribute>()
+            .AsVector();
+    std::vector<std::string> extra_args;
+    for (auto& attr : extra_args_attr) {
+      auto attr_name = attr.dyn_cast<pir::StrAttribute>().AsString();
+      extra_attr_[attr_name] = ConvertPIRAttribute2RuntimeAttribute(
+          op_attributes.at(attr_name), attr_name, yaml_info_parser);
+    }
+  }
+  TensorNameMap(op, *value_exec_info_, yaml_info_parser, inputs_, outputs_);
 }
 
-OneDNNPhiKernelInstruction::~OneDNNPhiKernelInstruction() {}
+OneDNNPhiKernelInstruction::~OneDNNPhiKernelInstruction() {
+  if (phi_kernel_ != nullptr) {
+    delete phi_kernel_;
+  }
+}
 
 void OneDNNPhiKernelInstruction::Run() {
-  PADDLE_THROW(platform::errors::Unimplemented(
-      "OneDNNPhiKernelInstruction not defined now."));
+  // Step1. Mixed Dynamic Choose Kernel
+  // todo if (input_tensor.layout() != phi::DataLayout::ONEDNN)
+
+  // Step2. TransLayout
+  auto inputs = kernel_context_.InputsBetween<phi::DenseTensor>(
+      size_t(0), kernel_context_.InputsSize());
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    auto input = inputs[i];
+    if (input->layout() != phi::DataLayout::ONEDNN) {
+      phi::DataLayout from_layout = input->layout();
+
+      //  Handle 'layout_transform' in
+      //  ops_onednn_extra.yaml(GetKernelTypeForVar)
+      if (layout_transform_inputs_.count(i) &&
+          input_layout_ != phi::DataLayout::kAnyLayout) {
+        from_layout = input_layout_;
+      }
+
+      auto transed_tensor = const_cast<phi::DenseTensor*>(input);
+
+      if (from_layout == DataLayout::kNHWC ||
+          from_layout == DataLayout::kNDHWC) {
+        phi::funcs::MatchShapeToLayout(
+            transed_tensor, from_layout, phi::DataLayout::ONEDNN);
+        // We register only NHWC assuming that model is consistent e.g. either
+        // NHWC or NCHW
+        phi::OneDNNContext::tls().set_cur_paddle_data_layout(from_layout);
+      }
+
+      dnnl::memory::desc out_mem_desc =
+          phi::funcs::make_memory_desc(*input, from_layout);
+      transed_tensor->set_mem_desc(out_mem_desc);
+    }
+  }
+
+  // Step3. Append extra information into ctx
+  // SetDnnAttrIntoDeviceContext
+  // SetInputsName SetOutputsName
+  auto one_dnn_ctx = const_cast<phi::OneDNNContext*>(
+      &kernel_context_.GetDeviceContext<phi::OneDNNContext>());
+  for (auto& attr : extra_attr_) {
+    one_dnn_ctx->SetDnnAttr(attr.first, attr.second);
+  }
+  one_dnn_ctx->SetInputsName(inputs_);
+  one_dnn_ctx->SetOutputsName(outputs_);
+
+  // Step4. InferMeta
+  if (infer_meta_interface_) {
+    infer_meta_interface_->infer_meta_(&(infer_meta_context_));
+  }
+
+  // Step5. Run kernel
+  VLOG(6) << "Run op " << phi_op_name_ << " infer meta.";
+  (*(phi_kernel_))(&(kernel_context_));
+  VLOG(6) << "Run op " << phi_op_name_ << " kernel.";
+
+  // Step6. ClearDnnAttr
+  one_dnn_ctx->ClearDnnAttr();
 }
 
 }  // namespace framework
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.h b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.h
index da84b8bb0370f3..c15a69728f9c3d 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.h
+++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.h
@@ -25,6 +25,9 @@ namespace framework {
 class Scope;
 class ValueExecutionInfo;
 
+using RuntimeAttribute = phi::Attribute;
+using PIRAttribute = pir::Attribute;
+
 class OneDNNPhiKernelInstruction : public InstructionBase {
  public:
   OneDNNPhiKernelInstruction(size_t id,
@@ -67,6 +70,12 @@ class OneDNNPhiKernelInstruction : public InstructionBase {
   ::pir::Operation* op_{nullptr};  // not owned
 
   const ValueExecutionInfo* value_exec_info_;  // not owned
+
+  std::set<int> layout_transform_inputs_{};
+  phi::DataLayout input_layout_{phi::DataLayout::kAnyLayout};
+  std::map<std::string, RuntimeAttribute> extra_attr_{};
+  std::map<std::string, std::vector<std::string>> inputs_{};
+  std::map<std::string, std::vector<std::string>> outputs_{};
 };
 
 }  // namespace framework

From dcd2ade1548a5392c542614e1f456fe2d29c5b78 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Fri, 22 Dec 2023 07:01:27 +0000
Subject: [PATCH 34/49] refine

---
 .../instruction/onednn/onednn_phi_kernel_instruction.cc     | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
index 6bab57d6f3b79c..174e1ed445981b 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
@@ -236,9 +236,9 @@ OneDNNPhiKernelInstruction::OneDNNPhiKernelInstruction(
   auto kernel_key = op_attributes.at("kernel_key")
                         .dyn_cast<paddle::dialect::KernelAttribute>()
                         .data();
-  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      kernel_name, kernel_key);
-  phi_kernel_ = new phi::Kernel(kernel_result.kernel);
+
+  phi_kernel_ = new phi::Kernel(
+      phi::KernelFactory::Instance().SelectKernel(kernel_name, kernel_key));
   PADDLE_ENFORCE_EQ(
       phi_kernel_->IsValid(), true, "not found kernel for [%s]", kernel_name);
   VLOG(6) << "finish process select kernel";

From bfb3b42a1c4d7bc26e6213683690af479c9a8fa5 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Fri, 22 Dec 2023 07:20:33 +0000
Subject: [PATCH 35/49] refine

---
 .../ir_adaptor/translator/op_translator.cc    | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/ir_adaptor/translator/op_translator.cc b/paddle/fluid/ir_adaptor/translator/op_translator.cc
index 69d488186deff2..b76f4a9b7e2598 100644
--- a/paddle/fluid/ir_adaptor/translator/op_translator.cc
+++ b/paddle/fluid/ir_adaptor/translator/op_translator.cc
@@ -231,9 +231,22 @@ inline pir::Operation* InsertCreateArrayOp(pir::IrContext* ctx,
 
 inline std::string GetPrefix(const OpDesc& op_desc) {
 #ifdef PADDLE_WITH_DNNL
-  return op_desc.GetAttrIfExists<bool>("use_mkldnn")
-             ? kOneDNNTargetDialectPrefix
-             : kTargetDialectPrefix;
+  if (op_desc.GetAttrIfExists<bool>("use_mkldnn")) {
+    std::string target_op_name =
+        kOneDNNTargetDialectPrefix + OpNameCompatibleMapping(op_desc.Type());
+    if (IsInplace(op_desc) && *target_op_name.rbegin() != '_') {
+      target_op_name += "_";
+    }
+    auto op_info = ctx->GetRegisteredOpInfo(target_op_name);
+    if (!op_info) {
+      VLOG(3) << op_desc.Type()
+              << "'s use_mkldnn == True, but PIR not support OneDNN for this "
+                 "op right now.";
+      return kTargetDialectPrefix
+    } else {
+      return kOneDNNTargetDialectPrefix;
+    }
+  }
 #else
   return kTargetDialectPrefix;
 #endif

From 556c194c8e284779849d463e0d7beb4ae2f7a691 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Fri, 22 Dec 2023 07:30:05 +0000
Subject: [PATCH 36/49] refine

---
 paddle/fluid/ir_adaptor/translator/op_translator.cc | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/paddle/fluid/ir_adaptor/translator/op_translator.cc b/paddle/fluid/ir_adaptor/translator/op_translator.cc
index b76f4a9b7e2598..44598fe49bd366 100644
--- a/paddle/fluid/ir_adaptor/translator/op_translator.cc
+++ b/paddle/fluid/ir_adaptor/translator/op_translator.cc
@@ -229,7 +229,7 @@ inline pir::Operation* InsertCreateArrayOp(pir::IrContext* ctx,
   return create_array_op.operation();
 }
 
-inline std::string GetPrefix(const OpDesc& op_desc) {
+inline std::string GetPrefix(pir::IrContext* ctx, const OpDesc& op_desc) {
 #ifdef PADDLE_WITH_DNNL
   if (op_desc.GetAttrIfExists<bool>("use_mkldnn")) {
     std::string target_op_name =
@@ -242,7 +242,7 @@ inline std::string GetPrefix(const OpDesc& op_desc) {
       VLOG(3) << op_desc.Type()
               << "'s use_mkldnn == True, but PIR not support OneDNN for this "
                  "op right now.";
-      return kTargetDialectPrefix
+      return kTargetDialectPrefix;
     } else {
       return kOneDNNTargetDialectPrefix;
     }
@@ -256,7 +256,7 @@ inline std::string GetPrefix(const OpDesc& op_desc) {
 pir::OpInfo OpTranscriber::LoopkUpOpInfo(pir::IrContext* ctx,
                                          const OpDesc& op_desc) {
   std::string target_op_name =
-      GetPrefix(op_desc) + OpNameCompatibleMapping(op_desc.Type());
+      GetPrefix(ctx, op_desc) + OpNameCompatibleMapping(op_desc.Type());
   if (IsInplace(op_desc) && *target_op_name.rbegin() != '_') {
     target_op_name += "_";
   }
@@ -349,7 +349,7 @@ pir::OpInfo OpTranscriber::LoopkUpOpInfo(pir::IrContext* ctx,
              op_desc.Type(),
              target_op_name);
 
-  target_op_name = GetPrefix(op_desc) + target_op_name;
+  target_op_name = GetPrefix(ctx, op_desc) + target_op_name;
   if (IsInplace(op_desc) && *target_op_name.rbegin() != '_') {
     target_op_name += "_";
   }
@@ -1070,7 +1070,7 @@ struct EmbeddingGradOpTranscriber : public OpTranscriber {
   pir::OpInfo LoopkUpOpInfo(pir::IrContext* ctx,
                             const OpDesc& op_desc) override {
     std::string target_op_name =
-        GetPrefix(op_desc) + OpNameCompatibleMapping(op_desc.Type());
+        GetPrefix(ctx, op_desc) + OpNameCompatibleMapping(op_desc.Type());
 
     bool is_sparse = paddle::get<bool>(op_desc.GetAttr("is_sparse"));
 
@@ -1323,7 +1323,7 @@ struct AddNOpTranscriber : public OpTranscriber {
   pir::OpInfo LoopkUpOpInfo(pir::IrContext* ctx,
                             const OpDesc& op_desc) override {
     std::string target_op_name =
-        GetPrefix(op_desc) + OpNameCompatibleMapping(op_desc.Type());
+        GetPrefix(ctx, op_desc) + OpNameCompatibleMapping(op_desc.Type());
     if (IsInplace(op_desc)) {
       target_op_name += "_";
     } else {

From 9dee464016f5ef0465a7cd3d423c7cf77e829563 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Fri, 22 Dec 2023 07:32:01 +0000
Subject: [PATCH 37/49] refine

---
 paddle/fluid/ir_adaptor/translator/op_translator.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/paddle/fluid/ir_adaptor/translator/op_translator.cc b/paddle/fluid/ir_adaptor/translator/op_translator.cc
index 44598fe49bd366..36e33737be8ebe 100644
--- a/paddle/fluid/ir_adaptor/translator/op_translator.cc
+++ b/paddle/fluid/ir_adaptor/translator/op_translator.cc
@@ -246,6 +246,8 @@ inline std::string GetPrefix(pir::IrContext* ctx, const OpDesc& op_desc) {
     } else {
       return kOneDNNTargetDialectPrefix;
     }
+  } else {
+    return kTargetDialectPrefix;
   }
 #else
   return kTargetDialectPrefix;

From afdc6173ce40c12ec03b9bba2fa10864c3e6fb8b Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Fri, 22 Dec 2023 07:37:15 +0000
Subject: [PATCH 38/49] refine

---
 test/mkldnn/test_conv2d_mkldnn_op.py | 91 ++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)

diff --git a/test/mkldnn/test_conv2d_mkldnn_op.py b/test/mkldnn/test_conv2d_mkldnn_op.py
index 3c77581acf80db..2d6cafdbc3734b 100644
--- a/test/mkldnn/test_conv2d_mkldnn_op.py
+++ b/test/mkldnn/test_conv2d_mkldnn_op.py
@@ -17,6 +17,9 @@
 import numpy as np
 from op_test import OpTest, skip_check_grad_ci
 from test_conv2d_op import TestConv2DOp, TestConv2DOp_v2
+from utils import compare_legacy_with_pt
+
+from paddle.base import core
 
 
 def conv2d_bias_naive(out, bias):
@@ -113,6 +116,94 @@ def setUp(self):
         self.outputs['Output'] = output
 
 
+class TestConv2DMKLDNNOp2(TestConv2DOp):
+    def init_group(self):
+        self.groups = 1
+
+    def init_kernel_type(self):
+        self.data_format = "NCHW"
+        self.use_mkldnn = True
+        self._cpu_only = True
+        self.dtype = np.float32
+
+    def init_test_case(self):
+        self.pad = [0, 0]
+        self.stride = [1, 1]
+        self.input_size = [2, 3, 5, 5]  # NCHW
+        assert np.mod(self.input_size[1], self.groups) == 0
+        f_c = self.input_size[1] // self.groups
+        self.filter_size = [6, f_c, 3, 3]
+
+    def setUp(self):
+        self.fuse_bias = False
+        self.bias_size = None
+        self.fuse_activation = ""
+        self.fuse_alpha = 0
+        self.fuse_beta = 0
+        self.fuse_residual_connection = False
+        self.input_residual_size = None
+
+        TestConv2DOp.setUp(self)
+
+        output = self.outputs['Output']
+
+        # mkldnn only support either conv-sum-relu, or conv-relu.
+        if self.fuse_bias and self.bias_size is not None:
+            bias = np.random.random(self.bias_size).astype(self.dtype)
+            output = conv2d_bias_naive(output, bias)
+            output = output.astype(self.dtype)
+            self.attrs['fuse_bias'] = self.fuse_bias
+            self.inputs['Bias'] = OpTest.np_dtype_to_base_dtype(bias)
+
+        if (
+            self.fuse_residual_connection
+            and self.input_residual_size is not None
+        ):
+            input_residual = np.random.random(self.input_residual_size).astype(
+                self.dtype
+            )
+            output = conv2d_residual_naive(output, input_residual)
+
+            self.attrs[
+                'fuse_residual_connection'
+            ] = self.fuse_residual_connection
+            self.inputs['ResidualData'] = OpTest.np_dtype_to_base_dtype(
+                input_residual
+            )
+
+        if self.fuse_activation == "relu":
+            output = np.maximum(output, 0).astype(self.dsttype)
+
+        if self.fuse_activation == "relu6":
+            output = np.minimum(np.maximum(output, 0), self.fuse_beta).astype(
+                self.dsttype
+            )
+        if (
+            self.fuse_activation != ""
+            or self.fuse_bias
+            or self.fuse_residual_connection
+        ):
+            self.op_type = 'fused_conv2d'
+
+        output = output.astype(self.dtype)
+
+        self.attrs['fuse_bias'] = self.fuse_bias
+        self.attrs['fuse_activation'] = self.fuse_activation
+        self.attrs['fuse_alpha'] = self.fuse_alpha
+        self.attrs['fuse_beta'] = self.fuse_beta
+        self.attrs['fuse_residual_connection'] = self.fuse_residual_connection
+
+        self.outputs['Output'] = output
+
+    @compare_legacy_with_pt
+    def test_check_output(self):
+        place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace()
+        # TODO(wangzhongpu): support mkldnn op in dygraph mode
+        self.check_output_with_place(
+            place, atol=1e-5, check_dygraph=(not self.use_mkldnn)
+        )
+
+
 @skip_check_grad_ci(
     reason="Fusion is for inference only, check_grad is not required."
 )

From bea457277b8af9a557092d27fb2569a3217b2d56 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Fri, 22 Dec 2023 07:40:57 +0000
Subject: [PATCH 39/49] refine

---
 .../onednn_mixed_phi_kernel_instruction.cc    | 340 +++++++++++++++++-
 .../onednn_mixed_phi_kernel_instruction.h     |   9 +
 .../onednn/onednn_phi_kernel_instruction.cc   |  13 +-
 3 files changed, 349 insertions(+), 13 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.cc
index 3c7a3f368d2a67..4796e6de6a9f58 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.cc
@@ -32,25 +32,355 @@
 #include "paddle/pir/core/operation.h"
 #include "paddle/pir/core/value.h"
 
+#include "dnnl.hpp"  // NOLINT
 #include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
+#include "paddle/fluid/framework/type_defs.h"
+#include "paddle/fluid/ir_adaptor/translator/op_compat_info.h"
+#include "paddle/phi/backends/onednn/onednn_context.h"
+#include "paddle/phi/backends/onednn/onednn_helper.h"
+#include "paddle/phi/kernels/funcs/data_layout_transform.h"
+
 namespace paddle {
 namespace framework {
 
+static RuntimeAttribute ConvertPIRAttribute2RuntimeAttribute(
+    PIRAttribute attr,
+    const std::string& attr_name,
+    const paddle::dialect::OpYamlInfoParser& op_yaml_info) {
+  auto& attr_type_name = op_yaml_info.AttrTypeName(attr_name);
+  if (attr_type_name == "pir::Int32Attribute") {
+    return attr.dyn_cast<pir::Int32Attribute>().data();
+  } else if (attr_type_name == "pir::FloatAttribute") {
+    return attr.dyn_cast<pir::FloatAttribute>().data();
+  } else if (attr_type_name == "pir::BoolAttribute") {
+    return attr.dyn_cast<pir::BoolAttribute>().data();
+  } else if (attr_type_name == "pir::StrAttribute") {
+    return attr.dyn_cast<pir::StrAttribute>().AsString();
+  } else if (attr_type_name == "pir::ArrayAttribute<pir::Int32Attribute>") {
+    auto array_list = attr.dyn_cast<pir::ArrayAttribute>().AsVector();
+    std::vector<int32_t> vec_res;
+    if (array_list.size() > 0) {
+      PADDLE_ENFORCE_EQ(array_list[0].isa<pir::Int32Attribute>(),
+                        true,
+                        phi::errors::Unimplemented(
+                            "the 0th elementwise MUST be pir::Int32Attribute"));
+      for (size_t i = 0; i < array_list.size(); ++i) {
+        vec_res.push_back(array_list[i].dyn_cast<pir::Int32Attribute>().data());
+      }
+    }
+    return vec_res;
+  } else if (attr_type_name == "pir::ArrayAttribute<pir::FloatAttribute>") {
+    auto array_list = attr.dyn_cast<pir::ArrayAttribute>().AsVector();
+    std::vector<float> vec_res;
+    if (array_list.size() > 0) {
+      if (array_list[0].isa<pir::FloatAttribute>()) {
+        for (size_t i = 0; i < array_list.size(); ++i) {
+          vec_res.push_back(
+              array_list[i].dyn_cast<pir::FloatAttribute>().data());
+        }
+
+      } else {
+        PADDLE_THROW(phi::errors::Unimplemented(
+            "ConvertPIRAttribute2RuntimeAttribute not support [%s] ",
+            attr_type_name));
+      }
+    }
+    return vec_res;
+  } else {
+    PADDLE_THROW(phi::errors::Unimplemented(
+        "ConvertPIRAttribute2RuntimeAttribute not support [%s] ",
+        attr_type_name));
+  }
+}
+
+void TensorNameMap(pir::Operation* op,
+                   const ValueExecutionInfo& value_exec_info,
+                   const paddle::dialect::OpYamlInfoParser& op_yaml_info,
+                   std::map<std::string, std::vector<std::string>>&
+                       inputs_tensor_name_map,  // NOLINT
+                   std::map<std::string, std::vector<std::string>>&
+                       outputs_tensor_name_map) {  // NOLINT
+  const Scope* inner_scope = value_exec_info.GetScope();
+  VLOG(6) << "TensorNameMap in scope[" << inner_scope << "]";
+
+  auto& vec_kernel_fn_tensor_params = op_yaml_info.TensorParams(true);
+
+  auto& name2id = op_yaml_info.InputName2Id();
+
+  std::string fluid_op_name = op_yaml_info.GetOriginOpName();
+
+  auto& op_normalizer = paddle::translator::OpNameNormalizer::instance();
+
+  for (auto& name : vec_kernel_fn_tensor_params) {
+    PADDLE_ENFORCE_EQ(
+        name2id.count(name),
+        true,
+        phi::errors::NotFound("param [%s] MUST in name2id map", name));
+    auto index = name2id.at(name);
+    pir::Value ptr = op->operand_source(index);
+
+    if (!IsInvalid(ptr)) {
+      continue;
+    }
+
+    auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
+    auto in_var_name = value_exec_info.GetVarName(ptr);
+    PADDLE_ENFORCE_NOT_NULL(inner_scope->FindVar(in_var_name),
+                            phi::errors::PreconditionNotMet(
+                                "can not find var[%s] in scope", in_var_name));
+
+    auto type = ptr.type();
+    if (type.isa<paddle::dialect::AllocatedDenseTensorType>() ||
+        type.isa<paddle::dialect::AllocatedSelectedRowsType>()) {
+      inputs_tensor_name_map[legacy_arg_name] = {in_var_name};
+    } else if (type.isa<pir::VectorType>()) {
+      auto var = inner_scope->FindVar(in_var_name);
+      auto var_ref = var->Get<VariableRefArray>();
+      std::vector<std::string> vec_tmp;
+      vec_tmp.reserve(var_ref.size());
+      for (size_t k = 0; k < var_ref.size(); ++k) {
+        vec_tmp.push_back(value_exec_info.GetVarName(var_ref[k]));
+      }
+      inputs_tensor_name_map[legacy_arg_name] = vec_tmp;
+    } else {
+      PADDLE_THROW(phi::errors::Unimplemented(
+          "only support AllocatedDenseTensor, AllocatedSelectedRowsType  and "
+          "pir::vector type"));
+    }
+  }
+
+  auto& output_name_list = op_yaml_info.OutputNames();
+  for (size_t i = 0; i < output_name_list.size(); ++i) {
+    auto name = output_name_list[i];
+    pir::Value ptr = op->result(i);
+    auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
+
+    if (!IsInvalid(ptr)) {
+      continue;
+    }
+
+    auto out_var_name = value_exec_info.GetVarName(ptr);
+
+    PADDLE_ENFORCE_NOT_NULL(inner_scope->FindVar(out_var_name),
+                            phi::errors::PreconditionNotMet(
+                                "can not find var[%s] in scope", out_var_name));
+
+    auto type = ptr.type();
+    if (type.isa<paddle::dialect::AllocatedDenseTensorType>() ||
+        type.isa<paddle::dialect::AllocatedSelectedRowsType>()) {
+      outputs_tensor_name_map[legacy_arg_name] = {out_var_name};
+    } else if (type.isa<pir::VectorType>()) {
+      auto var = inner_scope->FindVar(out_var_name);
+      auto var_ref = var->Get<VariableRefArray>();
+      std::vector<std::string> vec_tmp;
+      vec_tmp.reserve(var_ref.size());
+      for (size_t k = 0; k < var_ref.size(); ++k) {
+        vec_tmp.push_back(value_exec_info.GetVarName(var_ref[k]));
+      }
+      outputs_tensor_name_map[legacy_arg_name] = vec_tmp;
+    } else {
+      PADDLE_THROW(phi::errors::Unimplemented(
+          "only support AllocatedDenseTensor, AllocatedSelectedRowsType  and "
+          "pir::vector type"));
+    }
+  }
+}
+
 OneDNNMixedPhiKernelInstruction::OneDNNMixedPhiKernelInstruction(
     size_t id,
     const platform::Place& place,
     pir::Operation* op,
     const ValueExecutionInfo* value_exec_info)
     : InstructionBase(id, place), value_exec_info_(value_exec_info) {
-  PADDLE_THROW(platform::errors::Unimplemented(
-      "OneDNNMixedPhiKernelInstruction not defined now."));
+  // Step1: build phi kernel instruction as PhiKernelInstruction
+  auto op_attributes = op->attributes();
+  auto op_name =
+      op_attributes.at("op_name").dyn_cast<pir::StrAttribute>().AsString();
+  pir::OpInfo op_info =
+      pir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
+  op_ = op;
+  phi_op_name_ = op_name;
+  VLOG(6) << "construct phi kernel instruction for: " << phi_op_name_;
+
+  SetKernelType(AnalyseOpFuncType(op, place));
+  VLOG(6) << "finish process analyse kernel type";
+
+  infer_meta_interface_ =
+      op_info.GetInterfaceImpl<paddle::dialect::InferMetaInterface>();
+  VLOG(6) << "finish process infer_meta_interface_";
+
+  auto yaml_interface =
+      op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>();
+  PADDLE_ENFORCE_NOT_NULL(
+      yaml_interface,
+      phi::errors::PreconditionNotMet(
+          "can not find OpYamlInfoInterface from [%s]", phi_op_name_));
+  paddle::dialect::OpYamlInfoParser yaml_info_parser(
+      yaml_interface->get_op_info_(),
+      paddle::dialect::IsOneDNNLegacyOp(op_name));
+  VLOG(6) << "finish process yaml_info_parser";
+
+  if (infer_meta_interface_) {
+    BuildPhiContext<
+        phi::InferMetaContext,
+        phi::MetaTensor,
+        phi::MetaTensor,
+        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
+        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
+        false>(op, *value_exec_info_, yaml_info_parser, &infer_meta_context_);
+  }
+  VLOG(6) << "finish process infer meta context";
+
+  auto kernel_name =
+      op_attributes.at("kernel_name").dyn_cast<pir::StrAttribute>().AsString();
+  auto kernel_key = op_attributes.at("kernel_key")
+                        .dyn_cast<paddle::dialect::KernelAttribute>()
+                        .data();
+
+  phi_kernel_ = new phi::Kernel(
+      phi::KernelFactory::Instance().SelectKernel(kernel_name, kernel_key));
+  PADDLE_ENFORCE_EQ(
+      phi_kernel_->IsValid(), true, "not found kernel for [%s]", kernel_name);
+  VLOG(6) << "finish process select kernel";
+
+  BuildPhiContext<phi::KernelContext,
+                  const phi::TensorBase*,
+                  phi::TensorBase*,
+                  paddle::small_vector<const phi::TensorBase*>,
+                  paddle::small_vector<phi::TensorBase*>,
+                  true>(
+      op, *value_exec_info_, yaml_info_parser, &kernel_context_);
+
+  kernel_context_.SetDeviceContext(phi::DeviceContextPool::Instance().Get(
+      phi::TransToPhiPlace(kernel_key.backend())));
+  VLOG(6) << "finish process kernel context";
+
+  SetDeviceContext(
+      ParseDeviceContext(op,
+                         phi::DeviceContextPool::Instance().Get(
+                             phi::TransToPhiPlace(kernel_key.backend())),
+                         place,
+                         GetExecutionStream(),
+                         GetStreamPriority()));
+  VLOG(6) << "finish process device context";
+
+  InitInputsOutputsIds(op, *value_exec_info);
+  VLOG(6) << "finish process inputs outputs index";
+
+  auto& no_need_buffer_ids = yaml_info_parser.NoNeedBufferIds();
+  std::unordered_set<pir::Value> no_need_buffer_values;
+  for (size_t id = 0; id < no_need_buffer_ids.size(); id++) {
+    no_need_buffer_values.insert(op->operand_source(no_need_buffer_ids[id]));
+  }
+  SetNoNeedBuffer(no_need_buffer_values);
+  VLOG(6) << "finish process no need buffer";
+
+  // Step2: build layout_transform information
+  if (op_attributes.count("layout_transform_arg")) {
+    auto layout_transform_arg = op_attributes.at("layout_transform_arg")
+                                    .dyn_cast<pir::StrAttribute>()
+                                    .AsString();
+    auto data_layout = op_attributes.at(layout_transform_arg)
+                           .dyn_cast<pir::StrAttribute>()
+                           .AsString();
+    input_layout_ = common::StringToDataLayout(data_layout);
+    std::vector<pir::Attribute> layout_transform_inputs_attr =
+        op->attributes()
+            .at("layout_transform_inputs")
+            .dyn_cast<pir::ArrayAttribute>()
+            .AsVector();
+    std::vector<std::string> layout_transform_inputs;
+    for (auto& attr : layout_transform_inputs_attr) {
+      auto pair = kernel_context_.InputRangeAt(value_exec_info_->GetIdByName(
+          attr.dyn_cast<pir::StrAttribute>().AsString()));
+      for (int i = pair.first; i < pair.second; ++i) {
+        layout_transform_inputs_.insert(i);
+      }
+    }
+  }
+
+  // Step3: build extra attr information
+  if (op_attributes.count("extra_args")) {
+    std::vector<pir::Attribute> extra_args_attr =
+        op->attributes()
+            .at("extra_args")
+            .dyn_cast<pir::ArrayAttribute>()
+            .AsVector();
+    std::vector<std::string> extra_args;
+    for (auto& attr : extra_args_attr) {
+      auto attr_name = attr.dyn_cast<pir::StrAttribute>().AsString();
+      extra_attr_[attr_name] = ConvertPIRAttribute2RuntimeAttribute(
+          op_attributes.at(attr_name), attr_name, yaml_info_parser);
+    }
+  }
+  TensorNameMap(op, *value_exec_info_, yaml_info_parser, inputs_, outputs_);
 }
 
-OneDNNMixedPhiKernelInstruction::~OneDNNMixedPhiKernelInstruction() {}
+OneDNNMixedPhiKernelInstruction::~OneDNNMixedPhiKernelInstruction() {
+  if (phi_kernel_ != nullptr) {
+    delete phi_kernel_;
+  }
+}
 
 void OneDNNMixedPhiKernelInstruction::Run() {
-  PADDLE_THROW(platform::errors::Unimplemented(
-      "OneDNNMixedPhiKernelInstruction not defined now."));
+  // Step1. Mixed Dynamic Choose Kernel
+  // todo if (input_tensor.layout() != phi::DataLayout::ONEDNN)
+
+  // Step2. TransLayout
+  auto inputs = kernel_context_.InputsBetween<phi::DenseTensor>(
+      size_t(0), kernel_context_.InputsSize());
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    auto input = inputs[i];
+    if (input->layout() != phi::DataLayout::ONEDNN) {
+      phi::DataLayout from_layout = input->layout();
+
+      //  Handle 'layout_transform' in
+      //  ops_onednn_extra.yaml(GetKernelTypeForVar)
+      if (layout_transform_inputs_.count(i) &&
+          input_layout_ != phi::DataLayout::kAnyLayout) {
+        from_layout = input_layout_;
+      }
+
+      auto transed_tensor = const_cast<phi::DenseTensor*>(input);
+
+      if (from_layout == DataLayout::kNHWC ||
+          from_layout == DataLayout::kNDHWC) {
+        phi::funcs::MatchShapeToLayout(
+            transed_tensor, from_layout, phi::DataLayout::ONEDNN);
+        // We register only NHWC assuming that model is consistent e.g. either
+        // NHWC or NCHW
+        phi::OneDNNContext::tls().set_cur_paddle_data_layout(from_layout);
+      }
+
+      dnnl::memory::desc out_mem_desc =
+          phi::funcs::make_memory_desc(*input, from_layout);
+      transed_tensor->set_mem_desc(out_mem_desc);
+    }
+  }
+
+  // Step3. Append extra information into ctx
+  // SetDnnAttrIntoDeviceContext
+  // SetInputsName SetOutputsName
+  auto one_dnn_ctx = const_cast<phi::OneDNNContext*>(
+      &kernel_context_.GetDeviceContext<phi::OneDNNContext>());
+  for (auto& attr : extra_attr_) {
+    one_dnn_ctx->SetDnnAttr(attr.first, attr.second);
+  }
+  one_dnn_ctx->SetInputsName(inputs_);
+  one_dnn_ctx->SetOutputsName(outputs_);
+
+  // Step4. InferMeta
+  if (infer_meta_interface_) {
+    infer_meta_interface_->infer_meta_(&(infer_meta_context_));
+  }
+
+  // Step5. Run kernel
+  VLOG(6) << "Run op " << phi_op_name_ << " infer meta.";
+  (*(phi_kernel_))(&(kernel_context_));
+  VLOG(6) << "Run op " << phi_op_name_ << " kernel.";
+
+  // Step6. ClearDnnAttr
+  one_dnn_ctx->ClearDnnAttr();
 }
 
 }  // namespace framework
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.h b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.h
index bcacc13233302d..1022e22ac19e89 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.h
+++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.h
@@ -25,6 +25,9 @@ namespace framework {
 class Scope;
 class ValueExecutionInfo;
 
+using RuntimeAttribute = phi::Attribute;
+using PIRAttribute = pir::Attribute;
+
 class OneDNNMixedPhiKernelInstruction : public InstructionBase {
  public:
   OneDNNMixedPhiKernelInstruction(size_t id,
@@ -67,6 +70,12 @@ class OneDNNMixedPhiKernelInstruction : public InstructionBase {
   ::pir::Operation* op_{nullptr};  // not owned
 
   const ValueExecutionInfo* value_exec_info_;  // not owned
+
+  std::set<int> layout_transform_inputs_{};
+  phi::DataLayout input_layout_{phi::DataLayout::kAnyLayout};
+  std::map<std::string, RuntimeAttribute> extra_attr_{};
+  std::map<std::string, std::vector<std::string>> inputs_{};
+  std::map<std::string, std::vector<std::string>> outputs_{};
 };
 
 }  // namespace framework
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
index 174e1ed445981b..7c30e9adee7729 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
@@ -323,10 +323,7 @@ OneDNNPhiKernelInstruction::~OneDNNPhiKernelInstruction() {
 }
 
 void OneDNNPhiKernelInstruction::Run() {
-  // Step1. Mixed Dynamic Choose Kernel
-  // todo if (input_tensor.layout() != phi::DataLayout::ONEDNN)
-
-  // Step2. TransLayout
+  // Step1. TransLayout
   auto inputs = kernel_context_.InputsBetween<phi::DenseTensor>(
       size_t(0), kernel_context_.InputsSize());
   for (size_t i = 0; i < inputs.size(); ++i) {
@@ -358,7 +355,7 @@ void OneDNNPhiKernelInstruction::Run() {
     }
   }
 
-  // Step3. Append extra information into ctx
+  // Step2. Append extra information into ctx
   // SetDnnAttrIntoDeviceContext
   // SetInputsName SetOutputsName
   auto one_dnn_ctx = const_cast<phi::OneDNNContext*>(
@@ -369,17 +366,17 @@ void OneDNNPhiKernelInstruction::Run() {
   one_dnn_ctx->SetInputsName(inputs_);
   one_dnn_ctx->SetOutputsName(outputs_);
 
-  // Step4. InferMeta
+  // Step3. InferMeta
   if (infer_meta_interface_) {
     infer_meta_interface_->infer_meta_(&(infer_meta_context_));
   }
 
-  // Step5. Run kernel
+  // Step4. Run kernel
   VLOG(6) << "Run op " << phi_op_name_ << " infer meta.";
   (*(phi_kernel_))(&(kernel_context_));
   VLOG(6) << "Run op " << phi_op_name_ << " kernel.";
 
-  // Step6. ClearDnnAttr
+  // Step5. ClearDnnAttr
   one_dnn_ctx->ClearDnnAttr();
 }
 

From 8608d4957d1fd7e19f85561516efc6e3bd7a1430 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Fri, 22 Dec 2023 08:23:15 +0000
Subject: [PATCH 40/49] refine

---
 .../onednn_mixed_phi_kernel_instruction.cc    | 330 +-----------------
 .../onednn_mixed_phi_kernel_instruction.h     |  44 +--
 2 files changed, 4 insertions(+), 370 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.cc
index 4796e6de6a9f58..572c26eb420789 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.cc
@@ -43,344 +43,18 @@
 namespace paddle {
 namespace framework {
 
-static RuntimeAttribute ConvertPIRAttribute2RuntimeAttribute(
-    PIRAttribute attr,
-    const std::string& attr_name,
-    const paddle::dialect::OpYamlInfoParser& op_yaml_info) {
-  auto& attr_type_name = op_yaml_info.AttrTypeName(attr_name);
-  if (attr_type_name == "pir::Int32Attribute") {
-    return attr.dyn_cast<pir::Int32Attribute>().data();
-  } else if (attr_type_name == "pir::FloatAttribute") {
-    return attr.dyn_cast<pir::FloatAttribute>().data();
-  } else if (attr_type_name == "pir::BoolAttribute") {
-    return attr.dyn_cast<pir::BoolAttribute>().data();
-  } else if (attr_type_name == "pir::StrAttribute") {
-    return attr.dyn_cast<pir::StrAttribute>().AsString();
-  } else if (attr_type_name == "pir::ArrayAttribute<pir::Int32Attribute>") {
-    auto array_list = attr.dyn_cast<pir::ArrayAttribute>().AsVector();
-    std::vector<int32_t> vec_res;
-    if (array_list.size() > 0) {
-      PADDLE_ENFORCE_EQ(array_list[0].isa<pir::Int32Attribute>(),
-                        true,
-                        phi::errors::Unimplemented(
-                            "the 0th elementwise MUST be pir::Int32Attribute"));
-      for (size_t i = 0; i < array_list.size(); ++i) {
-        vec_res.push_back(array_list[i].dyn_cast<pir::Int32Attribute>().data());
-      }
-    }
-    return vec_res;
-  } else if (attr_type_name == "pir::ArrayAttribute<pir::FloatAttribute>") {
-    auto array_list = attr.dyn_cast<pir::ArrayAttribute>().AsVector();
-    std::vector<float> vec_res;
-    if (array_list.size() > 0) {
-      if (array_list[0].isa<pir::FloatAttribute>()) {
-        for (size_t i = 0; i < array_list.size(); ++i) {
-          vec_res.push_back(
-              array_list[i].dyn_cast<pir::FloatAttribute>().data());
-        }
-
-      } else {
-        PADDLE_THROW(phi::errors::Unimplemented(
-            "ConvertPIRAttribute2RuntimeAttribute not support [%s] ",
-            attr_type_name));
-      }
-    }
-    return vec_res;
-  } else {
-    PADDLE_THROW(phi::errors::Unimplemented(
-        "ConvertPIRAttribute2RuntimeAttribute not support [%s] ",
-        attr_type_name));
-  }
-}
-
-void TensorNameMap(pir::Operation* op,
-                   const ValueExecutionInfo& value_exec_info,
-                   const paddle::dialect::OpYamlInfoParser& op_yaml_info,
-                   std::map<std::string, std::vector<std::string>>&
-                       inputs_tensor_name_map,  // NOLINT
-                   std::map<std::string, std::vector<std::string>>&
-                       outputs_tensor_name_map) {  // NOLINT
-  const Scope* inner_scope = value_exec_info.GetScope();
-  VLOG(6) << "TensorNameMap in scope[" << inner_scope << "]";
-
-  auto& vec_kernel_fn_tensor_params = op_yaml_info.TensorParams(true);
-
-  auto& name2id = op_yaml_info.InputName2Id();
-
-  std::string fluid_op_name = op_yaml_info.GetOriginOpName();
-
-  auto& op_normalizer = paddle::translator::OpNameNormalizer::instance();
-
-  for (auto& name : vec_kernel_fn_tensor_params) {
-    PADDLE_ENFORCE_EQ(
-        name2id.count(name),
-        true,
-        phi::errors::NotFound("param [%s] MUST in name2id map", name));
-    auto index = name2id.at(name);
-    pir::Value ptr = op->operand_source(index);
-
-    if (!IsInvalid(ptr)) {
-      continue;
-    }
-
-    auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
-    auto in_var_name = value_exec_info.GetVarName(ptr);
-    PADDLE_ENFORCE_NOT_NULL(inner_scope->FindVar(in_var_name),
-                            phi::errors::PreconditionNotMet(
-                                "can not find var[%s] in scope", in_var_name));
-
-    auto type = ptr.type();
-    if (type.isa<paddle::dialect::AllocatedDenseTensorType>() ||
-        type.isa<paddle::dialect::AllocatedSelectedRowsType>()) {
-      inputs_tensor_name_map[legacy_arg_name] = {in_var_name};
-    } else if (type.isa<pir::VectorType>()) {
-      auto var = inner_scope->FindVar(in_var_name);
-      auto var_ref = var->Get<VariableRefArray>();
-      std::vector<std::string> vec_tmp;
-      vec_tmp.reserve(var_ref.size());
-      for (size_t k = 0; k < var_ref.size(); ++k) {
-        vec_tmp.push_back(value_exec_info.GetVarName(var_ref[k]));
-      }
-      inputs_tensor_name_map[legacy_arg_name] = vec_tmp;
-    } else {
-      PADDLE_THROW(phi::errors::Unimplemented(
-          "only support AllocatedDenseTensor, AllocatedSelectedRowsType  and "
-          "pir::vector type"));
-    }
-  }
-
-  auto& output_name_list = op_yaml_info.OutputNames();
-  for (size_t i = 0; i < output_name_list.size(); ++i) {
-    auto name = output_name_list[i];
-    pir::Value ptr = op->result(i);
-    auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
-
-    if (!IsInvalid(ptr)) {
-      continue;
-    }
-
-    auto out_var_name = value_exec_info.GetVarName(ptr);
-
-    PADDLE_ENFORCE_NOT_NULL(inner_scope->FindVar(out_var_name),
-                            phi::errors::PreconditionNotMet(
-                                "can not find var[%s] in scope", out_var_name));
-
-    auto type = ptr.type();
-    if (type.isa<paddle::dialect::AllocatedDenseTensorType>() ||
-        type.isa<paddle::dialect::AllocatedSelectedRowsType>()) {
-      outputs_tensor_name_map[legacy_arg_name] = {out_var_name};
-    } else if (type.isa<pir::VectorType>()) {
-      auto var = inner_scope->FindVar(out_var_name);
-      auto var_ref = var->Get<VariableRefArray>();
-      std::vector<std::string> vec_tmp;
-      vec_tmp.reserve(var_ref.size());
-      for (size_t k = 0; k < var_ref.size(); ++k) {
-        vec_tmp.push_back(value_exec_info.GetVarName(var_ref[k]));
-      }
-      outputs_tensor_name_map[legacy_arg_name] = vec_tmp;
-    } else {
-      PADDLE_THROW(phi::errors::Unimplemented(
-          "only support AllocatedDenseTensor, AllocatedSelectedRowsType  and "
-          "pir::vector type"));
-    }
-  }
-}
-
 OneDNNMixedPhiKernelInstruction::OneDNNMixedPhiKernelInstruction(
     size_t id,
     const platform::Place& place,
     pir::Operation* op,
     const ValueExecutionInfo* value_exec_info)
-    : InstructionBase(id, place), value_exec_info_(value_exec_info) {
-  // Step1: build phi kernel instruction as PhiKernelInstruction
-  auto op_attributes = op->attributes();
-  auto op_name =
-      op_attributes.at("op_name").dyn_cast<pir::StrAttribute>().AsString();
-  pir::OpInfo op_info =
-      pir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
-  op_ = op;
-  phi_op_name_ = op_name;
-  VLOG(6) << "construct phi kernel instruction for: " << phi_op_name_;
-
-  SetKernelType(AnalyseOpFuncType(op, place));
-  VLOG(6) << "finish process analyse kernel type";
-
-  infer_meta_interface_ =
-      op_info.GetInterfaceImpl<paddle::dialect::InferMetaInterface>();
-  VLOG(6) << "finish process infer_meta_interface_";
-
-  auto yaml_interface =
-      op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>();
-  PADDLE_ENFORCE_NOT_NULL(
-      yaml_interface,
-      phi::errors::PreconditionNotMet(
-          "can not find OpYamlInfoInterface from [%s]", phi_op_name_));
-  paddle::dialect::OpYamlInfoParser yaml_info_parser(
-      yaml_interface->get_op_info_(),
-      paddle::dialect::IsOneDNNLegacyOp(op_name));
-  VLOG(6) << "finish process yaml_info_parser";
-
-  if (infer_meta_interface_) {
-    BuildPhiContext<
-        phi::InferMetaContext,
-        phi::MetaTensor,
-        phi::MetaTensor,
-        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
-        paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
-        false>(op, *value_exec_info_, yaml_info_parser, &infer_meta_context_);
-  }
-  VLOG(6) << "finish process infer meta context";
-
-  auto kernel_name =
-      op_attributes.at("kernel_name").dyn_cast<pir::StrAttribute>().AsString();
-  auto kernel_key = op_attributes.at("kernel_key")
-                        .dyn_cast<paddle::dialect::KernelAttribute>()
-                        .data();
-
-  phi_kernel_ = new phi::Kernel(
-      phi::KernelFactory::Instance().SelectKernel(kernel_name, kernel_key));
-  PADDLE_ENFORCE_EQ(
-      phi_kernel_->IsValid(), true, "not found kernel for [%s]", kernel_name);
-  VLOG(6) << "finish process select kernel";
-
-  BuildPhiContext<phi::KernelContext,
-                  const phi::TensorBase*,
-                  phi::TensorBase*,
-                  paddle::small_vector<const phi::TensorBase*>,
-                  paddle::small_vector<phi::TensorBase*>,
-                  true>(
-      op, *value_exec_info_, yaml_info_parser, &kernel_context_);
-
-  kernel_context_.SetDeviceContext(phi::DeviceContextPool::Instance().Get(
-      phi::TransToPhiPlace(kernel_key.backend())));
-  VLOG(6) << "finish process kernel context";
-
-  SetDeviceContext(
-      ParseDeviceContext(op,
-                         phi::DeviceContextPool::Instance().Get(
-                             phi::TransToPhiPlace(kernel_key.backend())),
-                         place,
-                         GetExecutionStream(),
-                         GetStreamPriority()));
-  VLOG(6) << "finish process device context";
-
-  InitInputsOutputsIds(op, *value_exec_info);
-  VLOG(6) << "finish process inputs outputs index";
-
-  auto& no_need_buffer_ids = yaml_info_parser.NoNeedBufferIds();
-  std::unordered_set<pir::Value> no_need_buffer_values;
-  for (size_t id = 0; id < no_need_buffer_ids.size(); id++) {
-    no_need_buffer_values.insert(op->operand_source(no_need_buffer_ids[id]));
-  }
-  SetNoNeedBuffer(no_need_buffer_values);
-  VLOG(6) << "finish process no need buffer";
-
-  // Step2: build layout_transform information
-  if (op_attributes.count("layout_transform_arg")) {
-    auto layout_transform_arg = op_attributes.at("layout_transform_arg")
-                                    .dyn_cast<pir::StrAttribute>()
-                                    .AsString();
-    auto data_layout = op_attributes.at(layout_transform_arg)
-                           .dyn_cast<pir::StrAttribute>()
-                           .AsString();
-    input_layout_ = common::StringToDataLayout(data_layout);
-    std::vector<pir::Attribute> layout_transform_inputs_attr =
-        op->attributes()
-            .at("layout_transform_inputs")
-            .dyn_cast<pir::ArrayAttribute>()
-            .AsVector();
-    std::vector<std::string> layout_transform_inputs;
-    for (auto& attr : layout_transform_inputs_attr) {
-      auto pair = kernel_context_.InputRangeAt(value_exec_info_->GetIdByName(
-          attr.dyn_cast<pir::StrAttribute>().AsString()));
-      for (int i = pair.first; i < pair.second; ++i) {
-        layout_transform_inputs_.insert(i);
-      }
-    }
-  }
-
-  // Step3: build extra attr information
-  if (op_attributes.count("extra_args")) {
-    std::vector<pir::Attribute> extra_args_attr =
-        op->attributes()
-            .at("extra_args")
-            .dyn_cast<pir::ArrayAttribute>()
-            .AsVector();
-    std::vector<std::string> extra_args;
-    for (auto& attr : extra_args_attr) {
-      auto attr_name = attr.dyn_cast<pir::StrAttribute>().AsString();
-      extra_attr_[attr_name] = ConvertPIRAttribute2RuntimeAttribute(
-          op_attributes.at(attr_name), attr_name, yaml_info_parser);
-    }
-  }
-  TensorNameMap(op, *value_exec_info_, yaml_info_parser, inputs_, outputs_);
-}
-
-OneDNNMixedPhiKernelInstruction::~OneDNNMixedPhiKernelInstruction() {
-  if (phi_kernel_ != nullptr) {
-    delete phi_kernel_;
-  }
-}
+    : OneDNNPhiKernelInstruction(id, place, op, value_exec_info) {}
 
 void OneDNNMixedPhiKernelInstruction::Run() {
   // Step1. Mixed Dynamic Choose Kernel
   // todo if (input_tensor.layout() != phi::DataLayout::ONEDNN)
 
-  // Step2. TransLayout
-  auto inputs = kernel_context_.InputsBetween<phi::DenseTensor>(
-      size_t(0), kernel_context_.InputsSize());
-  for (size_t i = 0; i < inputs.size(); ++i) {
-    auto input = inputs[i];
-    if (input->layout() != phi::DataLayout::ONEDNN) {
-      phi::DataLayout from_layout = input->layout();
-
-      //  Handle 'layout_transform' in
-      //  ops_onednn_extra.yaml(GetKernelTypeForVar)
-      if (layout_transform_inputs_.count(i) &&
-          input_layout_ != phi::DataLayout::kAnyLayout) {
-        from_layout = input_layout_;
-      }
-
-      auto transed_tensor = const_cast<phi::DenseTensor*>(input);
-
-      if (from_layout == DataLayout::kNHWC ||
-          from_layout == DataLayout::kNDHWC) {
-        phi::funcs::MatchShapeToLayout(
-            transed_tensor, from_layout, phi::DataLayout::ONEDNN);
-        // We register only NHWC assuming that model is consistent e.g. either
-        // NHWC or NCHW
-        phi::OneDNNContext::tls().set_cur_paddle_data_layout(from_layout);
-      }
-
-      dnnl::memory::desc out_mem_desc =
-          phi::funcs::make_memory_desc(*input, from_layout);
-      transed_tensor->set_mem_desc(out_mem_desc);
-    }
-  }
-
-  // Step3. Append extra information into ctx
-  // SetDnnAttrIntoDeviceContext
-  // SetInputsName SetOutputsName
-  auto one_dnn_ctx = const_cast<phi::OneDNNContext*>(
-      &kernel_context_.GetDeviceContext<phi::OneDNNContext>());
-  for (auto& attr : extra_attr_) {
-    one_dnn_ctx->SetDnnAttr(attr.first, attr.second);
-  }
-  one_dnn_ctx->SetInputsName(inputs_);
-  one_dnn_ctx->SetOutputsName(outputs_);
-
-  // Step4. InferMeta
-  if (infer_meta_interface_) {
-    infer_meta_interface_->infer_meta_(&(infer_meta_context_));
-  }
-
-  // Step5. Run kernel
-  VLOG(6) << "Run op " << phi_op_name_ << " infer meta.";
-  (*(phi_kernel_))(&(kernel_context_));
-  VLOG(6) << "Run op " << phi_op_name_ << " kernel.";
-
-  // Step6. ClearDnnAttr
-  one_dnn_ctx->ClearDnnAttr();
+  OneDNNPhiKernelInstruction::Run();
 }
 
 }  // namespace framework
diff --git a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.h b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.h
index 1022e22ac19e89..d39e5fa9d1fea0 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.h
+++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_mixed_phi_kernel_instruction.h
@@ -14,7 +14,7 @@
 
 #pragma once
 
-#include "paddle/fluid/framework/new_executor/instruction/instruction_base.h"
+#include "paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.h"
 
 namespace pir {
 class Operation;
@@ -28,54 +28,14 @@ class ValueExecutionInfo;
 using RuntimeAttribute = phi::Attribute;
 using PIRAttribute = pir::Attribute;
 
-class OneDNNMixedPhiKernelInstruction : public InstructionBase {
+class OneDNNMixedPhiKernelInstruction : public OneDNNPhiKernelInstruction {
  public:
   OneDNNMixedPhiKernelInstruction(size_t id,
                                   const platform::Place& place,
                                   ::pir::Operation* op,
                                   const ValueExecutionInfo* value_exec_info);
 
-  ~OneDNNMixedPhiKernelInstruction();
-
-  phi::Kernel* PhiKernel() const { return phi_kernel_; }
-
-  const phi::KernelContext& KernelContext() const { return kernel_context_; }
-
-  const phi::InferMetaContext& InferMetaContext() const {
-    return infer_meta_context_;
-  }
-
-  paddle::dialect::InferMetaInterface::Concept* InferMetaInterface() const {
-    return infer_meta_interface_;
-  }
-
-  ::pir::Operation* Operation() const override { return op_; }
-
   void Run() override;
-
-  const std::string& Name() const override { return phi_op_name_; }
-
- private:
-  paddle::dialect::InferMetaInterface::Concept* infer_meta_interface_{
-      nullptr};  // not owned
-
-  phi::InferMetaContext infer_meta_context_;
-
-  phi::KernelContext kernel_context_;
-
-  phi::Kernel* phi_kernel_{nullptr};  // not owned
-
-  std::string phi_op_name_;
-
-  ::pir::Operation* op_{nullptr};  // not owned
-
-  const ValueExecutionInfo* value_exec_info_;  // not owned
-
-  std::set<int> layout_transform_inputs_{};
-  phi::DataLayout input_layout_{phi::DataLayout::kAnyLayout};
-  std::map<std::string, RuntimeAttribute> extra_attr_{};
-  std::map<std::string, std::vector<std::string>> inputs_{};
-  std::map<std::string, std::vector<std::string>> outputs_{};
 };
 
 }  // namespace framework

From 7a3412a8648aa19c19ae58d6e6d344c1dce84690 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Mon, 25 Dec 2023 01:51:16 +0000
Subject: [PATCH 41/49] refine

---
 .../dialect/operator/ir/ops_onednn_extra.yaml | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml b/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
index f8062dff6bc4d9..d7a17f9e80c8c4 100644
--- a/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
+++ b/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
@@ -5,24 +5,24 @@
     arg_name: data_format
     tensors: input
 
-- op : matmul
-  extra_args : str mkldnn_data_type="float32"
-  layout_transform :
-    arg_name: cur_paddle_data_layout
-    tensors: x, y
+# - op : matmul
+#   extra_args : str mkldnn_data_type="float32"
+#   layout_transform :
+#     arg_name: cur_paddle_data_layout
+#     tensors: x, y
 
-- op : pad3d
-  extra_args :
-  layout_transform :
-    arg_name: data_format
-    tensors: x
-  dynamic_fallback : True
+# - op : pad3d
+#   extra_args :
+#   layout_transform :
+#     arg_name: data_format
+#     tensors: x
+#   dynamic_fallback : True
 
-- op : batch_norm
-  extra_args : bool fuse_with_relu=false
-  layout_transform :
-    arg_name: data_layout
-    tensors: x
+# - op : batch_norm
+#   extra_args : bool fuse_with_relu=false
+#   layout_transform :
+#     arg_name: data_layout
+#     tensors: x
 
-- op : prelu
-  extra_args : bool is_test=false, str mkldnn_data_type="float32"
+# - op : prelu
+#   extra_args : bool is_test=false, str mkldnn_data_type="float32"

From b5493dc20d181b8b82c380c82f317db4bfa12cbe Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Mon, 25 Dec 2023 08:01:07 +0000
Subject: [PATCH 42/49] refine

---
 paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index 745220f1112659..82e962bafd151a 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -1028,7 +1028,8 @@ phi::KernelKey GetKernelKey(
   }
 
 #ifdef PADDLE_WITH_DNNL
-  if (op->HasTrait<OneDNNTrait>() && res.backend() == phi::Backend::CPU &&
+  if (op->dialect()->name() == "pd_onednn_op" && op->HasTrait<OneDNNTrait>() &&
+      res.backend() == phi::Backend::CPU &&
       SupportsMKLDNN(kernel_fn_str, res.dtype())) {
     res.set_backend(phi::Backend::ONEDNN);
     res.set_layout(phi::DataLayout::ONEDNN);
@@ -1858,6 +1859,8 @@ std::vector<pir::Value> BuildInputs(
     // 3. layout transfer(only for onednn)
 #ifdef PADDLE_WITH_DNNL
     if (kernel_key.backend() == phi::Backend::CPU &&
+        cur_in.dyn_cast<pir::OpResult>().owner()->dialect()->name() ==
+            "pd_onednn_op" &&
         cur_in.dyn_cast<pir::OpResult>().owner()->HasTrait<OneDNNTrait>()) {
       auto new_in_type = new_in.type();
       if (new_in_type.isa<AllocatedDenseTensorType>()) {
@@ -1951,7 +1954,8 @@ pir::Operation* BuildKernelOp(
 
   pir::Operation* op = nullptr;
 #ifdef PADDLE_WITH_DNNL
-  if (op_item->HasTrait<OneDNNTrait>()) {
+  if (op_item->dialect()->name() == "pd_onednn_op" &&
+      op_item->HasTrait<OneDNNTrait>()) {
     if (IsOneDNNLegacyOp(op_item->name())) {
       VLOG(4) << "choose OneDNNLegacyKernelOp";
       pir::OpInfo legacy_kernel_op_info =
@@ -1987,7 +1991,8 @@ pir::Operation* BuildKernelOp(
           "dynamic_fallback",
           pir::BoolAttribute::get(
               ctx, op_info_parser->OpRuntimeInfo().dynamic_fallback));
-      if (op_item->HasTrait<OneDNNDynamicFallbackTrait>()) {
+      if (op_item->dialect()->name() == "pd_onednn_op" &&
+          op_item->HasTrait<OneDNNDynamicFallbackTrait>()) {
         VLOG(4) << "choose OneDNNMixedPhiKernelOp";
         pir::OpInfo phi_kernel_op_info =
             ctx->GetRegisteredOpInfo(OneDNNMixedPhiKernelOp::name());

From 43a69218f12b4a3ebe526ebf865e9e830d45e917 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Mon, 25 Dec 2023 09:00:11 +0000
Subject: [PATCH 43/49] refine

---
 paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index 82e962bafd151a..7de759d62231b0 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -1859,6 +1859,7 @@ std::vector<pir::Value> BuildInputs(
     // 3. layout transfer(only for onednn)
 #ifdef PADDLE_WITH_DNNL
     if (kernel_key.backend() == phi::Backend::CPU &&
+        cur_in.dyn_cast<pir::OpResult>().owner() != nullptr &&
         cur_in.dyn_cast<pir::OpResult>().owner()->dialect()->name() ==
             "pd_onednn_op" &&
         cur_in.dyn_cast<pir::OpResult>().owner()->HasTrait<OneDNNTrait>()) {

From 6ff60e142bd802c3898696aae54914725c177cac Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Mon, 25 Dec 2023 10:54:31 +0000
Subject: [PATCH 44/49] refine

---
 paddle/fluid/pir/dialect/operator/trait/onednn.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/paddle/fluid/pir/dialect/operator/trait/onednn.h b/paddle/fluid/pir/dialect/operator/trait/onednn.h
index 8de01985200401..df810c6707df12 100644
--- a/paddle/fluid/pir/dialect/operator/trait/onednn.h
+++ b/paddle/fluid/pir/dialect/operator/trait/onednn.h
@@ -14,6 +14,8 @@
 
 #pragma once
 
+#ifdef PADDLE_WITH_DNNL
+
 #include "paddle/pir/core/op_base.h"
 
 namespace paddle {
@@ -43,3 +45,5 @@ class OneDNNDynamicFallbackTrait
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNTrait)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNOnlyTrait)
 IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::OneDNNDynamicFallbackTrait)
+
+#endif

From 29acccab74db8921519dd3d4e47ac03149e53c92 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Tue, 26 Dec 2023 07:38:26 +0000
Subject: [PATCH 45/49] refine

---
 .../onednn/onednn_phi_kernel_instruction.cc   |   4 +
 .../dialect/operator/ir/ops_onednn_extra.yaml |   5 +
 .../pir/transforms/pd_op_to_kernel_pass.cc    | 104 +++++++++++-------
 3 files changed, 73 insertions(+), 40 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
index 7c30e9adee7729..5beae83e6ae3d7 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
@@ -349,6 +349,10 @@ void OneDNNPhiKernelInstruction::Run() {
         phi::OneDNNContext::tls().set_cur_paddle_data_layout(from_layout);
       }
 
+      if (from_layout == DataLayout::kAnyLayout) {
+        from_layout = phi::OneDNNContext::tls().get_cur_paddle_data_layout();
+      }
+
       dnnl::memory::desc out_mem_desc =
           phi::funcs::make_memory_desc(*input, from_layout);
       transed_tensor->set_mem_desc(out_mem_desc);
diff --git a/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml b/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
index d7a17f9e80c8c4..58897216793dd6 100644
--- a/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
+++ b/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
@@ -5,6 +5,11 @@
     arg_name: data_format
     tensors: input
 
+- op : conv2d_grad
+  extra_args : bool is_test=false
+  layout_transform :
+    arg_name: data_format
+    tensors: input, out_grad
 # - op : matmul
 #   extra_args : str mkldnn_data_type="float32"
 #   layout_transform :
diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index 7de759d62231b0..7a9058fc91023b 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -1028,8 +1028,7 @@ phi::KernelKey GetKernelKey(
   }
 
 #ifdef PADDLE_WITH_DNNL
-  if (op->dialect()->name() == "pd_onednn_op" && op->HasTrait<OneDNNTrait>() &&
-      res.backend() == phi::Backend::CPU &&
+  if (op->HasTrait<OneDNNTrait>() && res.backend() == phi::Backend::CPU &&
       SupportsMKLDNN(kernel_fn_str, res.dtype())) {
     res.set_backend(phi::Backend::ONEDNN);
     res.set_layout(phi::DataLayout::ONEDNN);
@@ -1624,7 +1623,21 @@ std::vector<pir::Value> BuildInputs(
       }
     }
 
-    // 1.backend transfer
+    // 1. layout transfer (only for onednn)
+#ifdef PADDLE_WITH_DNNL
+    if (kernel_key.backend() != phi::Backend::ONEDNN) {
+      auto new_in_type = new_in.type();
+      if (new_in_type.isa<AllocatedDenseTensorType>()) {
+        if (new_in_type.dyn_cast<AllocatedDenseTensorType>().data_layout() ==
+            phi::DataLayout::ONEDNN) {
+          new_in = AddOneDNN2PaddleLayoutTransferOp(
+              new_in, phi::DataLayout::ANY, block);
+        }
+      }
+    }
+#endif
+
+    // 2. backend transfer
     bool check_place_transfer =
         (op_item->isa<::pir::SetParameterOp>()) ||
         (kernel.IsValid() && (!UnchangeOutputOps.count(op_item->name())));
@@ -1825,7 +1838,7 @@ std::vector<pir::Value> BuildInputs(
       }
     }
 
-    // 2. dtype transfer
+    // 3. dtype transfer
     if (op_info_parser != nullptr) {
       std::string var_name = op_info_parser->InputNames()[i];
       auto fake_tensors = PrepareFakeTensors(new_in);
@@ -1856,24 +1869,6 @@ std::vector<pir::Value> BuildInputs(
       }
     }
 
-    // 3. layout transfer(only for onednn)
-#ifdef PADDLE_WITH_DNNL
-    if (kernel_key.backend() == phi::Backend::CPU &&
-        cur_in.dyn_cast<pir::OpResult>().owner() != nullptr &&
-        cur_in.dyn_cast<pir::OpResult>().owner()->dialect()->name() ==
-            "pd_onednn_op" &&
-        cur_in.dyn_cast<pir::OpResult>().owner()->HasTrait<OneDNNTrait>()) {
-      auto new_in_type = new_in.type();
-      if (new_in_type.isa<AllocatedDenseTensorType>()) {
-        new_in = AddOneDNN2PaddleLayoutTransferOp(
-            new_in, phi::DataLayout::ANY, block);
-      } else {
-        PADDLE_THROW(
-            phi::errors::Unimplemented("PIR layout transfer only support "
-                                       "allocated dense tensor type for now"));
-      }
-    }
-#endif
     vec_inputs.push_back(new_in);
   }
   return vec_inputs;
@@ -1955,8 +1950,7 @@ pir::Operation* BuildKernelOp(
 
   pir::Operation* op = nullptr;
 #ifdef PADDLE_WITH_DNNL
-  if (op_item->dialect()->name() == "pd_onednn_op" &&
-      op_item->HasTrait<OneDNNTrait>()) {
+  if (op_item->HasTrait<OneDNNTrait>()) {
     if (IsOneDNNLegacyOp(op_item->name())) {
       VLOG(4) << "choose OneDNNLegacyKernelOp";
       pir::OpInfo legacy_kernel_op_info =
@@ -1992,8 +1986,7 @@ pir::Operation* BuildKernelOp(
           "dynamic_fallback",
           pir::BoolAttribute::get(
               ctx, op_info_parser->OpRuntimeInfo().dynamic_fallback));
-      if (op_item->dialect()->name() == "pd_onednn_op" &&
-          op_item->HasTrait<OneDNNDynamicFallbackTrait>()) {
+      if (op_item->HasTrait<OneDNNDynamicFallbackTrait>()) {
         VLOG(4) << "choose OneDNNMixedPhiKernelOp";
         pir::OpInfo phi_kernel_op_info =
             ctx->GetRegisteredOpInfo(OneDNNMixedPhiKernelOp::name());
@@ -2049,10 +2042,11 @@ void ProcessBlock(
     std::unordered_map<pir::Value, pir::Value>* map_value_pair) {
   auto inputs_by_data_op = GetInputsByDataOp(block);
 
-  for (auto& op_item : *block) {
-    VLOG(6) << "op name " << op_item.name();
-    if ((op_item.isa<FeedOp>()) &&
-        inputs_by_data_op.count(op_item.attributes()
+  for (auto iter = block->begin(); iter != block->end(); ++iter) {
+    pir::Operation* op_item = &(*iter);
+    VLOG(6) << "op name " << op_item->name();
+    if ((op_item->isa<FeedOp>()) &&
+        inputs_by_data_op.count(op_item->attributes()
                                     .at("name")
                                     .dyn_cast<pir::StrAttribute>()
                                     .AsString())) {
@@ -2061,24 +2055,54 @@ void ProcessBlock(
     }
 
     // HandleSpecialOp
-    if (SpecialLowerOps.count(op_item.name())) {
-      VLOG(6) << "Handle Special Op: [" << op_item.name()
+    if (SpecialLowerOps.count(op_item->name())) {
+      VLOG(6) << "Handle Special Op: [" << op_item->name()
               << "] while lowering to kernel pass";
       HandleForSpecialOp(
-          place, &op_item, new_block, ctx, map_op_pair, map_value_pair);
+          place, op_item, new_block, ctx, map_op_pair, map_value_pair);
       continue;
     }
 
-    auto op_info_parser = GetOpYamlInfoParser(&op_item);
-    auto kernel_name = GetKernelName(op_info_parser.get(), &op_item);
+    auto op_info_parser = GetOpYamlInfoParser(op_item);
+    auto kernel_name = GetKernelName(op_info_parser.get(), op_item);
     auto kernel_key = GetKernelKey(
-        &op_item, place, kernel_name, *map_value_pair, op_info_parser.get());
+        op_item, place, kernel_name, *map_value_pair, op_info_parser.get());
     VLOG(6) << "kernel type " << kernel_key;
 
+#ifdef PADDLE_WITH_DNNL
+    if (op_item->HasTrait<OneDNNTrait>() &&
+        kernel_key.backend() != phi::Backend::ONEDNN) {
+      std::vector<pir::Type> op_item_inner_output_types;
+      if (op_item->num_results() > 0) {
+        for (size_t i = 0; i < op_item->num_results(); ++i) {
+          op_item_inner_output_types.push_back(op_item->result_type(i));
+        }
+      }
+      std::string target_op_name = op_item->name();
+      target_op_name.replace(0, 12, "pd_op");
+      auto op_info = ctx->GetRegisteredOpInfo(target_op_name);
+      if (!op_info) {
+        IR_THROW("Ctx should have corresponding OpInfo %s", target_op_name);
+      }
+      pir::Operation* op_item_inner =
+          pir::Operation::Create(op_item->operands_source(),
+                                 op_item->attributes(),
+                                 op_item_inner_output_types,
+                                 op_info);
+      for (auto iter = block->begin(); iter != block->end(); ++iter) {
+        if (*iter == *op_item) {
+          block->Assign(iter, op_item_inner);
+          break;
+        }
+      }
+      op_item = op_item_inner;
+      op_info_parser = GetOpYamlInfoParser(op_item);
+    }
+#endif
     // build output type
-    auto op_output_types = BuildOutputs(&op_item, kernel_name, kernel_key, ctx);
+    auto op_output_types = BuildOutputs(op_item, kernel_name, kernel_key, ctx);
     // build input
-    auto vec_inputs = BuildInputs(&op_item,
+    auto vec_inputs = BuildInputs(op_item,
                                   kernel_name,
                                   kernel_key,
                                   place,
@@ -2093,14 +2117,14 @@ void ProcessBlock(
                                        kernel_key,
                                        vec_inputs,
                                        op_output_types,
-                                       &op_item,
+                                       op_item,
                                        new_block,
                                        ctx,
                                        map_op_pair,
                                        map_value_pair);
 
     AddShadowFeedOpForDataOrFeed(
-        place, &op_item, op, new_block, ctx, map_op_pair, map_value_pair);
+        place, op_item, op, new_block, ctx, map_op_pair, map_value_pair);
   }
 }
 

From 19423a84241910ea763f99968546ce62e9c218af Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Tue, 26 Dec 2023 08:01:50 +0000
Subject: [PATCH 46/49] refine: redirect uses of op_item to op_item_inner results

---
 paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
index 7a9058fc91023b..7d1896de1d7cf8 100644
--- a/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
@@ -2089,6 +2089,7 @@ void ProcessBlock(
                                  op_item->attributes(),
                                  op_item_inner_output_types,
                                  op_info);
+      op_item->ReplaceAllUsesWith(op_item_inner->results());
       for (auto iter = block->begin(); iter != block->end(); ++iter) {
         if (*iter == *op_item) {
           block->Assign(iter, op_item_inner);
@@ -2096,7 +2097,7 @@ void ProcessBlock(
         }
       }
       op_item = op_item_inner;
-      op_info_parser = GetOpYamlInfoParser(op_item);
+      op_info_parser = GetOpYamlInfoParser(op_item_inner);
     }
 #endif
     // build output type

From d8e315fa864c32a84b19bbae8ad0f175ee0dde4d Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Tue, 26 Dec 2023 08:04:10 +0000
Subject: [PATCH 47/49] refine: rename ConvertPIRAttribute2RuntimeAttribute definition

---
 .../instruction/onednn/onednn_phi_kernel_instruction.cc         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
index 5beae83e6ae3d7..8551c1e269e9e1 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
@@ -43,7 +43,7 @@
 namespace paddle {
 namespace framework {
 
-static RuntimeAttribute ConvertPIRAttribute2RuntimeAttribute(
+static RuntimeAttribute ConvertPirAttribute2RuntimeAttribute(
     PIRAttribute attr,
     const std::string& attr_name,
     const paddle::dialect::OpYamlInfoParser& op_yaml_info) {

From 8ca1f82512357bc271bab2bc5b25145024812291 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Tue, 26 Dec 2023 08:29:28 +0000
Subject: [PATCH 48/49] refine: use ConvertPirAttribute2RuntimeAttribute at call site

---
 .../instruction/onednn/onednn_phi_kernel_instruction.cc     | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
index 8551c1e269e9e1..71385619cb958b 100644
--- a/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
+++ b/paddle/fluid/framework/new_executor/instruction/onednn/onednn_phi_kernel_instruction.cc
@@ -81,14 +81,14 @@ static RuntimeAttribute ConvertPirAttribute2RuntimeAttribute(
 
       } else {
         PADDLE_THROW(phi::errors::Unimplemented(
-            "ConvertPIRAttribute2RuntimeAttribute not support [%s] ",
+            "ConvertPirAttribute2RuntimeAttribute not support [%s] ",
             attr_type_name));
       }
     }
     return vec_res;
   } else {
     PADDLE_THROW(phi::errors::Unimplemented(
-        "ConvertPIRAttribute2RuntimeAttribute not support [%s] ",
+        "ConvertPirAttribute2RuntimeAttribute not support [%s] ",
         attr_type_name));
   }
 }
@@ -309,7 +309,7 @@ OneDNNPhiKernelInstruction::OneDNNPhiKernelInstruction(
     std::vector<std::string> extra_args;
     for (auto& attr : extra_args_attr) {
       auto attr_name = attr.dyn_cast<pir::StrAttribute>().AsString();
-      extra_attr_[attr_name] = ConvertPIRAttribute2RuntimeAttribute(
+      extra_attr_[attr_name] = ConvertPirAttribute2RuntimeAttribute(
           op_attributes.at(attr_name), attr_name, yaml_info_parser);
     }
   }

From 972a8a0119082c358048cc8ef8a26c78b02a9ea3 Mon Sep 17 00:00:00 2001
From: Wang Huan <wanghuan29@baidu.com>
Date: Wed, 27 Dec 2023 01:37:59 +0000
Subject: [PATCH 49/49] refine: merge onednn REMOVE_ITEM calls in new_executor CMake

---
 paddle/fluid/framework/new_executor/CMakeLists.txt | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/CMakeLists.txt b/paddle/fluid/framework/new_executor/CMakeLists.txt
index 1b98c99b9c0d9c..990f82efa8edeb 100644
--- a/paddle/fluid/framework/new_executor/CMakeLists.txt
+++ b/paddle/fluid/framework/new_executor/CMakeLists.txt
@@ -9,18 +9,10 @@ if(NOT WITH_MKLDNN)
   list(
     REMOVE_ITEM
     standalone_executor_srcs
+    ${CMAKE_CURRENT_SOURCE_DIR}/instruction/onednn/onednn_legacy_kernel_instruction.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/instruction/onednn/onednn_phi_kernel_instruction.cc
-  )
-  list(
-    REMOVE_ITEM
-    standalone_executor_srcs
     ${CMAKE_CURRENT_SOURCE_DIR}/instruction/onednn/onednn_mixed_phi_kernel_instruction.cc
   )
-  list(
-    REMOVE_ITEM
-    standalone_executor_srcs
-    ${CMAKE_CURRENT_SOURCE_DIR}/instruction/onednn/onednn_legacy_kernel_instruction.cc
-  )
 endif()
 
 set(standalone_executor_deps