From 6c3e3a2a40bb5dd5c92ac6e1a53a95404b51a16a Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Thu, 14 Dec 2023 11:31:50 +0000 Subject: [PATCH 01/12] delete dense_tensor mem_desc_ --- paddle/phi/core/dense_tensor.cc | 10 ---------- paddle/phi/core/dense_tensor.h | 18 ------------------ paddle/phi/core/dense_tensor.inl | 12 +----------- paddle/phi/core/dense_tensor_impl.cc | 27 +++++++++++++++++++++++---- 4 files changed, 24 insertions(+), 43 deletions(-) diff --git a/paddle/phi/core/dense_tensor.cc b/paddle/phi/core/dense_tensor.cc index c86a06bedef8d..1181a81266976 100644 --- a/paddle/phi/core/dense_tensor.cc +++ b/paddle/phi/core/dense_tensor.cc @@ -59,10 +59,6 @@ DenseTensor::DenseTensor(const DenseTensor& other) { storage_properties_ = std::move(CopyStorageProperties(other.storage_properties_)); inplace_version_counter_ = other.inplace_version_counter_; - -#ifdef PADDLE_WITH_DNNL - mem_desc_ = other.mem_desc_; -#endif } DenseTensor& DenseTensor::operator=(const DenseTensor& other) { @@ -74,9 +70,6 @@ DenseTensor& DenseTensor::operator=(const DenseTensor& other) { storage_properties_ = std::move(CopyStorageProperties(other.storage_properties_)); inplace_version_counter_ = other.inplace_version_counter_; -#ifdef PADDLE_WITH_DNNL - mem_desc_ = other.mem_desc_; -#endif return *this; } @@ -85,9 +78,6 @@ DenseTensor& DenseTensor::operator=(DenseTensor&& other) noexcept { std::swap(holder_, other.holder_); storage_properties_ = std::move(other.storage_properties_); std::swap(inplace_version_counter_, other.inplace_version_counter_); -#ifdef PADDLE_WITH_DNNL - mem_desc_ = other.mem_desc_; -#endif return *this; } diff --git a/paddle/phi/core/dense_tensor.h b/paddle/phi/core/dense_tensor.h index bcc2b07a89e3a..b78cec1483272 100644 --- a/paddle/phi/core/dense_tensor.h +++ b/paddle/phi/core/dense_tensor.h @@ -22,12 +22,6 @@ limitations under the License. */ #include "paddle/phi/core/tensor_meta.h" #include "paddle/utils/test_macros.h" -/* @jim19930609: Move to MKLDNN_Tensor in the future - */ -#ifdef PADDLE_WITH_DNNL -#include "dnnl.hpp" // NOLINT -#endif - namespace phi { class DenseTensorUtils; @@ -290,18 +284,6 @@ class TEST_API DenseTensor : public TensorBase, std::shared_ptr inplace_version_counter_ = std::make_shared(); -/* @jim19930609: This is a hack -In general, it is badly designed to fuse MKLDNN-specific objects into a -generic Tensor. -We temporarily leave them here to unblock Tensor Unification progress. -In the final state, we should come up with a MKLDNN_Tensor and move the -following codes there. -*/ -#ifdef PADDLE_WITH_DNNL - /// \brief memory descriptor of tensor which have layout set as kMKLDNN - dnnl::memory::desc mem_desc_; -#endif - #ifndef PADDLE_WITH_CUSTOM_KERNEL #include "paddle/phi/core/dense_tensor.inl" #endif diff --git a/paddle/phi/core/dense_tensor.inl b/paddle/phi/core/dense_tensor.inl index 19101e7093f74..a8672b2171143 100644 --- a/paddle/phi/core/dense_tensor.inl +++ b/paddle/phi/core/dense_tensor.inl @@ -97,22 +97,12 @@ std::vector Split(int64_t split_size, int64_t axis) const; std::vector Chunk(int64_t chunks, int64_t axis) const; -/* @jim19930609: This is a hack -In general, it is badly designed to fuse MKLDNN-specific objects into a -generic Tensor. -We temporarily leave them here to unblock Tensor Unification progress. -In the final state, we should come up with a MKLDNN_Tensor and move the -following codes there. 
-*/ #ifdef PADDLE_WITH_DNNL public: const dnnl::memory::desc& mem_desc() const; -inline void set_mem_desc(const dnnl::memory::desc& mem_desc) { - mem_desc_ = mem_desc; - meta_.layout = DataLayout::ONEDNN; -} +void set_mem_desc(const dnnl::memory::desc& mem_desc); #endif diff --git a/paddle/phi/core/dense_tensor_impl.cc b/paddle/phi/core/dense_tensor_impl.cc index 5fa43647da19c..770443acf1838 100644 --- a/paddle/phi/core/dense_tensor_impl.cc +++ b/paddle/phi/core/dense_tensor_impl.cc @@ -377,7 +377,29 @@ std::vector DenseTensor::Chunk(int64_t chunks, } #ifdef PADDLE_WITH_DNNL -const dnnl::memory::desc& DenseTensor::mem_desc() const { return mem_desc_; } +const dnnl::memory::desc& DenseTensor::mem_desc() const { + if (storage_properties_ == nullptr) { + std::unique_ptr* storage_properties_ptr = + const_cast*>(&storage_properties_); + *storage_properties_ptr = std::make_unique(); + } + return this->storage_properties().mem_desc; +} + +void DenseTensor::set_mem_desc(const dnnl::memory::desc& mem_desc) { + if (storage_properties_ == nullptr) { + storage_properties_ = std::make_unique(); + } + if (OneDNNStorageProperties::classof(storage_properties_.get())) { + static_cast(storage_properties_.get())->mem_desc = + mem_desc; + meta_.layout = DataLayout::ONEDNN; + } else { + PADDLE_THROW(phi::errors::InvalidArgument( + "The actual type of storage_properties is inconsistent with the type " + "of the template parameter passed in.")); + } +} #endif // NOTE: For historical reasons, this interface has a special behavior, @@ -394,9 +416,6 @@ DenseTensor& DenseTensor::ShareDataWith(const DenseTensor& src) { meta_.strides = src.meta_.strides; storage_properties_ = std::move(CopyStorageProperties(src.storage_properties_)); -#ifdef PADDLE_WITH_DNNL - mem_desc_ = src.mem_desc_; -#endif return *this; } From 239a6a260c905c41ffd798716ccdf9bab3bdc025 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 15 Dec 2023 02:37:35 +0000 Subject: [PATCH 02/12] refine --- paddle/phi/backends/onednn/onednn_reuse.h | 96 +++++++++++++++++++---- paddle/phi/kernels/onednn/conv_function.h | 66 ++++++++++++++-- paddle/phi/kernels/onednn/conv_handler.h | 35 ++++++++- 3 files changed, 172 insertions(+), 25 deletions(-) diff --git a/paddle/phi/backends/onednn/onednn_reuse.h b/paddle/phi/backends/onednn/onednn_reuse.h index d9719c6f3e5b2..db6ceef72b329 100644 --- a/paddle/phi/backends/onednn/onednn_reuse.h +++ b/paddle/phi/backends/onednn/onednn_reuse.h @@ -318,16 +318,32 @@ class OneDNNHandlerT { typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... args) { - fwd_pd_ = std::make_shared( - engine_, std::forward(args)..., first); + try { + fwd_pd_ = std::make_shared( + engine_, std::forward(args)..., first); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan7")); + std::rethrow_exception(std::current_exception()); + } } template typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... 
args) { - fwd_pd_ = std::make_shared( - engine_, std::forward(first), std::forward(args)...); + try { + fwd_pd_ = std::make_shared( + engine_, std::forward(first), std::forward(args)...); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan8")); + std::rethrow_exception(std::current_exception()); + } } template @@ -342,8 +358,16 @@ class OneDNNHandlerT { bwd_pd_ = std::static_pointer_cast( dev_ctx_.GetBlob(key_pd)); if (bwd_pd_ == nullptr) { - bwd_pd_ = std::make_shared( - engine_, std::forward(args)..., *fwd_pd_); + try { + bwd_pd_ = std::make_shared( + engine_, std::forward(args)..., *fwd_pd_); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan1")); + std::rethrow_exception(std::current_exception()); + } dev_ctx_.SetBlob(key_pd, bwd_pd_); } } @@ -361,8 +385,16 @@ class OneDNNHandlerT { std::static_pointer_cast( dev_ctx_.GetBlob(key_pd)); if (bwd_w_pd_ == nullptr) { - bwd_w_pd_ = std::make_shared( - engine_, std::forward(args)..., *fwd_pd_); + try { + bwd_w_pd_ = std::make_shared( + engine_, std::forward(args)..., *fwd_pd_); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan2")); + std::rethrow_exception(std::current_exception()); + } dev_ctx_.SetBlob(key_pd, bwd_w_pd_); } } @@ -621,16 +653,32 @@ class OneDNNHandlerNoCachingT { typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... args) { - fwd_pd_ = std::make_shared( - engine_, std::forward(args)..., first); + try { + fwd_pd_ = std::make_shared( + engine_, std::forward(args)..., first); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan3")); + std::rethrow_exception(std::current_exception()); + } } template typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... 
args) { - fwd_pd_ = std::make_shared( - engine_, std::forward(first), std::forward(args)...); + try { + fwd_pd_ = std::make_shared( + engine_, std::forward(first), std::forward(args)...); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan4")); + std::rethrow_exception(std::current_exception()); + } } template @@ -640,8 +688,16 @@ class OneDNNHandlerNoCachingT { PADDLE_ENFORCE_NOT_NULL( fwd_pd_, errors::Unavailable("Get oneDNN Forward primitive %s failed.")); - bwd_pd_ = std::make_shared( - engine_, std::forward(args)..., *fwd_pd_); + try { + bwd_pd_ = std::make_shared( + engine_, std::forward(args)..., *fwd_pd_); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan5")); + std::rethrow_exception(std::current_exception()); + } } template @@ -653,8 +709,16 @@ class OneDNNHandlerNoCachingT { errors::Unavailable("Get oneDNN Forward primitive %s failed.")); auto bwd_desc = typename TBackward_params::desc(std::forward(args)...); - bwd_w_pd_ = std::make_shared( - bwd_desc, engine_, *fwd_pd_); + try { + bwd_w_pd_ = std::make_shared( + bwd_desc, engine_, *fwd_pd_); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan6")); + std::rethrow_exception(std::current_exception()); + } } std::shared_ptr AcquireMemoryFromPrimitive( diff --git a/paddle/phi/kernels/onednn/conv_function.h b/paddle/phi/kernels/onednn/conv_function.h index 7d7e74f691a02..e9d66786c6225 100644 --- a/paddle/phi/kernels/onednn/conv_function.h +++ b/paddle/phi/kernels/onednn/conv_function.h @@ -29,8 +29,10 @@ static dnnl::memory::data_type GetDstType( std::string fuse_activation, bool fuse_residual_conn, const phi::DenseTensor* residual_param) { + std::cout << "GetDstType" << std::endl; auto dst_dt = dnnl::memory::data_type::f32; if (is_int8) { + std::cout << "GetDstType1" << std::endl; dst_dt = (fuse_activation == "relu" || fuse_activation == "relu6") ? 
dnnl::memory::data_type::u8 : dnnl::memory::data_type::s8; @@ -38,17 +40,21 @@ static dnnl::memory::data_type GetDstType( dst_dt = dnnl::memory::data_type::f32; } if (fuse_residual_conn && residual_param) { + std::cout << "GetDstType2" << std::endl; auto residual_dt = funcs::ToOneDNNDataType(residual_param->dtype()); if (dst_dt != residual_dt) dst_dt = residual_dt; } } else { + std::cout << "GetDstType3" << std::endl; if (!force_fp32_output && is_bfloat16) { dst_dt = dnnl::memory::data_type::bf16; if (fuse_residual_conn && residual_param) { + std::cout << "GetDstType4" << std::endl; dst_dt = funcs::ToOneDNNDataType(residual_param->dtype()); } } } + std::cout << "GetDstType5" << std::endl; return dst_dt; } @@ -85,10 +91,12 @@ void ComputeFP32(const OneDNNContext& dev_ctx, bool fuse_residual_conn, bool force_fp32_output, DenseTensor* output) { + std::cout << "ComputeFP32 " << std::endl; const auto& onednn_engine = dev_ctx.GetEngine(); const bool is_conv3d = strides.size() == 3U; const std::string& unique_name = dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0]; + std::cout << "ComputeFP32 2" << std::endl; PD_VISIT_FLOAT_AND_INT8_TYPES( filter->dtype(), "ConvOneDNNHandlerT", ([&] { onednn::ConvOneDNNHandlerT handler(dev_ctx, @@ -110,33 +118,47 @@ void ComputeFP32(const OneDNNContext& dev_ctx, force_fp32_output, output, unique_name); + std::cout << "ComputeFP32 3" << std::endl; auto src_memory_p = handler.AcquireSrcMemoryWithReorder(input); + std::cout << "ComputeFP32 4" << std::endl; auto weights_memory_p = handler.AcquireWeightsMemoryWithReorder( filter, groups, is_conv3d, is_test); + std::cout << "ComputeFP32 5" << std::endl; std::shared_ptr dst_memory_p; if (fuse_residual_conn) { + std::cout << "ComputeFP32 6" << std::endl; dst_memory_p = handler.AcquireDstMemoryWithResidual(output, residual_param); + std::cout << "ComputeFP32 7" << std::endl; } else { + std::cout << "ComputeFP32 8" << std::endl; dst_memory_p = handler.template AcquireDstMemory(output); + std::cout << "ComputeFP32 9" << std::endl; } - + std::cout << "ComputeFP32 10" << std::endl; auto conv_p = handler.AcquireForwardPrimitive(); + std::cout << "ComputeFP32 11" << std::endl; std::unordered_map args = { {DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_WEIGHTS, *weights_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; if (bias) { + std::cout << "ComputeFP32 12" << std::endl; auto bias_memory_p = handler.AcquireBiasMemoryWithReorder(bias, is_test); + std::cout << "ComputeFP32 13" << std::endl; args.insert({DNNL_ARG_BIAS, *bias_memory_p}); } auto& astream = OneDNNContext::tls().get_stream(); + std::cout << "ComputeFP32 14" << std::endl; conv_p->execute(astream, args); + std::cout << "ComputeFP32 15" << std::endl; astream.wait(); + std::cout << "ComputeFP32 16" << std::endl; output->set_mem_desc(dst_memory_p->get_desc()); + std::cout << "ComputeFP32 17" << std::endl; })); } @@ -158,7 +180,9 @@ void ComputeINT8(const OneDNNContext& dev_ctx, bool fuse_residual_conn, bool force_fp32_output, DenseTensor* output) { + std::cout << "ComputeINT8 " << std::endl; const auto& onednn_engine = dev_ctx.GetEngine(); + std::cout << "ComputeINT8 2" << std::endl; const bool is_conv3d = strides.size() == 3U; bool unsigned_output = @@ -177,6 +201,7 @@ void ComputeINT8(const OneDNNContext& dev_ctx, "residual fusion does not support force output with fp32")); const std::string& unique_name = dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0]; + std::cout << "ComputeINT8 3" << std::endl; PD_VISIT_FLOAT_AND_INT8_TYPES( 
filter->dtype(), "ConvOneDNNHandlerT", ([&] { onednn::ConvOneDNNHandlerT handler(dev_ctx, @@ -198,9 +223,9 @@ void ComputeINT8(const OneDNNContext& dev_ctx, force_fp32_output, output, unique_name); - + std::cout << "ComputeINT8 4" << std::endl; auto src_memory_p = handler.AcquireSrcMemoryWithReorder(input); - + std::cout << "ComputeINT8 5" << std::endl; const auto& scale_weights_data = dev_ctx.HasDnnAttr("Scale_weights") ? PADDLE_GET_CONST(std::vector, @@ -210,9 +235,10 @@ void ComputeINT8(const OneDNNContext& dev_ctx, int mask_reorder = is_multi_channel ? ((groups != 1) ? (1 << 1) + (1 << 0) : 1 << 0) : 0; + std::cout << "ComputeINT8 6" << std::endl; auto weights_memory_p = handler.AcquireWeightsMemoryWithReorder( filter, groups, false, true, scale_weights_data, mask_reorder); - + std::cout << "ComputeINT8 7" << std::endl; std::shared_ptr dst_memory_p; if (fuse_residual_conn) { PADDLE_ENFORCE_EQ( @@ -224,49 +250,65 @@ void ComputeINT8(const OneDNNContext& dev_ctx, " and residual param's dimension =%d .", output->dims().size(), residual_param->dims().size())); + std::cout << "ComputeINT8 8" << std::endl; dst_memory_p = handler.AcquireDstMemoryWithResidual(output, residual_param); + std::cout << "ComputeINT8 9" << std::endl; need_s8_to_u8 = (funcs::OneDNNGetDataType() == dnnl::memory::data_type::s8) && unsigned_output; + std::cout << "ComputeINT8 10" << std::endl; } else { + std::cout << "ComputeINT8 11" << std::endl; dst_memory_p = handler.template AcquireDstMemory(output); + std::cout << "ComputeINT8 12" << std::endl; } + std::cout << "ComputeINT8 13" << std::endl; auto conv_p = handler.AcquireForwardPrimitive(); - + std::cout << "ComputeINT8 14" << std::endl; std::unordered_map args = { {DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_WEIGHTS, *weights_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; if (bias) { + std::cout << "ComputeINT8 15" << std::endl; auto bias_memory_p = handler.AcquireBiasMemoryWithReorder(bias, true); + std::cout << "ComputeINT8 16" << std::endl; args.insert({DNNL_ARG_BIAS, *bias_memory_p}); } - + std::cout << "ComputeINT8 17" << std::endl; auto src_scales_memory = handler.AcquireScalesMemory(DNNL_ARG_SRC); + std::cout << "ComputeINT8 18" << std::endl; args.insert({DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC, *src_scales_memory}); - + std::cout << "ComputeINT8 19" << std::endl; auto wei_scales_memory = handler.AcquireScalesMemory(DNNL_ARG_WEIGHTS); + std::cout << "ComputeINT8 20" << std::endl; args.insert( {DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS, *wei_scales_memory}); if (!force_fp32_output) { + std::cout << "ComputeINT8 21" << std::endl; auto dst_scales_memory = handler.AcquireScalesMemory(DNNL_ARG_DST); + std::cout << "ComputeINT8 22" << std::endl; args.insert( {DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST, *dst_scales_memory}); } auto& astream = OneDNNContext::tls().get_stream(); + std::cout << "ComputeINT8 23" << std::endl; conv_p->execute(astream, args); + std::cout << "ComputeINT8 24" << std::endl; astream.wait(); + std::cout << "ComputeINT8 25" << std::endl; if (need_s8_to_u8) { dev_ctx.Alloc(output); } - + std::cout << "ComputeINT8 26" << std::endl; output->set_mem_desc(dst_memory_p->get_desc()); + std::cout << "ComputeINT8 27" << std::endl; })); } @@ -288,6 +330,7 @@ void ConvOnednn(const Context& dev_ctx, bool fuse_residual_connection, bool force_fp32_output, DenseTensor* out) { + std::cout << "ConvOnednn" << std::endl; PADDLE_ENFORCE_EQ( dev_ctx.GetPlace().GetType(), AllocationType::CPU, @@ -301,8 +344,10 @@ void ConvOnednn(const Context& dev_ctx, fuse_activation, 
fuse_residual_connection, residual_param); + std::cout << "ConvOnednn2" << std::endl; if (!is_INT8) { if (dst_dt == dnnl::memory::data_type::f32) { + std::cout << "ConvOnednn3" << std::endl; ComputeFP32(dev_ctx, input, filter, @@ -321,6 +366,7 @@ void ConvOnednn(const Context& dev_ctx, force_fp32_output, out); } else if (dst_dt == dnnl::memory::data_type::bf16) { + std::cout << "ConvOnednn4" << std::endl; ComputeFP32(dev_ctx, input, filter, @@ -341,6 +387,7 @@ void ConvOnednn(const Context& dev_ctx, } } else { if (dst_dt == dnnl::memory::data_type::f32) { + std::cout << "ConvOnednn5" << std::endl; ComputeINT8(dev_ctx, input, filter, @@ -359,6 +406,7 @@ void ConvOnednn(const Context& dev_ctx, force_fp32_output, out); } else if (dst_dt == dnnl::memory::data_type::u8) { + std::cout << "ConvOnednn6" << std::endl; ComputeINT8(dev_ctx, input, filter, @@ -377,6 +425,7 @@ void ConvOnednn(const Context& dev_ctx, force_fp32_output, out); } else if (dst_dt == dnnl::memory::data_type::s8) { + std::cout << "ConvOnednn7" << std::endl; ComputeINT8(dev_ctx, input, filter, @@ -396,6 +445,7 @@ void ConvOnednn(const Context& dev_ctx, out); } } + std::cout << "ConvOnednn8" << std::endl; } } // namespace phi diff --git a/paddle/phi/kernels/onednn/conv_handler.h b/paddle/phi/kernels/onednn/conv_handler.h index 3d41c274de24e..0d570e9e84fe1 100644 --- a/paddle/phi/kernels/onednn/conv_handler.h +++ b/paddle/phi/kernels/onednn/conv_handler.h @@ -69,6 +69,7 @@ class ConvOneDNNHandlerT cpu_place, funcs::CreateKey( dev_ctx, common::vectorize(input->dims()), unique_name)) { + std::cout << "ConvOneDNNHandlerT" << std::endl; if (unlikely(!this->isCached())) { PADDLE_ENFORCE_EQ( input->layout(), @@ -142,8 +143,10 @@ class ConvOneDNNHandlerT std::vector strides(begin(strides_in), end(strides_in)); std::vector paddings(begin(paddings_in), end(paddings_in)); std::vector dilations(begin(dilations_in), end(dilations_in)); + std::cout << "ConvOneDNNHandlerT2" << std::endl; UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, data_dims, strides, ksize); + std::cout << "ConvOneDNNHandlerT3" << std::endl; std::transform( dilations.begin(), dilations.end(), dilations.begin(), [](int64_t i) { return i - 1; @@ -155,9 +158,11 @@ class ConvOneDNNHandlerT funcs::GetGroupConvWeightsTz(weights_tz, groups); const auto dst_tz = common::vectorize(output->dims()); - + std::cout << "ConvOneDNNHandlerT4" << std::endl; const dnnl::memory::dims stride_dims = strides; + std::cout << "ConvOneDNNHandlerT5" << std::endl; const auto onednn_paddings = funcs::ToOneDNNPadding(paddings); + std::cout << "ConvOneDNNHandlerT6" << std::endl; const dnnl::memory::dims dilations_dims = dilations; /* create memory descriptor for convolution without specified format * ('any') which lets a primitive (convolution in this case) choose @@ -171,34 +176,46 @@ class ConvOneDNNHandlerT dnnl::memory::desc src_md, weights_md; if (funcs::is_int8()) { + std::cout << "ConvOneDNNHandlerT7" << std::endl; src_md = funcs::OneDNNMemDesc(src_tz, funcs::ToOneDNNDataType(input->dtype()), chosen_memory_format); + std::cout << "ConvOneDNNHandlerT8" << std::endl; weights_md = funcs::OneDNNMemDesc( weights_tz, dnnl::memory::data_type::s8, chosen_memory_format); + std::cout << "ConvOneDNNHandlerT9" << std::endl; } else { + std::cout << "ConvOneDNNHandlerT10" << std::endl; src_md = funcs::OneDNNMemDesc(src_tz, data_type, chosen_memory_format); + std::cout << "ConvOneDNNHandlerT11" << std::endl; weights_md = funcs::OneDNNMemDesc( weights_tz, data_type, 
funcs::OneDNNMemoryFormat::any); + std::cout << "ConvOneDNNHandlerT12" << std::endl; } if (input->dims().size() == 4 && input->dims()[1] <= 4) { chosen_memory_format = funcs::OneDNNMemoryFormat::nhwc; } + std::cout << "ConvOneDNNHandlerT13" << std::endl; const auto dst_md = funcs::OneDNNMemDesc( dst_tz, funcs::OneDNNGetDataType(), chosen_memory_format); + std::cout << "ConvOneDNNHandlerT14" << std::endl; const auto fwd_prop_kind = dnnl::prop_kind::forward_inference; + std::cout << "ConvOneDNNHandlerT15" << std::endl; const dnnl::primitive_attr conv_attr = CreateConvAttrs(filter, groups, force_fp32_output, fuse_residual_conn, fuse_activation); + std::cout << "ConvOneDNNHandlerT16" << std::endl; if (bias) { auto bias_tz = common::vectorize(bias->dims()); + std::cout << "ConvOneDNNHandlerT17" << std::endl; dnnl::memory::desc bias_md = funcs::OneDNNMemDesc(bias_tz, dnnl::memory::data_type::f32, funcs::OneDNNMemoryFormat::x); + std::cout << "ConvOneDNNHandlerT18" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, @@ -212,7 +229,9 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); + std::cout << "ConvOneDNNHandlerT19" << std::endl; } else { + std::cout << "ConvOneDNNHandlerT20" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, fwd_prop_kind, @@ -224,6 +243,7 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); + std::cout << "ConvOneDNNHandlerT21" << std::endl; } } } @@ -253,6 +273,7 @@ class ConvOneDNNHandlerT cpu_place, funcs::CreateKey( dev_ctx, common::vectorize(in->dims()), unique_name)) { + std::cout << "ConvOneDNNHandlerT22" << std::endl; if (unlikely(!this->isBwdCached())) { PADDLE_ENFORCE_EQ( in->layout(), @@ -294,9 +315,11 @@ class ConvOneDNNHandlerT auto filter_data_dims = common::slice_ddim(filter_dims, 2, filter_dims.size()); auto ksize = common::vectorize(filter_data_dims); + std::cout << "ConvOneDNNHandlerT23" << std::endl; UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, data_dims, strides, ksize); + std::cout << "ConvOneDNNHandlerT24" << std::endl; auto src_tz = common::vectorize(in->dims()); auto weights_tz = common::vectorize(filter->dims()); @@ -311,6 +334,7 @@ class ConvOneDNNHandlerT */ const auto chosen_memory_format = funcs::OneDNNMemoryFormat::any; const auto weights_format = funcs::OneDNNMemoryFormat::any; + std::cout << "ConvOneDNNHandlerT25" << std::endl; auto src_md = funcs::OneDNNMemDesc( src_tz, funcs::OneDNNGetDataType(), chosen_memory_format); @@ -324,6 +348,7 @@ class ConvOneDNNHandlerT weights_tz, funcs::OneDNNGetDataType(), weights_format); auto diff_dst_md = funcs::OneDNNMemDesc( dst_tz, funcs::OneDNNGetDataType(), chosen_memory_format); + std::cout << "ConvOneDNNHandlerT26" << std::endl; auto onednn_paddings = funcs::ToOneDNNPadding(paddings); std::transform( @@ -337,10 +362,12 @@ class ConvOneDNNHandlerT dnnl::primitive_attr conv_attr; if (bias) { auto bias_tz = common::vectorize(bias->dims()); + std::cout << "ConvOneDNNHandlerT27" << std::endl; dnnl::memory::desc bias_md = funcs::OneDNNMemDesc(bias_tz, dnnl::memory::data_type::f32, funcs::OneDNNMemoryFormat::x); + std::cout << "ConvOneDNNHandlerT28" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, @@ -354,7 +381,9 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); + std::cout << "ConvOneDNNHandlerT29" << std::endl; } else { + std::cout << "ConvOneDNNHandlerT30" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, 
dnnl::prop_kind::forward_inference, @@ -366,7 +395,9 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); + std::cout << "ConvOneDNNHandlerT31" << std::endl; } + std::cout << "ConvOneDNNHandlerT32" << std::endl; this->AcquireBackwardPrimitiveDescriptor( dnnl::algorithm::convolution_direct, @@ -377,6 +408,7 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); + std::cout << "ConvOneDNNHandlerT33" << std::endl; this->AcquireBackwardWeightsPrimitiveDescriptor( dnnl::algorithm::convolution_direct, @@ -388,6 +420,7 @@ class ConvOneDNNHandlerT onednn_paddings[0], onednn_paddings[1]); } + std::cout << "ConvOneDNNHandlerT34" << std::endl; } dnnl::primitive_attr CreateConvAttrs(const DenseTensor* filter, From 1d9cbf4c98d630a313b7cd923ee2bff212e646c0 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 15 Dec 2023 02:42:54 +0000 Subject: [PATCH 03/12] refine --- paddle/phi/backends/onednn/onednn_reuse.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/paddle/phi/backends/onednn/onednn_reuse.h b/paddle/phi/backends/onednn/onednn_reuse.h index db6ceef72b329..8835d5d3bf14a 100644 --- a/paddle/phi/backends/onednn/onednn_reuse.h +++ b/paddle/phi/backends/onednn/onednn_reuse.h @@ -325,7 +325,7 @@ class OneDNNHandlerT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan7")); + PADDLE_THROW(phi::errors::Unavailable("wanghuan7")); std::rethrow_exception(std::current_exception()); } } @@ -341,7 +341,7 @@ class OneDNNHandlerT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan8")); + PADDLE_THROW(phi::errors::Unavailable("wanghuan8")); std::rethrow_exception(std::current_exception()); } } @@ -365,7 +365,7 @@ class OneDNNHandlerT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan1")); + PADDLE_THROW(phi::errors::Unavailable("wanghuan1")); std::rethrow_exception(std::current_exception()); } dev_ctx_.SetBlob(key_pd, bwd_pd_); @@ -392,7 +392,7 @@ class OneDNNHandlerT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan2")); + PADDLE_THROW(phi::errors::Unavailable("wanghuan2")); std::rethrow_exception(std::current_exception()); } dev_ctx_.SetBlob(key_pd, bwd_w_pd_); @@ -660,7 +660,7 @@ class OneDNNHandlerNoCachingT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan3")); + PADDLE_THROW(phi::errors::Unavailable("wanghuan3")); std::rethrow_exception(std::current_exception()); } } @@ -676,7 +676,7 @@ class OneDNNHandlerNoCachingT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan4")); + PADDLE_THROW(phi::errors::Unavailable("wanghuan4")); std::rethrow_exception(std::current_exception()); } } @@ -695,7 +695,7 @@ class OneDNNHandlerNoCachingT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan5")); + 
PADDLE_THROW(phi::errors::Unavailable("wanghuan5")); std::rethrow_exception(std::current_exception()); } } @@ -716,7 +716,7 @@ class OneDNNHandlerNoCachingT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan6")); + PADDLE_THROW(phi::errors::Unavailable("wanghuan6")); std::rethrow_exception(std::current_exception()); } } From 5751e54eaecce1f8633c41f3c71c1ee36eacb967 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 15 Dec 2023 02:45:19 +0000 Subject: [PATCH 04/12] refine --- paddle/phi/backends/onednn/onednn_reuse.h | 32 ++++++----------------- 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/paddle/phi/backends/onednn/onednn_reuse.h b/paddle/phi/backends/onednn/onednn_reuse.h index 8835d5d3bf14a..a4aa1730503fb 100644 --- a/paddle/phi/backends/onednn/onednn_reuse.h +++ b/paddle/phi/backends/onednn/onednn_reuse.h @@ -322,9 +322,7 @@ class OneDNNHandlerT { fwd_pd_ = std::make_shared( engine_, std::forward(args)..., first); } catch (std::exception& ex) { - LOG(WARNING) << Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan7")); std::rethrow_exception(std::current_exception()); } @@ -338,9 +336,7 @@ class OneDNNHandlerT { fwd_pd_ = std::make_shared( engine_, std::forward(first), std::forward(args)...); } catch (std::exception& ex) { - LOG(WARNING) << Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan8")); std::rethrow_exception(std::current_exception()); } @@ -362,9 +358,7 @@ class OneDNNHandlerT { bwd_pd_ = std::make_shared( engine_, std::forward(args)..., *fwd_pd_); } catch (std::exception& ex) { - LOG(WARNING) << Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan1")); std::rethrow_exception(std::current_exception()); } @@ -389,9 +383,7 @@ class OneDNNHandlerT { bwd_w_pd_ = std::make_shared( engine_, std::forward(args)..., *fwd_pd_); } catch (std::exception& ex) { - LOG(WARNING) << Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan2")); std::rethrow_exception(std::current_exception()); } @@ -657,9 +649,7 @@ class OneDNNHandlerNoCachingT { fwd_pd_ = std::make_shared( engine_, std::forward(args)..., first); } catch (std::exception& ex) { - LOG(WARNING) << Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan3")); std::rethrow_exception(std::current_exception()); } @@ -673,9 +663,7 @@ class OneDNNHandlerNoCachingT { fwd_pd_ = std::make_shared( engine_, std::forward(first), std::forward(args)...); } catch (std::exception& ex) { - LOG(WARNING) << Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan4")); std::rethrow_exception(std::current_exception()); } @@ -692,9 +680,7 @@ class OneDNNHandlerNoCachingT { bwd_pd_ = std::make_shared( engine_, std::forward(args)..., *fwd_pd_); } catch (std::exception& ex) { - LOG(WARNING) << 
Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan5")); std::rethrow_exception(std::current_exception()); } @@ -713,9 +699,7 @@ class OneDNNHandlerNoCachingT { bwd_w_pd_ = std::make_shared( bwd_desc, engine_, *fwd_pd_); } catch (std::exception& ex) { - LOG(WARNING) << Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan6")); std::rethrow_exception(std::current_exception()); } From 84095d911cd80be27ff87e7e9a064ca579ba70db Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 15 Dec 2023 06:11:58 +0000 Subject: [PATCH 05/12] refine --- test/cpp/inference/infer_ut/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/test/cpp/inference/infer_ut/run.sh b/test/cpp/inference/infer_ut/run.sh index 6f967eb0aa6c7..93ad591df3a74 100755 --- a/test/cpp/inference/infer_ut/run.sh +++ b/test/cpp/inference/infer_ut/run.sh @@ -31,6 +31,7 @@ test_suite_list="cpu_tester*" # init test suite list, pass to --gtest_filter export RED='\033[0;31m' # red color export NC='\033[0m' # no color export YELLOW='\033[33m' # yellow color +export DNNL_VERBOSE=1 cd `dirname $0` current_dir=`pwd` From 319d536d82b001c6db7cf95f0c518eb95abe8d7a Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Mon, 18 Dec 2023 08:06:34 +0000 Subject: [PATCH 06/12] refine --- paddle/phi/backends/onednn/onednn_reuse.h | 80 +++++------------------ paddle/phi/core/dense_tensor_impl.cc | 2 + paddle/phi/kernels/onednn/conv_function.h | 66 +++---------------- paddle/phi/kernels/onednn/conv_handler.h | 35 +--------- test/cpp/inference/infer_ut/run.sh | 1 - 5 files changed, 27 insertions(+), 157 deletions(-) diff --git a/paddle/phi/backends/onednn/onednn_reuse.h b/paddle/phi/backends/onednn/onednn_reuse.h index a4aa1730503fb..d9719c6f3e5b2 100644 --- a/paddle/phi/backends/onednn/onednn_reuse.h +++ b/paddle/phi/backends/onednn/onednn_reuse.h @@ -318,28 +318,16 @@ class OneDNNHandlerT { typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... args) { - try { - fwd_pd_ = std::make_shared( - engine_, std::forward(args)..., first); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan7")); - std::rethrow_exception(std::current_exception()); - } + fwd_pd_ = std::make_shared( + engine_, std::forward(args)..., first); } template typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... 
args) { - try { - fwd_pd_ = std::make_shared( - engine_, std::forward(first), std::forward(args)...); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan8")); - std::rethrow_exception(std::current_exception()); - } + fwd_pd_ = std::make_shared( + engine_, std::forward(first), std::forward(args)...); } template @@ -354,14 +342,8 @@ class OneDNNHandlerT { bwd_pd_ = std::static_pointer_cast( dev_ctx_.GetBlob(key_pd)); if (bwd_pd_ == nullptr) { - try { - bwd_pd_ = std::make_shared( - engine_, std::forward(args)..., *fwd_pd_); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan1")); - std::rethrow_exception(std::current_exception()); - } + bwd_pd_ = std::make_shared( + engine_, std::forward(args)..., *fwd_pd_); dev_ctx_.SetBlob(key_pd, bwd_pd_); } } @@ -379,14 +361,8 @@ class OneDNNHandlerT { std::static_pointer_cast( dev_ctx_.GetBlob(key_pd)); if (bwd_w_pd_ == nullptr) { - try { - bwd_w_pd_ = std::make_shared( - engine_, std::forward(args)..., *fwd_pd_); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan2")); - std::rethrow_exception(std::current_exception()); - } + bwd_w_pd_ = std::make_shared( + engine_, std::forward(args)..., *fwd_pd_); dev_ctx_.SetBlob(key_pd, bwd_w_pd_); } } @@ -645,28 +621,16 @@ class OneDNNHandlerNoCachingT { typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... args) { - try { - fwd_pd_ = std::make_shared( - engine_, std::forward(args)..., first); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan3")); - std::rethrow_exception(std::current_exception()); - } + fwd_pd_ = std::make_shared( + engine_, std::forward(args)..., first); } template typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... 
args) { - try { - fwd_pd_ = std::make_shared( - engine_, std::forward(first), std::forward(args)...); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan4")); - std::rethrow_exception(std::current_exception()); - } + fwd_pd_ = std::make_shared( + engine_, std::forward(first), std::forward(args)...); } template @@ -676,14 +640,8 @@ class OneDNNHandlerNoCachingT { PADDLE_ENFORCE_NOT_NULL( fwd_pd_, errors::Unavailable("Get oneDNN Forward primitive %s failed.")); - try { - bwd_pd_ = std::make_shared( - engine_, std::forward(args)..., *fwd_pd_); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan5")); - std::rethrow_exception(std::current_exception()); - } + bwd_pd_ = std::make_shared( + engine_, std::forward(args)..., *fwd_pd_); } template @@ -695,14 +653,8 @@ class OneDNNHandlerNoCachingT { errors::Unavailable("Get oneDNN Forward primitive %s failed.")); auto bwd_desc = typename TBackward_params::desc(std::forward(args)...); - try { - bwd_w_pd_ = std::make_shared( - bwd_desc, engine_, *fwd_pd_); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan6")); - std::rethrow_exception(std::current_exception()); - } + bwd_w_pd_ = std::make_shared( + bwd_desc, engine_, *fwd_pd_); } std::shared_ptr AcquireMemoryFromPrimitive( diff --git a/paddle/phi/core/dense_tensor_impl.cc b/paddle/phi/core/dense_tensor_impl.cc index 770443acf1838..0f6d059bfa012 100644 --- a/paddle/phi/core/dense_tensor_impl.cc +++ b/paddle/phi/core/dense_tensor_impl.cc @@ -382,6 +382,8 @@ const dnnl::memory::desc& DenseTensor::mem_desc() const { std::unique_ptr* storage_properties_ptr = const_cast*>(&storage_properties_); *storage_properties_ptr = std::make_unique(); + static_cast(storage_properties_ptr->get()) + ->mem_desc = dnnl::memory::desc(); } return this->storage_properties().mem_desc; } diff --git a/paddle/phi/kernels/onednn/conv_function.h b/paddle/phi/kernels/onednn/conv_function.h index e9d66786c6225..7d7e74f691a02 100644 --- a/paddle/phi/kernels/onednn/conv_function.h +++ b/paddle/phi/kernels/onednn/conv_function.h @@ -29,10 +29,8 @@ static dnnl::memory::data_type GetDstType( std::string fuse_activation, bool fuse_residual_conn, const phi::DenseTensor* residual_param) { - std::cout << "GetDstType" << std::endl; auto dst_dt = dnnl::memory::data_type::f32; if (is_int8) { - std::cout << "GetDstType1" << std::endl; dst_dt = (fuse_activation == "relu" || fuse_activation == "relu6") ? 
dnnl::memory::data_type::u8 : dnnl::memory::data_type::s8; @@ -40,21 +38,17 @@ static dnnl::memory::data_type GetDstType( dst_dt = dnnl::memory::data_type::f32; } if (fuse_residual_conn && residual_param) { - std::cout << "GetDstType2" << std::endl; auto residual_dt = funcs::ToOneDNNDataType(residual_param->dtype()); if (dst_dt != residual_dt) dst_dt = residual_dt; } } else { - std::cout << "GetDstType3" << std::endl; if (!force_fp32_output && is_bfloat16) { dst_dt = dnnl::memory::data_type::bf16; if (fuse_residual_conn && residual_param) { - std::cout << "GetDstType4" << std::endl; dst_dt = funcs::ToOneDNNDataType(residual_param->dtype()); } } } - std::cout << "GetDstType5" << std::endl; return dst_dt; } @@ -91,12 +85,10 @@ void ComputeFP32(const OneDNNContext& dev_ctx, bool fuse_residual_conn, bool force_fp32_output, DenseTensor* output) { - std::cout << "ComputeFP32 " << std::endl; const auto& onednn_engine = dev_ctx.GetEngine(); const bool is_conv3d = strides.size() == 3U; const std::string& unique_name = dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0]; - std::cout << "ComputeFP32 2" << std::endl; PD_VISIT_FLOAT_AND_INT8_TYPES( filter->dtype(), "ConvOneDNNHandlerT", ([&] { onednn::ConvOneDNNHandlerT handler(dev_ctx, @@ -118,47 +110,33 @@ void ComputeFP32(const OneDNNContext& dev_ctx, force_fp32_output, output, unique_name); - std::cout << "ComputeFP32 3" << std::endl; auto src_memory_p = handler.AcquireSrcMemoryWithReorder(input); - std::cout << "ComputeFP32 4" << std::endl; auto weights_memory_p = handler.AcquireWeightsMemoryWithReorder( filter, groups, is_conv3d, is_test); - std::cout << "ComputeFP32 5" << std::endl; std::shared_ptr dst_memory_p; if (fuse_residual_conn) { - std::cout << "ComputeFP32 6" << std::endl; dst_memory_p = handler.AcquireDstMemoryWithResidual(output, residual_param); - std::cout << "ComputeFP32 7" << std::endl; } else { - std::cout << "ComputeFP32 8" << std::endl; dst_memory_p = handler.template AcquireDstMemory(output); - std::cout << "ComputeFP32 9" << std::endl; } - std::cout << "ComputeFP32 10" << std::endl; + auto conv_p = handler.AcquireForwardPrimitive(); - std::cout << "ComputeFP32 11" << std::endl; std::unordered_map args = { {DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_WEIGHTS, *weights_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; if (bias) { - std::cout << "ComputeFP32 12" << std::endl; auto bias_memory_p = handler.AcquireBiasMemoryWithReorder(bias, is_test); - std::cout << "ComputeFP32 13" << std::endl; args.insert({DNNL_ARG_BIAS, *bias_memory_p}); } auto& astream = OneDNNContext::tls().get_stream(); - std::cout << "ComputeFP32 14" << std::endl; conv_p->execute(astream, args); - std::cout << "ComputeFP32 15" << std::endl; astream.wait(); - std::cout << "ComputeFP32 16" << std::endl; output->set_mem_desc(dst_memory_p->get_desc()); - std::cout << "ComputeFP32 17" << std::endl; })); } @@ -180,9 +158,7 @@ void ComputeINT8(const OneDNNContext& dev_ctx, bool fuse_residual_conn, bool force_fp32_output, DenseTensor* output) { - std::cout << "ComputeINT8 " << std::endl; const auto& onednn_engine = dev_ctx.GetEngine(); - std::cout << "ComputeINT8 2" << std::endl; const bool is_conv3d = strides.size() == 3U; bool unsigned_output = @@ -201,7 +177,6 @@ void ComputeINT8(const OneDNNContext& dev_ctx, "residual fusion does not support force output with fp32")); const std::string& unique_name = dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0]; - std::cout << "ComputeINT8 3" << std::endl; PD_VISIT_FLOAT_AND_INT8_TYPES( 
filter->dtype(), "ConvOneDNNHandlerT", ([&] { onednn::ConvOneDNNHandlerT handler(dev_ctx, @@ -223,9 +198,9 @@ void ComputeINT8(const OneDNNContext& dev_ctx, force_fp32_output, output, unique_name); - std::cout << "ComputeINT8 4" << std::endl; + auto src_memory_p = handler.AcquireSrcMemoryWithReorder(input); - std::cout << "ComputeINT8 5" << std::endl; + const auto& scale_weights_data = dev_ctx.HasDnnAttr("Scale_weights") ? PADDLE_GET_CONST(std::vector, @@ -235,10 +210,9 @@ void ComputeINT8(const OneDNNContext& dev_ctx, int mask_reorder = is_multi_channel ? ((groups != 1) ? (1 << 1) + (1 << 0) : 1 << 0) : 0; - std::cout << "ComputeINT8 6" << std::endl; auto weights_memory_p = handler.AcquireWeightsMemoryWithReorder( filter, groups, false, true, scale_weights_data, mask_reorder); - std::cout << "ComputeINT8 7" << std::endl; + std::shared_ptr dst_memory_p; if (fuse_residual_conn) { PADDLE_ENFORCE_EQ( @@ -250,65 +224,49 @@ void ComputeINT8(const OneDNNContext& dev_ctx, " and residual param's dimension =%d .", output->dims().size(), residual_param->dims().size())); - std::cout << "ComputeINT8 8" << std::endl; dst_memory_p = handler.AcquireDstMemoryWithResidual(output, residual_param); - std::cout << "ComputeINT8 9" << std::endl; need_s8_to_u8 = (funcs::OneDNNGetDataType() == dnnl::memory::data_type::s8) && unsigned_output; - std::cout << "ComputeINT8 10" << std::endl; } else { - std::cout << "ComputeINT8 11" << std::endl; dst_memory_p = handler.template AcquireDstMemory(output); - std::cout << "ComputeINT8 12" << std::endl; } - std::cout << "ComputeINT8 13" << std::endl; auto conv_p = handler.AcquireForwardPrimitive(); - std::cout << "ComputeINT8 14" << std::endl; + std::unordered_map args = { {DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_WEIGHTS, *weights_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; if (bias) { - std::cout << "ComputeINT8 15" << std::endl; auto bias_memory_p = handler.AcquireBiasMemoryWithReorder(bias, true); - std::cout << "ComputeINT8 16" << std::endl; args.insert({DNNL_ARG_BIAS, *bias_memory_p}); } - std::cout << "ComputeINT8 17" << std::endl; + auto src_scales_memory = handler.AcquireScalesMemory(DNNL_ARG_SRC); - std::cout << "ComputeINT8 18" << std::endl; args.insert({DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC, *src_scales_memory}); - std::cout << "ComputeINT8 19" << std::endl; + auto wei_scales_memory = handler.AcquireScalesMemory(DNNL_ARG_WEIGHTS); - std::cout << "ComputeINT8 20" << std::endl; args.insert( {DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS, *wei_scales_memory}); if (!force_fp32_output) { - std::cout << "ComputeINT8 21" << std::endl; auto dst_scales_memory = handler.AcquireScalesMemory(DNNL_ARG_DST); - std::cout << "ComputeINT8 22" << std::endl; args.insert( {DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST, *dst_scales_memory}); } auto& astream = OneDNNContext::tls().get_stream(); - std::cout << "ComputeINT8 23" << std::endl; conv_p->execute(astream, args); - std::cout << "ComputeINT8 24" << std::endl; astream.wait(); - std::cout << "ComputeINT8 25" << std::endl; if (need_s8_to_u8) { dev_ctx.Alloc(output); } - std::cout << "ComputeINT8 26" << std::endl; + output->set_mem_desc(dst_memory_p->get_desc()); - std::cout << "ComputeINT8 27" << std::endl; })); } @@ -330,7 +288,6 @@ void ConvOnednn(const Context& dev_ctx, bool fuse_residual_connection, bool force_fp32_output, DenseTensor* out) { - std::cout << "ConvOnednn" << std::endl; PADDLE_ENFORCE_EQ( dev_ctx.GetPlace().GetType(), AllocationType::CPU, @@ -344,10 +301,8 @@ void ConvOnednn(const Context& dev_ctx, fuse_activation, 
fuse_residual_connection, residual_param); - std::cout << "ConvOnednn2" << std::endl; if (!is_INT8) { if (dst_dt == dnnl::memory::data_type::f32) { - std::cout << "ConvOnednn3" << std::endl; ComputeFP32(dev_ctx, input, filter, @@ -366,7 +321,6 @@ void ConvOnednn(const Context& dev_ctx, force_fp32_output, out); } else if (dst_dt == dnnl::memory::data_type::bf16) { - std::cout << "ConvOnednn4" << std::endl; ComputeFP32(dev_ctx, input, filter, @@ -387,7 +341,6 @@ void ConvOnednn(const Context& dev_ctx, } } else { if (dst_dt == dnnl::memory::data_type::f32) { - std::cout << "ConvOnednn5" << std::endl; ComputeINT8(dev_ctx, input, filter, @@ -406,7 +359,6 @@ void ConvOnednn(const Context& dev_ctx, force_fp32_output, out); } else if (dst_dt == dnnl::memory::data_type::u8) { - std::cout << "ConvOnednn6" << std::endl; ComputeINT8(dev_ctx, input, filter, @@ -425,7 +377,6 @@ void ConvOnednn(const Context& dev_ctx, force_fp32_output, out); } else if (dst_dt == dnnl::memory::data_type::s8) { - std::cout << "ConvOnednn7" << std::endl; ComputeINT8(dev_ctx, input, filter, @@ -445,7 +396,6 @@ void ConvOnednn(const Context& dev_ctx, out); } } - std::cout << "ConvOnednn8" << std::endl; } } // namespace phi diff --git a/paddle/phi/kernels/onednn/conv_handler.h b/paddle/phi/kernels/onednn/conv_handler.h index 0d570e9e84fe1..3d41c274de24e 100644 --- a/paddle/phi/kernels/onednn/conv_handler.h +++ b/paddle/phi/kernels/onednn/conv_handler.h @@ -69,7 +69,6 @@ class ConvOneDNNHandlerT cpu_place, funcs::CreateKey( dev_ctx, common::vectorize(input->dims()), unique_name)) { - std::cout << "ConvOneDNNHandlerT" << std::endl; if (unlikely(!this->isCached())) { PADDLE_ENFORCE_EQ( input->layout(), @@ -143,10 +142,8 @@ class ConvOneDNNHandlerT std::vector strides(begin(strides_in), end(strides_in)); std::vector paddings(begin(paddings_in), end(paddings_in)); std::vector dilations(begin(dilations_in), end(dilations_in)); - std::cout << "ConvOneDNNHandlerT2" << std::endl; UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, data_dims, strides, ksize); - std::cout << "ConvOneDNNHandlerT3" << std::endl; std::transform( dilations.begin(), dilations.end(), dilations.begin(), [](int64_t i) { return i - 1; @@ -158,11 +155,9 @@ class ConvOneDNNHandlerT funcs::GetGroupConvWeightsTz(weights_tz, groups); const auto dst_tz = common::vectorize(output->dims()); - std::cout << "ConvOneDNNHandlerT4" << std::endl; + const dnnl::memory::dims stride_dims = strides; - std::cout << "ConvOneDNNHandlerT5" << std::endl; const auto onednn_paddings = funcs::ToOneDNNPadding(paddings); - std::cout << "ConvOneDNNHandlerT6" << std::endl; const dnnl::memory::dims dilations_dims = dilations; /* create memory descriptor for convolution without specified format * ('any') which lets a primitive (convolution in this case) choose @@ -176,46 +171,34 @@ class ConvOneDNNHandlerT dnnl::memory::desc src_md, weights_md; if (funcs::is_int8()) { - std::cout << "ConvOneDNNHandlerT7" << std::endl; src_md = funcs::OneDNNMemDesc(src_tz, funcs::ToOneDNNDataType(input->dtype()), chosen_memory_format); - std::cout << "ConvOneDNNHandlerT8" << std::endl; weights_md = funcs::OneDNNMemDesc( weights_tz, dnnl::memory::data_type::s8, chosen_memory_format); - std::cout << "ConvOneDNNHandlerT9" << std::endl; } else { - std::cout << "ConvOneDNNHandlerT10" << std::endl; src_md = funcs::OneDNNMemDesc(src_tz, data_type, chosen_memory_format); - std::cout << "ConvOneDNNHandlerT11" << std::endl; weights_md = funcs::OneDNNMemDesc( weights_tz, data_type, 
funcs::OneDNNMemoryFormat::any); - std::cout << "ConvOneDNNHandlerT12" << std::endl; } if (input->dims().size() == 4 && input->dims()[1] <= 4) { chosen_memory_format = funcs::OneDNNMemoryFormat::nhwc; } - std::cout << "ConvOneDNNHandlerT13" << std::endl; const auto dst_md = funcs::OneDNNMemDesc( dst_tz, funcs::OneDNNGetDataType(), chosen_memory_format); - std::cout << "ConvOneDNNHandlerT14" << std::endl; const auto fwd_prop_kind = dnnl::prop_kind::forward_inference; - std::cout << "ConvOneDNNHandlerT15" << std::endl; const dnnl::primitive_attr conv_attr = CreateConvAttrs(filter, groups, force_fp32_output, fuse_residual_conn, fuse_activation); - std::cout << "ConvOneDNNHandlerT16" << std::endl; if (bias) { auto bias_tz = common::vectorize(bias->dims()); - std::cout << "ConvOneDNNHandlerT17" << std::endl; dnnl::memory::desc bias_md = funcs::OneDNNMemDesc(bias_tz, dnnl::memory::data_type::f32, funcs::OneDNNMemoryFormat::x); - std::cout << "ConvOneDNNHandlerT18" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, @@ -229,9 +212,7 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); - std::cout << "ConvOneDNNHandlerT19" << std::endl; } else { - std::cout << "ConvOneDNNHandlerT20" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, fwd_prop_kind, @@ -243,7 +224,6 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); - std::cout << "ConvOneDNNHandlerT21" << std::endl; } } } @@ -273,7 +253,6 @@ class ConvOneDNNHandlerT cpu_place, funcs::CreateKey( dev_ctx, common::vectorize(in->dims()), unique_name)) { - std::cout << "ConvOneDNNHandlerT22" << std::endl; if (unlikely(!this->isBwdCached())) { PADDLE_ENFORCE_EQ( in->layout(), @@ -315,11 +294,9 @@ class ConvOneDNNHandlerT auto filter_data_dims = common::slice_ddim(filter_dims, 2, filter_dims.size()); auto ksize = common::vectorize(filter_data_dims); - std::cout << "ConvOneDNNHandlerT23" << std::endl; UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, data_dims, strides, ksize); - std::cout << "ConvOneDNNHandlerT24" << std::endl; auto src_tz = common::vectorize(in->dims()); auto weights_tz = common::vectorize(filter->dims()); @@ -334,7 +311,6 @@ class ConvOneDNNHandlerT */ const auto chosen_memory_format = funcs::OneDNNMemoryFormat::any; const auto weights_format = funcs::OneDNNMemoryFormat::any; - std::cout << "ConvOneDNNHandlerT25" << std::endl; auto src_md = funcs::OneDNNMemDesc( src_tz, funcs::OneDNNGetDataType(), chosen_memory_format); @@ -348,7 +324,6 @@ class ConvOneDNNHandlerT weights_tz, funcs::OneDNNGetDataType(), weights_format); auto diff_dst_md = funcs::OneDNNMemDesc( dst_tz, funcs::OneDNNGetDataType(), chosen_memory_format); - std::cout << "ConvOneDNNHandlerT26" << std::endl; auto onednn_paddings = funcs::ToOneDNNPadding(paddings); std::transform( @@ -362,12 +337,10 @@ class ConvOneDNNHandlerT dnnl::primitive_attr conv_attr; if (bias) { auto bias_tz = common::vectorize(bias->dims()); - std::cout << "ConvOneDNNHandlerT27" << std::endl; dnnl::memory::desc bias_md = funcs::OneDNNMemDesc(bias_tz, dnnl::memory::data_type::f32, funcs::OneDNNMemoryFormat::x); - std::cout << "ConvOneDNNHandlerT28" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, @@ -381,9 +354,7 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); - std::cout << "ConvOneDNNHandlerT29" << std::endl; } else { - std::cout << "ConvOneDNNHandlerT30" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, 
dnnl::prop_kind::forward_inference, @@ -395,9 +366,7 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); - std::cout << "ConvOneDNNHandlerT31" << std::endl; } - std::cout << "ConvOneDNNHandlerT32" << std::endl; this->AcquireBackwardPrimitiveDescriptor( dnnl::algorithm::convolution_direct, @@ -408,7 +377,6 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); - std::cout << "ConvOneDNNHandlerT33" << std::endl; this->AcquireBackwardWeightsPrimitiveDescriptor( dnnl::algorithm::convolution_direct, @@ -420,7 +388,6 @@ class ConvOneDNNHandlerT onednn_paddings[0], onednn_paddings[1]); } - std::cout << "ConvOneDNNHandlerT34" << std::endl; } dnnl::primitive_attr CreateConvAttrs(const DenseTensor* filter, diff --git a/test/cpp/inference/infer_ut/run.sh b/test/cpp/inference/infer_ut/run.sh index 93ad591df3a74..6f967eb0aa6c7 100755 --- a/test/cpp/inference/infer_ut/run.sh +++ b/test/cpp/inference/infer_ut/run.sh @@ -31,7 +31,6 @@ test_suite_list="cpu_tester*" # init test suite list, pass to --gtest_filter export RED='\033[0;31m' # red color export NC='\033[0m' # no color export YELLOW='\033[33m' # yellow color -export DNNL_VERBOSE=1 cd `dirname $0` current_dir=`pwd` From 4e254082ff1c129071379133f458a38a64ec5d8f Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Mon, 18 Dec 2023 10:59:43 +0000 Subject: [PATCH 07/12] refine --- paddle/phi/core/dense_tensor_impl.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paddle/phi/core/dense_tensor_impl.cc b/paddle/phi/core/dense_tensor_impl.cc index 0f6d059bfa012..8a3d7db993ad7 100644 --- a/paddle/phi/core/dense_tensor_impl.cc +++ b/paddle/phi/core/dense_tensor_impl.cc @@ -384,6 +384,8 @@ const dnnl::memory::desc& DenseTensor::mem_desc() const { *storage_properties_ptr = std::make_unique(); static_cast(storage_properties_ptr->get()) ->mem_desc = dnnl::memory::desc(); + static_cast(storage_properties_ptr->get()) + ->format = dnnl::memory::format_tag::undef; } return this->storage_properties().mem_desc; } From b126c280310cba638c63b2b0db85deb49f91a27c Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 29 Dec 2023 02:35:25 +0000 Subject: [PATCH 08/12] refine --- paddle/fluid/inference/api/analysis_predictor.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index c2806ebbbfcc9..5c5c0d01dec74 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -387,6 +387,7 @@ AnalysisPredictor::AnalysisPredictor(const AnalysisConfig &config) } else { predictor_id_ = inference::GetUniqueId(); } + root_predictor_id_ = predictor_id_; } bool AnalysisPredictor::Init( @@ -401,10 +402,6 @@ bool AnalysisPredictor::Init( } #endif - if (!status_is_cloned_) { - root_predictor_id_ = predictor_id_; - } - // no matter with or without MKLDNN paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); @@ -417,6 +414,7 @@ bool AnalysisPredictor::Init( if (!CreateExecutor()) { return false; } + if (!PrepareProgram(program)) { return false; } @@ -467,6 +465,7 @@ bool AnalysisPredictor::Init( #endif inference::DisplayMemoryInfo(place_, "Init predictor"); + return true; } @@ -1832,7 +1831,6 @@ void AnalysisPredictor::OptimizeInferenceProgram() { }); // The config and argument take a lot of storage, // when the predictor settings are complete, we release these stores. 
From b126c280310cba638c63b2b0db85deb49f91a27c Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Fri, 29 Dec 2023 02:35:25 +0000
Subject: [PATCH 08/12] refine: set root_predictor_id_ in the constructor

---
 paddle/fluid/inference/api/analysis_predictor.cc | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index c2806ebbbfcc9..5c5c0d01dec74 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -387,6 +387,7 @@ AnalysisPredictor::AnalysisPredictor(const AnalysisConfig &config)
   } else {
     predictor_id_ = inference::GetUniqueId();
   }
+  root_predictor_id_ = predictor_id_;
 }
 
 bool AnalysisPredictor::Init(
@@ -401,10 +402,6 @@ bool AnalysisPredictor::Init(
   }
 #endif
 
-  if (!status_is_cloned_) {
-    root_predictor_id_ = predictor_id_;
-  }
-
   // no matter with or without MKLDNN
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
 
@@ -417,6 +414,7 @@ bool AnalysisPredictor::Init(
   if (!CreateExecutor()) {
     return false;
   }
+
   if (!PrepareProgram(program)) {
     return false;
   }
@@ -467,6 +465,7 @@ bool AnalysisPredictor::Init(
 #endif
 
   inference::DisplayMemoryInfo(place_, "Init predictor");
+
   return true;
 }
 
@@ -1832,7 +1831,6 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   });
   // The config and argument take a lot of storage,
   // when the predictor settings are complete, we release these stores.
-  config_.PartiallyRelease();
 #if defined(PADDLE_WITH_TESTING)
   fusion_statis_ = *argument_->fusion_statis_ptr();
 #endif
@@ -2800,7 +2798,7 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
   VLOG(3) << "AnalysisPredictor::Clone";
   std::lock_guard<std::mutex> lk(clone_mutex_);
   auto *x = new AnalysisPredictor(config_);
-  x->status_is_cloned_ = true;
+  x->root_predictor_id_ = this->root_predictor_id_;
   x->config_.apply_optim_ = false;
   if (config_.use_external_stream_ && stream == nullptr) {
@@ -2813,7 +2811,9 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
         "function has received a stream parameter."));
   }
   x->predictor_stream_ = stream;
-  x->Init(scope_, inference_program_);
+  x->Init(nullptr);
+  x->status_is_cloned_ = true;
+
 #ifdef PADDLE_WITH_TENSORRT
   x->executor_->ResetTrtOps(++AnalysisPredictor::clone_num_);
 #endif
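PATCH 08 moves the root-id assignment into the constructor, has Clone() propagate root_predictor_id_ explicitly, and switches the clone from Init(scope_, inference_program_) to Init(nullptr), i.e. a fresh scope rather than one shared with the parent. A stripped-down sketch of that bookkeeping, where Predictor is a hypothetical stand-in for AnalysisPredictor:

    #include <memory>

    struct Predictor {
      int id = NextId();
      int root_id = id;        // the constructor decides the root, as in the patch
      bool cloned = false;

      std::unique_ptr<Predictor> Clone() {
        auto p = std::make_unique<Predictor>();
        p->root_id = root_id;  // every clone keeps pointing at its root
        p->cloned = true;
        return p;
      }

      static int NextId() {
        static int counter = 0;  // unique id source, like inference::GetUniqueId()
        return ++counter;
      }
    };

Note that PATCH 09 below reverts most of this, restoring the shared Init(scope_, inference_program_) path; the series keeps the fix on the executor side instead.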
From 12caf1c29c0cdc4a935ed6e43cce7fdacb448277 Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Fri, 29 Dec 2023 08:57:56 +0000
Subject: [PATCH 09/12] refine: optionally pre-create OneDNN mem_desc in
 CreateVariables

---
 paddle/fluid/framework/naive_executor.cc      | 24 ++++++++++-
 paddle/fluid/framework/naive_executor.h       |  3 +-
 .../fluid/inference/api/analysis_predictor.cc | 41 ++++++++++++-------
 3 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc
index 3bfacc950325c..afd61f8e21586 100644
--- a/paddle/fluid/framework/naive_executor.cc
+++ b/paddle/fluid/framework/naive_executor.cc
@@ -152,7 +152,8 @@ void NaiveExecutor::Run() {
 void NaiveExecutor::CreateVariables(const ProgramDesc &desc,
                                     int block_id,
                                     bool persistable,
-                                    Scope *scope) {
+                                    Scope *scope,
+                                    bool init_mkldnn_memdesc) {
   PADDLE_ENFORCE_NOT_NULL(scope,
                           platform::errors::InvalidArgument(
                               "The Scope to hold variables is nullptr."));
@@ -174,7 +175,16 @@ void NaiveExecutor::CreateVariables(const ProgramDesc &desc,
       continue;
     }
     num_vars++;
-
+#ifdef PADDLE_WITH_DNNL
+    auto init_mkldnn_memdesc_func = [&](Variable *var,
+                                        proto::VarType::Type var_type) {
+      if (var_type == proto::VarType::LOD_TENSOR) {
+        var->GetMutable<phi::DenseTensor>()->mem_desc();
+      } else if (var_type == proto::VarType::SELECTED_ROWS) {
+        var->GetMutable<phi::SelectedRows>()->mutable_value()->mem_desc();
+      }
+    };
+#endif
     if (persistable == var->Persistable()) {
       if (persistable) {
         if (!anc->FindVar(var->Name())) {
@@ -182,12 +192,22 @@ void NaiveExecutor::CreateVariables(const ProgramDesc &desc,
           VLOG(3) << scope << " Create persistable variable " << var->Name()
                   << ", which pointer is " << ptr;
           InitializeVariable(ptr, var->GetType());
+#ifdef PADDLE_WITH_DNNL
+          if (init_mkldnn_memdesc) {
+            init_mkldnn_memdesc_func(ptr, var->GetType());
+          }
+#endif
         }
       } else {
         auto *ptr = const_cast<Scope *>(scope)->Var(var->Name());
         VLOG(3) << scope << " Create variable " << var->Name()
                 << ", which pointer is " << ptr;
         InitializeVariable(ptr, var->GetType());
+#ifdef PADDLE_WITH_DNNL
+        if (init_mkldnn_memdesc) {
+          init_mkldnn_memdesc_func(ptr, var->GetType());
+        }
+#endif
       }
     }
   }
diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h
index 5a558f3bd6921..d88c5d7f02827 100644
--- a/paddle/fluid/framework/naive_executor.h
+++ b/paddle/fluid/framework/naive_executor.h
@@ -71,7 +71,8 @@ class NaiveExecutor {
   void CreateVariables(const ProgramDesc& desc,
                        int block_id,
                        bool persistable,
-                       Scope* scope);
+                       Scope* scope,
+                       bool init_mkldnn_memdesc = false);
 
   // Run all the operators.
   void Run();
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index a022884c13cb4..893ead4d29514 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -389,7 +389,6 @@ AnalysisPredictor::AnalysisPredictor(const AnalysisConfig &config)
   } else {
     predictor_id_ = inference::GetUniqueId();
   }
-  root_predictor_id_ = predictor_id_;
 }
 
 bool AnalysisPredictor::Init(
@@ -404,6 +403,10 @@ bool AnalysisPredictor::Init(
   }
 #endif
 
+  if (!status_is_cloned_) {
+    root_predictor_id_ = predictor_id_;
+  }
+
   // no matter with or without MKLDNN
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
 
@@ -416,7 +419,6 @@ bool AnalysisPredictor::Init(
   if (!CreateExecutor()) {
     return false;
   }
-
   if (!PrepareProgram(program)) {
     return false;
   }
@@ -467,7 +469,6 @@ bool AnalysisPredictor::Init(
 #endif
 
   inference::DisplayMemoryInfo(place_, "Init predictor");
-
   return true;
 }
 
@@ -681,14 +682,21 @@ bool AnalysisPredictor::PrepareProgram(
     const std::shared_ptr<framework::ProgramDesc> &program) {
   if (!program) {
     if (!LoadProgramDesc()) return false;
-    // If not cloned, the parameters should be loaded.
-    // If config_.ir_optim() is True, parameters is loaded in
-    // OptimizeInferenceProgram(), but other persistable variables
-    // (like RAW type var) are not created in scope.
-    // If config_.ir_optim() is False, parameters is loaded in LoadParameters(),
-    // still need to create other persistable variables.
-    // So in both case, create persistable variables at first.
+    // If not cloned, the parameters should be loaded.
+    // If config_.ir_optim() is True, parameters is loaded in
+    // OptimizeInferenceProgram(), but other persistable variables
+    // (like RAW type var) are not created in scope.
+    // If config_.ir_optim() is False, parameters is loaded in
+    // LoadParameters(), still need to create other persistable variables. So
+    // in both case, create persistable variables at first.
+#ifdef PADDLE_WITH_DNNL
+    if (config_.use_mkldnn_) {
+      executor_->CreateVariables(
+          *inference_program_, 0, true, sub_scope_, true);
+    }
+#else
     executor_->CreateVariables(*inference_program_, 0, true, sub_scope_);
+#endif
 
     // if enable_ir_optim_ is false,
    // the analysis pass(op fuse, graph analysis, trt subgraph, mkldnn etc) will
@@ -951,7 +959,13 @@ bool AnalysisPredictor::CommInit() {
     order += 1;
   }
   framework::NaiveExecutor e(place_);
+#ifdef PADDLE_WITH_DNNL
+  if (config_.use_mkldnn_) {
+    e.CreateVariables(*comm_init_program, 0, true, scope_.get(), true);
+  }
+#else
   e.CreateVariables(*comm_init_program, 0, true, scope_.get());
+#endif
   e.Prepare(scope_.get(), *comm_init_program, 0);
   e.Run();
   VLOG(3) << "Comm init successful.";
@@ -1848,6 +1862,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   });
   // The config and argument take a lot of storage,
   // when the predictor settings are complete, we release these stores.
+  config_.PartiallyRelease();
 #if defined(PADDLE_WITH_TESTING)
   fusion_statis_ = *argument_->fusion_statis_ptr();
 #endif
@@ -2818,7 +2833,7 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
   VLOG(3) << "AnalysisPredictor::Clone";
   std::lock_guard<std::mutex> lk(clone_mutex_);
   auto *x = new AnalysisPredictor(config_);
-
+  x->status_is_cloned_ = true;
   x->root_predictor_id_ = this->root_predictor_id_;
   x->config_.apply_optim_ = false;
   if (config_.use_external_stream_ && stream == nullptr) {
@@ -2831,9 +2846,7 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
         "function has received a stream parameter."));
   }
   x->predictor_stream_ = stream;
-  x->Init(nullptr);
-  x->status_is_cloned_ = true;
-
+  x->Init(scope_, inference_program_);
 #ifdef PADDLE_WITH_TENSORRT
 x->executor_->ResetTrtOps(++AnalysisPredictor::clone_num_);
 #endif
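The heart of PATCH 09 is the new init_mkldnn_memdesc flag: while variables are being created, still single-threaded, each fresh DenseTensor (or the value tensor of a SelectedRows) has its mem_desc() touched once so the lazily created OneDNN storage properties already exist before concurrent inference starts. A self-contained sketch of this touch-to-preinitialize idiom; Lazy and CreateAll are illustrative names, not Paddle types:

    #include <memory>
    #include <vector>

    struct Lazy {
      std::unique_ptr<int> state;  // created on first access
      const int& Get() {
        if (!state) state = std::make_unique<int>(0);  // lazy init
        return *state;
      }
    };

    // Mirrors CreateVariables(..., init_mkldnn_memdesc): optionally force the
    // lazy member into existence while execution is still single-threaded.
    void CreateAll(std::vector<Lazy>& vars, bool preinit) {
      for (auto& v : vars) {
        if (preinit) v.Get();  // touch once now, so readers never race later
      }
    }

The design point: first access to a lazily built object is a write, so moving that first access to a known single-threaded phase sidesteps the data race without adding locks to the hot path.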
From 37c698a509237f9ab2dc7f7229b155dd326c7ab1 Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Fri, 29 Dec 2023 09:22:48 +0000
Subject: [PATCH 10/12] refine: drop the use_mkldnn_ guard for mem_desc
 pre-creation

---
 paddle/fluid/inference/api/analysis_predictor.cc | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 893ead4d29514..d87902d30116f 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -690,10 +690,7 @@ bool AnalysisPredictor::PrepareProgram(
     // LoadParameters(), still need to create other persistable variables. So
     // in both case, create persistable variables at first.
 #ifdef PADDLE_WITH_DNNL
-    if (config_.use_mkldnn_) {
-      executor_->CreateVariables(
-          *inference_program_, 0, true, sub_scope_, true);
-    }
+    executor_->CreateVariables(*inference_program_, 0, true, sub_scope_, true);
 #else
     executor_->CreateVariables(*inference_program_, 0, true, sub_scope_);
 #endif
@@ -960,9 +957,7 @@ bool AnalysisPredictor::CommInit() {
   }
   framework::NaiveExecutor e(place_);
 #ifdef PADDLE_WITH_DNNL
-  if (config_.use_mkldnn_) {
-    e.CreateVariables(*comm_init_program, 0, true, scope_.get(), true);
-  }
+  e.CreateVariables(*comm_init_program, 0, true, scope_.get(), true);
 #else
   e.CreateVariables(*comm_init_program, 0, true, scope_.get());
 #endif
   e.Prepare(scope_.get(), *comm_init_program, 0);
   e.Run();
   VLOG(3) << "Comm init successful.";

From 2f846fec33c407a152ef0f26a32a659b829248b0 Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Tue, 2 Jan 2024 01:25:50 +0000
Subject: [PATCH 11/12] refine: return a static undefined mem_desc for plain
 tensors

---
 .../fluid/inference/api/analysis_predictor.cc | 22 ++++++-------------
 paddle/phi/core/dense_tensor_impl.cc          | 15 +++++--------
 2 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index d87902d30116f..4af55a7c6c933 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -682,18 +682,14 @@ bool AnalysisPredictor::PrepareProgram(
     const std::shared_ptr<framework::ProgramDesc> &program) {
   if (!program) {
     if (!LoadProgramDesc()) return false;
-    // If not cloned, the parameters should be loaded.
-    // If config_.ir_optim() is True, parameters is loaded in
-    // OptimizeInferenceProgram(), but other persistable variables
-    // (like RAW type var) are not created in scope.
-    // If config_.ir_optim() is False, parameters is loaded in
-    // LoadParameters(), still need to create other persistable variables. So
-    // in both case, create persistable variables at first.
-#ifdef PADDLE_WITH_DNNL
-    executor_->CreateVariables(*inference_program_, 0, true, sub_scope_, true);
-#else
+    // If not cloned, the parameters should be loaded.
+    // If config_.ir_optim() is True, parameters is loaded in
+    // OptimizeInferenceProgram(), but other persistable variables
+    // (like RAW type var) are not created in scope.
+    // If config_.ir_optim() is False, parameters is loaded in LoadParameters(),
+    // still need to create other persistable variables.
+    // So in both case, create persistable variables at first.
     executor_->CreateVariables(*inference_program_, 0, true, sub_scope_);
-#endif
 
     // if enable_ir_optim_ is false,
     // the analysis pass(op fuse, graph analysis, trt subgraph, mkldnn etc) will
@@ -956,7 +952,7 @@ bool AnalysisPredictor::CommInit() {
     order += 1;
   }
   framework::NaiveExecutor e(place_);
-#ifdef PADDLE_WITH_DNNL
-  e.CreateVariables(*comm_init_program, 0, true, scope_.get(), true);
-#else
   e.CreateVariables(*comm_init_program, 0, true, scope_.get());
-#endif
   e.Prepare(scope_.get(), *comm_init_program, 0);
   e.Run();
   VLOG(3) << "Comm init successful.";
diff --git a/paddle/phi/core/dense_tensor_impl.cc b/paddle/phi/core/dense_tensor_impl.cc
index 8a3d7db993ad7..39efb048e7432 100644
--- a/paddle/phi/core/dense_tensor_impl.cc
+++ b/paddle/phi/core/dense_tensor_impl.cc
@@ -379,13 +379,8 @@ std::vector<DenseTensor> DenseTensor::Chunk(int64_t chunks,
 #ifdef PADDLE_WITH_DNNL
 const dnnl::memory::desc& DenseTensor::mem_desc() const {
   if (storage_properties_ == nullptr) {
-    std::unique_ptr<StorageProperties>* storage_properties_ptr =
-        const_cast<std::unique_ptr<StorageProperties>*>(&storage_properties_);
-    *storage_properties_ptr = std::make_unique<OneDNNStorageProperties>();
-    static_cast<OneDNNStorageProperties*>(storage_properties_ptr->get())
-        ->mem_desc = dnnl::memory::desc();
-    static_cast<OneDNNStorageProperties*>(storage_properties_ptr->get())
-        ->format = dnnl::memory::format_tag::undef;
+    static dnnl::memory::desc undef_desc = dnnl::memory::desc();
+    return undef_desc;
   }
   return this->storage_properties().mem_desc;
 }
@@ -393,8 +388,10 @@ const dnnl::memory::desc& DenseTensor::mem_desc() const {
 void DenseTensor::set_mem_desc(const dnnl::memory::desc& mem_desc) {
   if (storage_properties_ == nullptr) {
     storage_properties_ = std::make_unique<OneDNNStorageProperties>();
-  }
-  if (OneDNNStorageProperties::classof(storage_properties_.get())) {
+    static_cast<OneDNNStorageProperties*>(storage_properties_.get())->mem_desc =
+        mem_desc;
+    meta_.layout = DataLayout::ONEDNN;
+  } else if (OneDNNStorageProperties::classof(storage_properties_.get())) {
     static_cast<OneDNNStorageProperties*>(storage_properties_.get())->mem_desc =
         mem_desc;
     meta_.layout = DataLayout::ONEDNN;
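PATCH 11 abandons the const_cast approach entirely: when a tensor has no storage properties, mem_desc() now returns a reference to a function-local static default descriptor. Since C++11, such statics are guaranteed to be initialized exactly once and thread-safely, so the const read path no longer mutates anything, and set_mem_desc() remains the single mutating entry point. A compact sketch of the pattern, with Desc standing in for dnnl::memory::desc:

    #include <memory>

    struct Desc { int tag = -1; };   // stands in for dnnl::memory::desc

    struct Tensor {
      std::unique_ptr<Desc> props;   // stands in for storage_properties_

      const Desc& mem_desc() const {
        if (!props) {
          static const Desc undef{};  // one shared "undefined" default:
          return undef;               // thread-safe init, no const_cast
        }
        return *props;
      }

      void set_mem_desc(const Desc& d) {
        if (!props) props = std::make_unique<Desc>();
        *props = d;                   // the only mutating path, and non-const
      }
    };

Returning a reference to a static is safe here because the default object is immutable and lives for the whole program; the trade-off is that every descriptor-less tensor shares the same "undefined" instance.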
From 8c422076640dca861ff46e19ad5e4325f741f6e9 Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Tue, 2 Jan 2024 01:28:41 +0000
Subject: [PATCH 12/12] refine: revert the NaiveExecutor::CreateVariables change

---
 paddle/fluid/framework/naive_executor.cc | 24 ++----------------------
 paddle/fluid/framework/naive_executor.h  |  3 +--
 2 files changed, 3 insertions(+), 24 deletions(-)

diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc
index afd61f8e21586..3bfacc950325c 100644
--- a/paddle/fluid/framework/naive_executor.cc
+++ b/paddle/fluid/framework/naive_executor.cc
@@ -152,8 +152,7 @@ void NaiveExecutor::Run() {
 void NaiveExecutor::CreateVariables(const ProgramDesc &desc,
                                     int block_id,
                                     bool persistable,
-                                    Scope *scope,
-                                    bool init_mkldnn_memdesc) {
+                                    Scope *scope) {
   PADDLE_ENFORCE_NOT_NULL(scope,
                           platform::errors::InvalidArgument(
                               "The Scope to hold variables is nullptr."));
@@ -175,16 +174,7 @@ void NaiveExecutor::CreateVariables(const ProgramDesc &desc,
       continue;
     }
     num_vars++;
-#ifdef PADDLE_WITH_DNNL
-    auto init_mkldnn_memdesc_func = [&](Variable *var,
-                                        proto::VarType::Type var_type) {
-      if (var_type == proto::VarType::LOD_TENSOR) {
-        var->GetMutable<phi::DenseTensor>()->mem_desc();
-      } else if (var_type == proto::VarType::SELECTED_ROWS) {
-        var->GetMutable<phi::SelectedRows>()->mutable_value()->mem_desc();
-      }
-    };
-#endif
+
     if (persistable == var->Persistable()) {
       if (persistable) {
         if (!anc->FindVar(var->Name())) {
@@ -192,22 +182,12 @@ void NaiveExecutor::CreateVariables(const ProgramDesc &desc,
           VLOG(3) << scope << " Create persistable variable " << var->Name()
                   << ", which pointer is " << ptr;
           InitializeVariable(ptr, var->GetType());
-#ifdef PADDLE_WITH_DNNL
-          if (init_mkldnn_memdesc) {
-            init_mkldnn_memdesc_func(ptr, var->GetType());
-          }
-#endif
         }
       } else {
         auto *ptr = const_cast<Scope *>(scope)->Var(var->Name());
         VLOG(3) << scope << " Create variable " << var->Name()
                 << ", which pointer is " << ptr;
         InitializeVariable(ptr, var->GetType());
-#ifdef PADDLE_WITH_DNNL
-        if (init_mkldnn_memdesc) {
-          init_mkldnn_memdesc_func(ptr, var->GetType());
-        }
-#endif
       }
     }
   }
diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h
index d88c5d7f02827..5a558f3bd6921 100644
--- a/paddle/fluid/framework/naive_executor.h
+++ b/paddle/fluid/framework/naive_executor.h
@@ -71,8 +71,7 @@ class NaiveExecutor {
   void CreateVariables(const ProgramDesc& desc,
                        int block_id,
                        bool persistable,
-                       Scope* scope,
-                       bool init_mkldnn_memdesc = false);
+                       Scope* scope);
 
   // Run all the operators.
   void Run();
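Net effect of the series: PATCH 12 restores NaiveExecutor to its original signature, so the eager pre-creation from PATCH 09/10 is gone and everything OneDNN-specific ends up confined to DenseTensor's storage properties, as left by PATCH 11. The expected call pattern after the series, sketched under the assumption of a PADDLE_WITH_DNNL build with the post-series headers (illustrative usage, not a test from the patches):

    #include "dnnl.hpp"
    #include "paddle/phi/core/dense_tensor.h"

    void example() {
      phi::DenseTensor t;
      // With no storage properties yet, mem_desc() returns the shared
      // "undefined" descriptor introduced in PATCH 11 instead of
      // materializing anything behind a const_cast.
      const dnnl::memory::desc& before = t.mem_desc();
      (void)before;

      dnnl::memory::desc md({1, 3, 8, 8},
                            dnnl::memory::data_type::f32,
                            dnnl::memory::format_tag::nchw);
      // set_mem_desc() creates OneDNNStorageProperties on demand and
      // switches the tensor layout to DataLayout::ONEDNN.
      t.set_mem_desc(md);
    }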