From 6c3e3a2a40bb5dd5c92ac6e1a53a95404b51a16a Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Thu, 14 Dec 2023 11:31:50 +0000 Subject: [PATCH 01/12] delete dense_tensor mem_desc_ --- paddle/phi/core/dense_tensor.cc | 10 ---------- paddle/phi/core/dense_tensor.h | 18 ------------------ paddle/phi/core/dense_tensor.inl | 12 +----------- paddle/phi/core/dense_tensor_impl.cc | 27 +++++++++++++++++++++++---- 4 files changed, 24 insertions(+), 43 deletions(-) diff --git a/paddle/phi/core/dense_tensor.cc b/paddle/phi/core/dense_tensor.cc index c86a06bedef8d..1181a81266976 100644 --- a/paddle/phi/core/dense_tensor.cc +++ b/paddle/phi/core/dense_tensor.cc @@ -59,10 +59,6 @@ DenseTensor::DenseTensor(const DenseTensor& other) { storage_properties_ = std::move(CopyStorageProperties(other.storage_properties_)); inplace_version_counter_ = other.inplace_version_counter_; - -#ifdef PADDLE_WITH_DNNL - mem_desc_ = other.mem_desc_; -#endif } DenseTensor& DenseTensor::operator=(const DenseTensor& other) { @@ -74,9 +70,6 @@ DenseTensor& DenseTensor::operator=(const DenseTensor& other) { storage_properties_ = std::move(CopyStorageProperties(other.storage_properties_)); inplace_version_counter_ = other.inplace_version_counter_; -#ifdef PADDLE_WITH_DNNL - mem_desc_ = other.mem_desc_; -#endif return *this; } @@ -85,9 +78,6 @@ DenseTensor& DenseTensor::operator=(DenseTensor&& other) noexcept { std::swap(holder_, other.holder_); storage_properties_ = std::move(other.storage_properties_); std::swap(inplace_version_counter_, other.inplace_version_counter_); -#ifdef PADDLE_WITH_DNNL - mem_desc_ = other.mem_desc_; -#endif return *this; } diff --git a/paddle/phi/core/dense_tensor.h b/paddle/phi/core/dense_tensor.h index bcc2b07a89e3a..b78cec1483272 100644 --- a/paddle/phi/core/dense_tensor.h +++ b/paddle/phi/core/dense_tensor.h @@ -22,12 +22,6 @@ limitations under the License. */ #include "paddle/phi/core/tensor_meta.h" #include "paddle/utils/test_macros.h" -/* @jim19930609: Move to MKLDNN_Tensor in the future - */ -#ifdef PADDLE_WITH_DNNL -#include "dnnl.hpp" // NOLINT -#endif - namespace phi { class DenseTensorUtils; @@ -290,18 +284,6 @@ class TEST_API DenseTensor : public TensorBase, std::shared_ptr inplace_version_counter_ = std::make_shared(); -/* @jim19930609: This is a hack -In general, it is badly designed to fuse MKLDNN-specific objects into a -generic Tensor. -We temporarily leave them here to unblock Tensor Unification progress. -In the final state, we should come up with a MKLDNN_Tensor and move the -following codes there. -*/ -#ifdef PADDLE_WITH_DNNL - /// \brief memory descriptor of tensor which have layout set as kMKLDNN - dnnl::memory::desc mem_desc_; -#endif - #ifndef PADDLE_WITH_CUSTOM_KERNEL #include "paddle/phi/core/dense_tensor.inl" #endif diff --git a/paddle/phi/core/dense_tensor.inl b/paddle/phi/core/dense_tensor.inl index 19101e7093f74..a8672b2171143 100644 --- a/paddle/phi/core/dense_tensor.inl +++ b/paddle/phi/core/dense_tensor.inl @@ -97,22 +97,12 @@ std::vector Split(int64_t split_size, int64_t axis) const; std::vector Chunk(int64_t chunks, int64_t axis) const; -/* @jim19930609: This is a hack -In general, it is badly designed to fuse MKLDNN-specific objects into a -generic Tensor. -We temporarily leave them here to unblock Tensor Unification progress. -In the final state, we should come up with a MKLDNN_Tensor and move the -following codes there. 
-*/ #ifdef PADDLE_WITH_DNNL public: const dnnl::memory::desc& mem_desc() const; -inline void set_mem_desc(const dnnl::memory::desc& mem_desc) { - mem_desc_ = mem_desc; - meta_.layout = DataLayout::ONEDNN; -} +void set_mem_desc(const dnnl::memory::desc& mem_desc); #endif diff --git a/paddle/phi/core/dense_tensor_impl.cc b/paddle/phi/core/dense_tensor_impl.cc index 5fa43647da19c..770443acf1838 100644 --- a/paddle/phi/core/dense_tensor_impl.cc +++ b/paddle/phi/core/dense_tensor_impl.cc @@ -377,7 +377,29 @@ std::vector DenseTensor::Chunk(int64_t chunks, } #ifdef PADDLE_WITH_DNNL -const dnnl::memory::desc& DenseTensor::mem_desc() const { return mem_desc_; } +const dnnl::memory::desc& DenseTensor::mem_desc() const { + if (storage_properties_ == nullptr) { + std::unique_ptr* storage_properties_ptr = + const_cast*>(&storage_properties_); + *storage_properties_ptr = std::make_unique(); + } + return this->storage_properties().mem_desc; +} + +void DenseTensor::set_mem_desc(const dnnl::memory::desc& mem_desc) { + if (storage_properties_ == nullptr) { + storage_properties_ = std::make_unique(); + } + if (OneDNNStorageProperties::classof(storage_properties_.get())) { + static_cast(storage_properties_.get())->mem_desc = + mem_desc; + meta_.layout = DataLayout::ONEDNN; + } else { + PADDLE_THROW(phi::errors::InvalidArgument( + "The actual type of storage_properties is inconsistent with the type " + "of the template parameter passed in.")); + } +} #endif // NOTE: For historical reasons, this interface has a special behavior, @@ -394,9 +416,6 @@ DenseTensor& DenseTensor::ShareDataWith(const DenseTensor& src) { meta_.strides = src.meta_.strides; storage_properties_ = std::move(CopyStorageProperties(src.storage_properties_)); -#ifdef PADDLE_WITH_DNNL - mem_desc_ = src.mem_desc_; -#endif return *this; } From 239a6a260c905c41ffd798716ccdf9bab3bdc025 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 15 Dec 2023 02:37:35 +0000 Subject: [PATCH 02/12] refine --- paddle/phi/backends/onednn/onednn_reuse.h | 96 +++++++++++++++++++---- paddle/phi/kernels/onednn/conv_function.h | 66 ++++++++++++++-- paddle/phi/kernels/onednn/conv_handler.h | 35 ++++++++- 3 files changed, 172 insertions(+), 25 deletions(-) diff --git a/paddle/phi/backends/onednn/onednn_reuse.h b/paddle/phi/backends/onednn/onednn_reuse.h index d9719c6f3e5b2..db6ceef72b329 100644 --- a/paddle/phi/backends/onednn/onednn_reuse.h +++ b/paddle/phi/backends/onednn/onednn_reuse.h @@ -318,16 +318,32 @@ class OneDNNHandlerT { typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... args) { - fwd_pd_ = std::make_shared( - engine_, std::forward(args)..., first); + try { + fwd_pd_ = std::make_shared( + engine_, std::forward(args)..., first); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan7")); + std::rethrow_exception(std::current_exception()); + } } template typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... 
args) { - fwd_pd_ = std::make_shared( - engine_, std::forward(first), std::forward(args)...); + try { + fwd_pd_ = std::make_shared( + engine_, std::forward(first), std::forward(args)...); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan8")); + std::rethrow_exception(std::current_exception()); + } } template @@ -342,8 +358,16 @@ class OneDNNHandlerT { bwd_pd_ = std::static_pointer_cast( dev_ctx_.GetBlob(key_pd)); if (bwd_pd_ == nullptr) { - bwd_pd_ = std::make_shared( - engine_, std::forward(args)..., *fwd_pd_); + try { + bwd_pd_ = std::make_shared( + engine_, std::forward(args)..., *fwd_pd_); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan1")); + std::rethrow_exception(std::current_exception()); + } dev_ctx_.SetBlob(key_pd, bwd_pd_); } } @@ -361,8 +385,16 @@ class OneDNNHandlerT { std::static_pointer_cast( dev_ctx_.GetBlob(key_pd)); if (bwd_w_pd_ == nullptr) { - bwd_w_pd_ = std::make_shared( - engine_, std::forward(args)..., *fwd_pd_); + try { + bwd_w_pd_ = std::make_shared( + engine_, std::forward(args)..., *fwd_pd_); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan2")); + std::rethrow_exception(std::current_exception()); + } dev_ctx_.SetBlob(key_pd, bwd_w_pd_); } } @@ -621,16 +653,32 @@ class OneDNNHandlerNoCachingT { typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... args) { - fwd_pd_ = std::make_shared( - engine_, std::forward(args)..., first); + try { + fwd_pd_ = std::make_shared( + engine_, std::forward(args)..., first); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan3")); + std::rethrow_exception(std::current_exception()); + } } template typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... 
args) { - fwd_pd_ = std::make_shared( - engine_, std::forward(first), std::forward(args)...); + try { + fwd_pd_ = std::make_shared( + engine_, std::forward(first), std::forward(args)...); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan4")); + std::rethrow_exception(std::current_exception()); + } } template @@ -640,8 +688,16 @@ class OneDNNHandlerNoCachingT { PADDLE_ENFORCE_NOT_NULL( fwd_pd_, errors::Unavailable("Get oneDNN Forward primitive %s failed.")); - bwd_pd_ = std::make_shared( - engine_, std::forward(args)..., *fwd_pd_); + try { + bwd_pd_ = std::make_shared( + engine_, std::forward(args)..., *fwd_pd_); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan5")); + std::rethrow_exception(std::current_exception()); + } } template @@ -653,8 +709,16 @@ class OneDNNHandlerNoCachingT { errors::Unavailable("Get oneDNN Forward primitive %s failed.")); auto bwd_desc = typename TBackward_params::desc(std::forward(args)...); - bwd_w_pd_ = std::make_shared( - bwd_desc, engine_, *fwd_pd_); + try { + bwd_w_pd_ = std::make_shared( + bwd_desc, engine_, *fwd_pd_); + } catch (std::exception& ex) { + LOG(WARNING) << Type() << " raises an exception " + << platform::demangle(typeid(ex).name()) << ", " + << ex.what(); + PADDLE_THROW(platform::errors::Unavailable("wanghuan6")); + std::rethrow_exception(std::current_exception()); + } } std::shared_ptr AcquireMemoryFromPrimitive( diff --git a/paddle/phi/kernels/onednn/conv_function.h b/paddle/phi/kernels/onednn/conv_function.h index 7d7e74f691a02..e9d66786c6225 100644 --- a/paddle/phi/kernels/onednn/conv_function.h +++ b/paddle/phi/kernels/onednn/conv_function.h @@ -29,8 +29,10 @@ static dnnl::memory::data_type GetDstType( std::string fuse_activation, bool fuse_residual_conn, const phi::DenseTensor* residual_param) { + std::cout << "GetDstType" << std::endl; auto dst_dt = dnnl::memory::data_type::f32; if (is_int8) { + std::cout << "GetDstType1" << std::endl; dst_dt = (fuse_activation == "relu" || fuse_activation == "relu6") ? 
dnnl::memory::data_type::u8 : dnnl::memory::data_type::s8; @@ -38,17 +40,21 @@ static dnnl::memory::data_type GetDstType( dst_dt = dnnl::memory::data_type::f32; } if (fuse_residual_conn && residual_param) { + std::cout << "GetDstType2" << std::endl; auto residual_dt = funcs::ToOneDNNDataType(residual_param->dtype()); if (dst_dt != residual_dt) dst_dt = residual_dt; } } else { + std::cout << "GetDstType3" << std::endl; if (!force_fp32_output && is_bfloat16) { dst_dt = dnnl::memory::data_type::bf16; if (fuse_residual_conn && residual_param) { + std::cout << "GetDstType4" << std::endl; dst_dt = funcs::ToOneDNNDataType(residual_param->dtype()); } } } + std::cout << "GetDstType5" << std::endl; return dst_dt; } @@ -85,10 +91,12 @@ void ComputeFP32(const OneDNNContext& dev_ctx, bool fuse_residual_conn, bool force_fp32_output, DenseTensor* output) { + std::cout << "ComputeFP32 " << std::endl; const auto& onednn_engine = dev_ctx.GetEngine(); const bool is_conv3d = strides.size() == 3U; const std::string& unique_name = dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0]; + std::cout << "ComputeFP32 2" << std::endl; PD_VISIT_FLOAT_AND_INT8_TYPES( filter->dtype(), "ConvOneDNNHandlerT", ([&] { onednn::ConvOneDNNHandlerT handler(dev_ctx, @@ -110,33 +118,47 @@ void ComputeFP32(const OneDNNContext& dev_ctx, force_fp32_output, output, unique_name); + std::cout << "ComputeFP32 3" << std::endl; auto src_memory_p = handler.AcquireSrcMemoryWithReorder(input); + std::cout << "ComputeFP32 4" << std::endl; auto weights_memory_p = handler.AcquireWeightsMemoryWithReorder( filter, groups, is_conv3d, is_test); + std::cout << "ComputeFP32 5" << std::endl; std::shared_ptr dst_memory_p; if (fuse_residual_conn) { + std::cout << "ComputeFP32 6" << std::endl; dst_memory_p = handler.AcquireDstMemoryWithResidual(output, residual_param); + std::cout << "ComputeFP32 7" << std::endl; } else { + std::cout << "ComputeFP32 8" << std::endl; dst_memory_p = handler.template AcquireDstMemory(output); + std::cout << "ComputeFP32 9" << std::endl; } - + std::cout << "ComputeFP32 10" << std::endl; auto conv_p = handler.AcquireForwardPrimitive(); + std::cout << "ComputeFP32 11" << std::endl; std::unordered_map args = { {DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_WEIGHTS, *weights_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; if (bias) { + std::cout << "ComputeFP32 12" << std::endl; auto bias_memory_p = handler.AcquireBiasMemoryWithReorder(bias, is_test); + std::cout << "ComputeFP32 13" << std::endl; args.insert({DNNL_ARG_BIAS, *bias_memory_p}); } auto& astream = OneDNNContext::tls().get_stream(); + std::cout << "ComputeFP32 14" << std::endl; conv_p->execute(astream, args); + std::cout << "ComputeFP32 15" << std::endl; astream.wait(); + std::cout << "ComputeFP32 16" << std::endl; output->set_mem_desc(dst_memory_p->get_desc()); + std::cout << "ComputeFP32 17" << std::endl; })); } @@ -158,7 +180,9 @@ void ComputeINT8(const OneDNNContext& dev_ctx, bool fuse_residual_conn, bool force_fp32_output, DenseTensor* output) { + std::cout << "ComputeINT8 " << std::endl; const auto& onednn_engine = dev_ctx.GetEngine(); + std::cout << "ComputeINT8 2" << std::endl; const bool is_conv3d = strides.size() == 3U; bool unsigned_output = @@ -177,6 +201,7 @@ void ComputeINT8(const OneDNNContext& dev_ctx, "residual fusion does not support force output with fp32")); const std::string& unique_name = dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0]; + std::cout << "ComputeINT8 3" << std::endl; PD_VISIT_FLOAT_AND_INT8_TYPES( 
filter->dtype(), "ConvOneDNNHandlerT", ([&] { onednn::ConvOneDNNHandlerT handler(dev_ctx, @@ -198,9 +223,9 @@ void ComputeINT8(const OneDNNContext& dev_ctx, force_fp32_output, output, unique_name); - + std::cout << "ComputeINT8 4" << std::endl; auto src_memory_p = handler.AcquireSrcMemoryWithReorder(input); - + std::cout << "ComputeINT8 5" << std::endl; const auto& scale_weights_data = dev_ctx.HasDnnAttr("Scale_weights") ? PADDLE_GET_CONST(std::vector, @@ -210,9 +235,10 @@ void ComputeINT8(const OneDNNContext& dev_ctx, int mask_reorder = is_multi_channel ? ((groups != 1) ? (1 << 1) + (1 << 0) : 1 << 0) : 0; + std::cout << "ComputeINT8 6" << std::endl; auto weights_memory_p = handler.AcquireWeightsMemoryWithReorder( filter, groups, false, true, scale_weights_data, mask_reorder); - + std::cout << "ComputeINT8 7" << std::endl; std::shared_ptr dst_memory_p; if (fuse_residual_conn) { PADDLE_ENFORCE_EQ( @@ -224,49 +250,65 @@ void ComputeINT8(const OneDNNContext& dev_ctx, " and residual param's dimension =%d .", output->dims().size(), residual_param->dims().size())); + std::cout << "ComputeINT8 8" << std::endl; dst_memory_p = handler.AcquireDstMemoryWithResidual(output, residual_param); + std::cout << "ComputeINT8 9" << std::endl; need_s8_to_u8 = (funcs::OneDNNGetDataType() == dnnl::memory::data_type::s8) && unsigned_output; + std::cout << "ComputeINT8 10" << std::endl; } else { + std::cout << "ComputeINT8 11" << std::endl; dst_memory_p = handler.template AcquireDstMemory(output); + std::cout << "ComputeINT8 12" << std::endl; } + std::cout << "ComputeINT8 13" << std::endl; auto conv_p = handler.AcquireForwardPrimitive(); - + std::cout << "ComputeINT8 14" << std::endl; std::unordered_map args = { {DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_WEIGHTS, *weights_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; if (bias) { + std::cout << "ComputeINT8 15" << std::endl; auto bias_memory_p = handler.AcquireBiasMemoryWithReorder(bias, true); + std::cout << "ComputeINT8 16" << std::endl; args.insert({DNNL_ARG_BIAS, *bias_memory_p}); } - + std::cout << "ComputeINT8 17" << std::endl; auto src_scales_memory = handler.AcquireScalesMemory(DNNL_ARG_SRC); + std::cout << "ComputeINT8 18" << std::endl; args.insert({DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC, *src_scales_memory}); - + std::cout << "ComputeINT8 19" << std::endl; auto wei_scales_memory = handler.AcquireScalesMemory(DNNL_ARG_WEIGHTS); + std::cout << "ComputeINT8 20" << std::endl; args.insert( {DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS, *wei_scales_memory}); if (!force_fp32_output) { + std::cout << "ComputeINT8 21" << std::endl; auto dst_scales_memory = handler.AcquireScalesMemory(DNNL_ARG_DST); + std::cout << "ComputeINT8 22" << std::endl; args.insert( {DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST, *dst_scales_memory}); } auto& astream = OneDNNContext::tls().get_stream(); + std::cout << "ComputeINT8 23" << std::endl; conv_p->execute(astream, args); + std::cout << "ComputeINT8 24" << std::endl; astream.wait(); + std::cout << "ComputeINT8 25" << std::endl; if (need_s8_to_u8) { dev_ctx.Alloc(output); } - + std::cout << "ComputeINT8 26" << std::endl; output->set_mem_desc(dst_memory_p->get_desc()); + std::cout << "ComputeINT8 27" << std::endl; })); } @@ -288,6 +330,7 @@ void ConvOnednn(const Context& dev_ctx, bool fuse_residual_connection, bool force_fp32_output, DenseTensor* out) { + std::cout << "ConvOnednn" << std::endl; PADDLE_ENFORCE_EQ( dev_ctx.GetPlace().GetType(), AllocationType::CPU, @@ -301,8 +344,10 @@ void ConvOnednn(const Context& dev_ctx, fuse_activation, 
fuse_residual_connection, residual_param); + std::cout << "ConvOnednn2" << std::endl; if (!is_INT8) { if (dst_dt == dnnl::memory::data_type::f32) { + std::cout << "ConvOnednn3" << std::endl; ComputeFP32(dev_ctx, input, filter, @@ -321,6 +366,7 @@ void ConvOnednn(const Context& dev_ctx, force_fp32_output, out); } else if (dst_dt == dnnl::memory::data_type::bf16) { + std::cout << "ConvOnednn4" << std::endl; ComputeFP32(dev_ctx, input, filter, @@ -341,6 +387,7 @@ void ConvOnednn(const Context& dev_ctx, } } else { if (dst_dt == dnnl::memory::data_type::f32) { + std::cout << "ConvOnednn5" << std::endl; ComputeINT8(dev_ctx, input, filter, @@ -359,6 +406,7 @@ void ConvOnednn(const Context& dev_ctx, force_fp32_output, out); } else if (dst_dt == dnnl::memory::data_type::u8) { + std::cout << "ConvOnednn6" << std::endl; ComputeINT8(dev_ctx, input, filter, @@ -377,6 +425,7 @@ void ConvOnednn(const Context& dev_ctx, force_fp32_output, out); } else if (dst_dt == dnnl::memory::data_type::s8) { + std::cout << "ConvOnednn7" << std::endl; ComputeINT8(dev_ctx, input, filter, @@ -396,6 +445,7 @@ void ConvOnednn(const Context& dev_ctx, out); } } + std::cout << "ConvOnednn8" << std::endl; } } // namespace phi diff --git a/paddle/phi/kernels/onednn/conv_handler.h b/paddle/phi/kernels/onednn/conv_handler.h index 3d41c274de24e..0d570e9e84fe1 100644 --- a/paddle/phi/kernels/onednn/conv_handler.h +++ b/paddle/phi/kernels/onednn/conv_handler.h @@ -69,6 +69,7 @@ class ConvOneDNNHandlerT cpu_place, funcs::CreateKey( dev_ctx, common::vectorize(input->dims()), unique_name)) { + std::cout << "ConvOneDNNHandlerT" << std::endl; if (unlikely(!this->isCached())) { PADDLE_ENFORCE_EQ( input->layout(), @@ -142,8 +143,10 @@ class ConvOneDNNHandlerT std::vector strides(begin(strides_in), end(strides_in)); std::vector paddings(begin(paddings_in), end(paddings_in)); std::vector dilations(begin(dilations_in), end(dilations_in)); + std::cout << "ConvOneDNNHandlerT2" << std::endl; UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, data_dims, strides, ksize); + std::cout << "ConvOneDNNHandlerT3" << std::endl; std::transform( dilations.begin(), dilations.end(), dilations.begin(), [](int64_t i) { return i - 1; @@ -155,9 +158,11 @@ class ConvOneDNNHandlerT funcs::GetGroupConvWeightsTz(weights_tz, groups); const auto dst_tz = common::vectorize(output->dims()); - + std::cout << "ConvOneDNNHandlerT4" << std::endl; const dnnl::memory::dims stride_dims = strides; + std::cout << "ConvOneDNNHandlerT5" << std::endl; const auto onednn_paddings = funcs::ToOneDNNPadding(paddings); + std::cout << "ConvOneDNNHandlerT6" << std::endl; const dnnl::memory::dims dilations_dims = dilations; /* create memory descriptor for convolution without specified format * ('any') which lets a primitive (convolution in this case) choose @@ -171,34 +176,46 @@ class ConvOneDNNHandlerT dnnl::memory::desc src_md, weights_md; if (funcs::is_int8()) { + std::cout << "ConvOneDNNHandlerT7" << std::endl; src_md = funcs::OneDNNMemDesc(src_tz, funcs::ToOneDNNDataType(input->dtype()), chosen_memory_format); + std::cout << "ConvOneDNNHandlerT8" << std::endl; weights_md = funcs::OneDNNMemDesc( weights_tz, dnnl::memory::data_type::s8, chosen_memory_format); + std::cout << "ConvOneDNNHandlerT9" << std::endl; } else { + std::cout << "ConvOneDNNHandlerT10" << std::endl; src_md = funcs::OneDNNMemDesc(src_tz, data_type, chosen_memory_format); + std::cout << "ConvOneDNNHandlerT11" << std::endl; weights_md = funcs::OneDNNMemDesc( weights_tz, data_type, 
funcs::OneDNNMemoryFormat::any); + std::cout << "ConvOneDNNHandlerT12" << std::endl; } if (input->dims().size() == 4 && input->dims()[1] <= 4) { chosen_memory_format = funcs::OneDNNMemoryFormat::nhwc; } + std::cout << "ConvOneDNNHandlerT13" << std::endl; const auto dst_md = funcs::OneDNNMemDesc( dst_tz, funcs::OneDNNGetDataType(), chosen_memory_format); + std::cout << "ConvOneDNNHandlerT14" << std::endl; const auto fwd_prop_kind = dnnl::prop_kind::forward_inference; + std::cout << "ConvOneDNNHandlerT15" << std::endl; const dnnl::primitive_attr conv_attr = CreateConvAttrs(filter, groups, force_fp32_output, fuse_residual_conn, fuse_activation); + std::cout << "ConvOneDNNHandlerT16" << std::endl; if (bias) { auto bias_tz = common::vectorize(bias->dims()); + std::cout << "ConvOneDNNHandlerT17" << std::endl; dnnl::memory::desc bias_md = funcs::OneDNNMemDesc(bias_tz, dnnl::memory::data_type::f32, funcs::OneDNNMemoryFormat::x); + std::cout << "ConvOneDNNHandlerT18" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, @@ -212,7 +229,9 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); + std::cout << "ConvOneDNNHandlerT19" << std::endl; } else { + std::cout << "ConvOneDNNHandlerT20" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, fwd_prop_kind, @@ -224,6 +243,7 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); + std::cout << "ConvOneDNNHandlerT21" << std::endl; } } } @@ -253,6 +273,7 @@ class ConvOneDNNHandlerT cpu_place, funcs::CreateKey( dev_ctx, common::vectorize(in->dims()), unique_name)) { + std::cout << "ConvOneDNNHandlerT22" << std::endl; if (unlikely(!this->isBwdCached())) { PADDLE_ENFORCE_EQ( in->layout(), @@ -294,9 +315,11 @@ class ConvOneDNNHandlerT auto filter_data_dims = common::slice_ddim(filter_dims, 2, filter_dims.size()); auto ksize = common::vectorize(filter_data_dims); + std::cout << "ConvOneDNNHandlerT23" << std::endl; UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, data_dims, strides, ksize); + std::cout << "ConvOneDNNHandlerT24" << std::endl; auto src_tz = common::vectorize(in->dims()); auto weights_tz = common::vectorize(filter->dims()); @@ -311,6 +334,7 @@ class ConvOneDNNHandlerT */ const auto chosen_memory_format = funcs::OneDNNMemoryFormat::any; const auto weights_format = funcs::OneDNNMemoryFormat::any; + std::cout << "ConvOneDNNHandlerT25" << std::endl; auto src_md = funcs::OneDNNMemDesc( src_tz, funcs::OneDNNGetDataType(), chosen_memory_format); @@ -324,6 +348,7 @@ class ConvOneDNNHandlerT weights_tz, funcs::OneDNNGetDataType(), weights_format); auto diff_dst_md = funcs::OneDNNMemDesc( dst_tz, funcs::OneDNNGetDataType(), chosen_memory_format); + std::cout << "ConvOneDNNHandlerT26" << std::endl; auto onednn_paddings = funcs::ToOneDNNPadding(paddings); std::transform( @@ -337,10 +362,12 @@ class ConvOneDNNHandlerT dnnl::primitive_attr conv_attr; if (bias) { auto bias_tz = common::vectorize(bias->dims()); + std::cout << "ConvOneDNNHandlerT27" << std::endl; dnnl::memory::desc bias_md = funcs::OneDNNMemDesc(bias_tz, dnnl::memory::data_type::f32, funcs::OneDNNMemoryFormat::x); + std::cout << "ConvOneDNNHandlerT28" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, @@ -354,7 +381,9 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); + std::cout << "ConvOneDNNHandlerT29" << std::endl; } else { + std::cout << "ConvOneDNNHandlerT30" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, 
dnnl::prop_kind::forward_inference, @@ -366,7 +395,9 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); + std::cout << "ConvOneDNNHandlerT31" << std::endl; } + std::cout << "ConvOneDNNHandlerT32" << std::endl; this->AcquireBackwardPrimitiveDescriptor( dnnl::algorithm::convolution_direct, @@ -377,6 +408,7 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); + std::cout << "ConvOneDNNHandlerT33" << std::endl; this->AcquireBackwardWeightsPrimitiveDescriptor( dnnl::algorithm::convolution_direct, @@ -388,6 +420,7 @@ class ConvOneDNNHandlerT onednn_paddings[0], onednn_paddings[1]); } + std::cout << "ConvOneDNNHandlerT34" << std::endl; } dnnl::primitive_attr CreateConvAttrs(const DenseTensor* filter, From 1d9cbf4c98d630a313b7cd923ee2bff212e646c0 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 15 Dec 2023 02:42:54 +0000 Subject: [PATCH 03/12] refine --- paddle/phi/backends/onednn/onednn_reuse.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/paddle/phi/backends/onednn/onednn_reuse.h b/paddle/phi/backends/onednn/onednn_reuse.h index db6ceef72b329..8835d5d3bf14a 100644 --- a/paddle/phi/backends/onednn/onednn_reuse.h +++ b/paddle/phi/backends/onednn/onednn_reuse.h @@ -325,7 +325,7 @@ class OneDNNHandlerT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan7")); + PADDLE_THROW(phi::errors::Unavailable("wanghuan7")); std::rethrow_exception(std::current_exception()); } } @@ -341,7 +341,7 @@ class OneDNNHandlerT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan8")); + PADDLE_THROW(phi::errors::Unavailable("wanghuan8")); std::rethrow_exception(std::current_exception()); } } @@ -365,7 +365,7 @@ class OneDNNHandlerT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan1")); + PADDLE_THROW(phi::errors::Unavailable("wanghuan1")); std::rethrow_exception(std::current_exception()); } dev_ctx_.SetBlob(key_pd, bwd_pd_); @@ -392,7 +392,7 @@ class OneDNNHandlerT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan2")); + PADDLE_THROW(phi::errors::Unavailable("wanghuan2")); std::rethrow_exception(std::current_exception()); } dev_ctx_.SetBlob(key_pd, bwd_w_pd_); @@ -660,7 +660,7 @@ class OneDNNHandlerNoCachingT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan3")); + PADDLE_THROW(phi::errors::Unavailable("wanghuan3")); std::rethrow_exception(std::current_exception()); } } @@ -676,7 +676,7 @@ class OneDNNHandlerNoCachingT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan4")); + PADDLE_THROW(phi::errors::Unavailable("wanghuan4")); std::rethrow_exception(std::current_exception()); } } @@ -695,7 +695,7 @@ class OneDNNHandlerNoCachingT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan5")); + 
PADDLE_THROW(phi::errors::Unavailable("wanghuan5")); std::rethrow_exception(std::current_exception()); } } @@ -716,7 +716,7 @@ class OneDNNHandlerNoCachingT { LOG(WARNING) << Type() << " raises an exception " << platform::demangle(typeid(ex).name()) << ", " << ex.what(); - PADDLE_THROW(platform::errors::Unavailable("wanghuan6")); + PADDLE_THROW(phi::errors::Unavailable("wanghuan6")); std::rethrow_exception(std::current_exception()); } } From 5751e54eaecce1f8633c41f3c71c1ee36eacb967 Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 15 Dec 2023 02:45:19 +0000 Subject: [PATCH 04/12] refine --- paddle/phi/backends/onednn/onednn_reuse.h | 32 ++++++----------------- 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/paddle/phi/backends/onednn/onednn_reuse.h b/paddle/phi/backends/onednn/onednn_reuse.h index 8835d5d3bf14a..a4aa1730503fb 100644 --- a/paddle/phi/backends/onednn/onednn_reuse.h +++ b/paddle/phi/backends/onednn/onednn_reuse.h @@ -322,9 +322,7 @@ class OneDNNHandlerT { fwd_pd_ = std::make_shared( engine_, std::forward(args)..., first); } catch (std::exception& ex) { - LOG(WARNING) << Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan7")); std::rethrow_exception(std::current_exception()); } @@ -338,9 +336,7 @@ class OneDNNHandlerT { fwd_pd_ = std::make_shared( engine_, std::forward(first), std::forward(args)...); } catch (std::exception& ex) { - LOG(WARNING) << Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan8")); std::rethrow_exception(std::current_exception()); } @@ -362,9 +358,7 @@ class OneDNNHandlerT { bwd_pd_ = std::make_shared( engine_, std::forward(args)..., *fwd_pd_); } catch (std::exception& ex) { - LOG(WARNING) << Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan1")); std::rethrow_exception(std::current_exception()); } @@ -389,9 +383,7 @@ class OneDNNHandlerT { bwd_w_pd_ = std::make_shared( engine_, std::forward(args)..., *fwd_pd_); } catch (std::exception& ex) { - LOG(WARNING) << Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan2")); std::rethrow_exception(std::current_exception()); } @@ -657,9 +649,7 @@ class OneDNNHandlerNoCachingT { fwd_pd_ = std::make_shared( engine_, std::forward(args)..., first); } catch (std::exception& ex) { - LOG(WARNING) << Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan3")); std::rethrow_exception(std::current_exception()); } @@ -673,9 +663,7 @@ class OneDNNHandlerNoCachingT { fwd_pd_ = std::make_shared( engine_, std::forward(first), std::forward(args)...); } catch (std::exception& ex) { - LOG(WARNING) << Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan4")); std::rethrow_exception(std::current_exception()); } @@ -692,9 +680,7 @@ class OneDNNHandlerNoCachingT { bwd_pd_ = std::make_shared( engine_, std::forward(args)..., *fwd_pd_); } catch (std::exception& ex) { - LOG(WARNING) << 
Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan5")); std::rethrow_exception(std::current_exception()); } @@ -713,9 +699,7 @@ class OneDNNHandlerNoCachingT { bwd_w_pd_ = std::make_shared( bwd_desc, engine_, *fwd_pd_); } catch (std::exception& ex) { - LOG(WARNING) << Type() << " raises an exception " - << platform::demangle(typeid(ex).name()) << ", " - << ex.what(); + LOG(WARNING) << ex.what(); PADDLE_THROW(phi::errors::Unavailable("wanghuan6")); std::rethrow_exception(std::current_exception()); } From 84095d911cd80be27ff87e7e9a064ca579ba70db Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 15 Dec 2023 06:11:58 +0000 Subject: [PATCH 05/12] refine --- test/cpp/inference/infer_ut/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/test/cpp/inference/infer_ut/run.sh b/test/cpp/inference/infer_ut/run.sh index 6f967eb0aa6c7..93ad591df3a74 100755 --- a/test/cpp/inference/infer_ut/run.sh +++ b/test/cpp/inference/infer_ut/run.sh @@ -31,6 +31,7 @@ test_suite_list="cpu_tester*" # init test suite list, pass to --gtest_filter export RED='\033[0;31m' # red color export NC='\033[0m' # no color export YELLOW='\033[33m' # yellow color +export DNNL_VERBOSE=1 cd `dirname $0` current_dir=`pwd` From 319d536d82b001c6db7cf95f0c518eb95abe8d7a Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Mon, 18 Dec 2023 08:06:34 +0000 Subject: [PATCH 06/12] refine --- paddle/phi/backends/onednn/onednn_reuse.h | 80 +++++------------------ paddle/phi/core/dense_tensor_impl.cc | 2 + paddle/phi/kernels/onednn/conv_function.h | 66 +++---------------- paddle/phi/kernels/onednn/conv_handler.h | 35 +--------- test/cpp/inference/infer_ut/run.sh | 1 - 5 files changed, 27 insertions(+), 157 deletions(-) diff --git a/paddle/phi/backends/onednn/onednn_reuse.h b/paddle/phi/backends/onednn/onednn_reuse.h index a4aa1730503fb..d9719c6f3e5b2 100644 --- a/paddle/phi/backends/onednn/onednn_reuse.h +++ b/paddle/phi/backends/onednn/onednn_reuse.h @@ -318,28 +318,16 @@ class OneDNNHandlerT { typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... args) { - try { - fwd_pd_ = std::make_shared( - engine_, std::forward(args)..., first); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan7")); - std::rethrow_exception(std::current_exception()); - } + fwd_pd_ = std::make_shared( + engine_, std::forward(args)..., first); } template typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... 
args) { - try { - fwd_pd_ = std::make_shared( - engine_, std::forward(first), std::forward(args)...); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan8")); - std::rethrow_exception(std::current_exception()); - } + fwd_pd_ = std::make_shared( + engine_, std::forward(first), std::forward(args)...); } template @@ -354,14 +342,8 @@ class OneDNNHandlerT { bwd_pd_ = std::static_pointer_cast( dev_ctx_.GetBlob(key_pd)); if (bwd_pd_ == nullptr) { - try { - bwd_pd_ = std::make_shared( - engine_, std::forward(args)..., *fwd_pd_); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan1")); - std::rethrow_exception(std::current_exception()); - } + bwd_pd_ = std::make_shared( + engine_, std::forward(args)..., *fwd_pd_); dev_ctx_.SetBlob(key_pd, bwd_pd_); } } @@ -379,14 +361,8 @@ class OneDNNHandlerT { std::static_pointer_cast( dev_ctx_.GetBlob(key_pd)); if (bwd_w_pd_ == nullptr) { - try { - bwd_w_pd_ = std::make_shared( - engine_, std::forward(args)..., *fwd_pd_); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan2")); - std::rethrow_exception(std::current_exception()); - } + bwd_w_pd_ = std::make_shared( + engine_, std::forward(args)..., *fwd_pd_); dev_ctx_.SetBlob(key_pd, bwd_w_pd_); } } @@ -645,28 +621,16 @@ class OneDNNHandlerNoCachingT { typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... args) { - try { - fwd_pd_ = std::make_shared( - engine_, std::forward(args)..., first); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan3")); - std::rethrow_exception(std::current_exception()); - } + fwd_pd_ = std::make_shared( + engine_, std::forward(args)..., first); } template typename std::enable_if::type, dnnl::primitive_attr>::value>::type CreateForwardPrimitiveDescriptor(First&& first, Args&&... 
args) { - try { - fwd_pd_ = std::make_shared( - engine_, std::forward(first), std::forward(args)...); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan4")); - std::rethrow_exception(std::current_exception()); - } + fwd_pd_ = std::make_shared( + engine_, std::forward(first), std::forward(args)...); } template @@ -676,14 +640,8 @@ class OneDNNHandlerNoCachingT { PADDLE_ENFORCE_NOT_NULL( fwd_pd_, errors::Unavailable("Get oneDNN Forward primitive %s failed.")); - try { - bwd_pd_ = std::make_shared( - engine_, std::forward(args)..., *fwd_pd_); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan5")); - std::rethrow_exception(std::current_exception()); - } + bwd_pd_ = std::make_shared( + engine_, std::forward(args)..., *fwd_pd_); } template @@ -695,14 +653,8 @@ class OneDNNHandlerNoCachingT { errors::Unavailable("Get oneDNN Forward primitive %s failed.")); auto bwd_desc = typename TBackward_params::desc(std::forward(args)...); - try { - bwd_w_pd_ = std::make_shared( - bwd_desc, engine_, *fwd_pd_); - } catch (std::exception& ex) { - LOG(WARNING) << ex.what(); - PADDLE_THROW(phi::errors::Unavailable("wanghuan6")); - std::rethrow_exception(std::current_exception()); - } + bwd_w_pd_ = std::make_shared( + bwd_desc, engine_, *fwd_pd_); } std::shared_ptr AcquireMemoryFromPrimitive( diff --git a/paddle/phi/core/dense_tensor_impl.cc b/paddle/phi/core/dense_tensor_impl.cc index 770443acf1838..0f6d059bfa012 100644 --- a/paddle/phi/core/dense_tensor_impl.cc +++ b/paddle/phi/core/dense_tensor_impl.cc @@ -382,6 +382,8 @@ const dnnl::memory::desc& DenseTensor::mem_desc() const { std::unique_ptr* storage_properties_ptr = const_cast*>(&storage_properties_); *storage_properties_ptr = std::make_unique(); + static_cast(storage_properties_ptr->get()) + ->mem_desc = dnnl::memory::desc(); } return this->storage_properties().mem_desc; } diff --git a/paddle/phi/kernels/onednn/conv_function.h b/paddle/phi/kernels/onednn/conv_function.h index e9d66786c6225..7d7e74f691a02 100644 --- a/paddle/phi/kernels/onednn/conv_function.h +++ b/paddle/phi/kernels/onednn/conv_function.h @@ -29,10 +29,8 @@ static dnnl::memory::data_type GetDstType( std::string fuse_activation, bool fuse_residual_conn, const phi::DenseTensor* residual_param) { - std::cout << "GetDstType" << std::endl; auto dst_dt = dnnl::memory::data_type::f32; if (is_int8) { - std::cout << "GetDstType1" << std::endl; dst_dt = (fuse_activation == "relu" || fuse_activation == "relu6") ? 
dnnl::memory::data_type::u8 : dnnl::memory::data_type::s8; @@ -40,21 +38,17 @@ static dnnl::memory::data_type GetDstType( dst_dt = dnnl::memory::data_type::f32; } if (fuse_residual_conn && residual_param) { - std::cout << "GetDstType2" << std::endl; auto residual_dt = funcs::ToOneDNNDataType(residual_param->dtype()); if (dst_dt != residual_dt) dst_dt = residual_dt; } } else { - std::cout << "GetDstType3" << std::endl; if (!force_fp32_output && is_bfloat16) { dst_dt = dnnl::memory::data_type::bf16; if (fuse_residual_conn && residual_param) { - std::cout << "GetDstType4" << std::endl; dst_dt = funcs::ToOneDNNDataType(residual_param->dtype()); } } } - std::cout << "GetDstType5" << std::endl; return dst_dt; } @@ -91,12 +85,10 @@ void ComputeFP32(const OneDNNContext& dev_ctx, bool fuse_residual_conn, bool force_fp32_output, DenseTensor* output) { - std::cout << "ComputeFP32 " << std::endl; const auto& onednn_engine = dev_ctx.GetEngine(); const bool is_conv3d = strides.size() == 3U; const std::string& unique_name = dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0]; - std::cout << "ComputeFP32 2" << std::endl; PD_VISIT_FLOAT_AND_INT8_TYPES( filter->dtype(), "ConvOneDNNHandlerT", ([&] { onednn::ConvOneDNNHandlerT handler(dev_ctx, @@ -118,47 +110,33 @@ void ComputeFP32(const OneDNNContext& dev_ctx, force_fp32_output, output, unique_name); - std::cout << "ComputeFP32 3" << std::endl; auto src_memory_p = handler.AcquireSrcMemoryWithReorder(input); - std::cout << "ComputeFP32 4" << std::endl; auto weights_memory_p = handler.AcquireWeightsMemoryWithReorder( filter, groups, is_conv3d, is_test); - std::cout << "ComputeFP32 5" << std::endl; std::shared_ptr dst_memory_p; if (fuse_residual_conn) { - std::cout << "ComputeFP32 6" << std::endl; dst_memory_p = handler.AcquireDstMemoryWithResidual(output, residual_param); - std::cout << "ComputeFP32 7" << std::endl; } else { - std::cout << "ComputeFP32 8" << std::endl; dst_memory_p = handler.template AcquireDstMemory(output); - std::cout << "ComputeFP32 9" << std::endl; } - std::cout << "ComputeFP32 10" << std::endl; + auto conv_p = handler.AcquireForwardPrimitive(); - std::cout << "ComputeFP32 11" << std::endl; std::unordered_map args = { {DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_WEIGHTS, *weights_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; if (bias) { - std::cout << "ComputeFP32 12" << std::endl; auto bias_memory_p = handler.AcquireBiasMemoryWithReorder(bias, is_test); - std::cout << "ComputeFP32 13" << std::endl; args.insert({DNNL_ARG_BIAS, *bias_memory_p}); } auto& astream = OneDNNContext::tls().get_stream(); - std::cout << "ComputeFP32 14" << std::endl; conv_p->execute(astream, args); - std::cout << "ComputeFP32 15" << std::endl; astream.wait(); - std::cout << "ComputeFP32 16" << std::endl; output->set_mem_desc(dst_memory_p->get_desc()); - std::cout << "ComputeFP32 17" << std::endl; })); } @@ -180,9 +158,7 @@ void ComputeINT8(const OneDNNContext& dev_ctx, bool fuse_residual_conn, bool force_fp32_output, DenseTensor* output) { - std::cout << "ComputeINT8 " << std::endl; const auto& onednn_engine = dev_ctx.GetEngine(); - std::cout << "ComputeINT8 2" << std::endl; const bool is_conv3d = strides.size() == 3U; bool unsigned_output = @@ -201,7 +177,6 @@ void ComputeINT8(const OneDNNContext& dev_ctx, "residual fusion does not support force output with fp32")); const std::string& unique_name = dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0]; - std::cout << "ComputeINT8 3" << std::endl; PD_VISIT_FLOAT_AND_INT8_TYPES( 
filter->dtype(), "ConvOneDNNHandlerT", ([&] { onednn::ConvOneDNNHandlerT handler(dev_ctx, @@ -223,9 +198,9 @@ void ComputeINT8(const OneDNNContext& dev_ctx, force_fp32_output, output, unique_name); - std::cout << "ComputeINT8 4" << std::endl; + auto src_memory_p = handler.AcquireSrcMemoryWithReorder(input); - std::cout << "ComputeINT8 5" << std::endl; + const auto& scale_weights_data = dev_ctx.HasDnnAttr("Scale_weights") ? PADDLE_GET_CONST(std::vector, @@ -235,10 +210,9 @@ void ComputeINT8(const OneDNNContext& dev_ctx, int mask_reorder = is_multi_channel ? ((groups != 1) ? (1 << 1) + (1 << 0) : 1 << 0) : 0; - std::cout << "ComputeINT8 6" << std::endl; auto weights_memory_p = handler.AcquireWeightsMemoryWithReorder( filter, groups, false, true, scale_weights_data, mask_reorder); - std::cout << "ComputeINT8 7" << std::endl; + std::shared_ptr dst_memory_p; if (fuse_residual_conn) { PADDLE_ENFORCE_EQ( @@ -250,65 +224,49 @@ void ComputeINT8(const OneDNNContext& dev_ctx, " and residual param's dimension =%d .", output->dims().size(), residual_param->dims().size())); - std::cout << "ComputeINT8 8" << std::endl; dst_memory_p = handler.AcquireDstMemoryWithResidual(output, residual_param); - std::cout << "ComputeINT8 9" << std::endl; need_s8_to_u8 = (funcs::OneDNNGetDataType() == dnnl::memory::data_type::s8) && unsigned_output; - std::cout << "ComputeINT8 10" << std::endl; } else { - std::cout << "ComputeINT8 11" << std::endl; dst_memory_p = handler.template AcquireDstMemory(output); - std::cout << "ComputeINT8 12" << std::endl; } - std::cout << "ComputeINT8 13" << std::endl; auto conv_p = handler.AcquireForwardPrimitive(); - std::cout << "ComputeINT8 14" << std::endl; + std::unordered_map args = { {DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_WEIGHTS, *weights_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; if (bias) { - std::cout << "ComputeINT8 15" << std::endl; auto bias_memory_p = handler.AcquireBiasMemoryWithReorder(bias, true); - std::cout << "ComputeINT8 16" << std::endl; args.insert({DNNL_ARG_BIAS, *bias_memory_p}); } - std::cout << "ComputeINT8 17" << std::endl; + auto src_scales_memory = handler.AcquireScalesMemory(DNNL_ARG_SRC); - std::cout << "ComputeINT8 18" << std::endl; args.insert({DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC, *src_scales_memory}); - std::cout << "ComputeINT8 19" << std::endl; + auto wei_scales_memory = handler.AcquireScalesMemory(DNNL_ARG_WEIGHTS); - std::cout << "ComputeINT8 20" << std::endl; args.insert( {DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS, *wei_scales_memory}); if (!force_fp32_output) { - std::cout << "ComputeINT8 21" << std::endl; auto dst_scales_memory = handler.AcquireScalesMemory(DNNL_ARG_DST); - std::cout << "ComputeINT8 22" << std::endl; args.insert( {DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST, *dst_scales_memory}); } auto& astream = OneDNNContext::tls().get_stream(); - std::cout << "ComputeINT8 23" << std::endl; conv_p->execute(astream, args); - std::cout << "ComputeINT8 24" << std::endl; astream.wait(); - std::cout << "ComputeINT8 25" << std::endl; if (need_s8_to_u8) { dev_ctx.Alloc(output); } - std::cout << "ComputeINT8 26" << std::endl; + output->set_mem_desc(dst_memory_p->get_desc()); - std::cout << "ComputeINT8 27" << std::endl; })); } @@ -330,7 +288,6 @@ void ConvOnednn(const Context& dev_ctx, bool fuse_residual_connection, bool force_fp32_output, DenseTensor* out) { - std::cout << "ConvOnednn" << std::endl; PADDLE_ENFORCE_EQ( dev_ctx.GetPlace().GetType(), AllocationType::CPU, @@ -344,10 +301,8 @@ void ConvOnednn(const Context& dev_ctx, fuse_activation, 
fuse_residual_connection, residual_param); - std::cout << "ConvOnednn2" << std::endl; if (!is_INT8) { if (dst_dt == dnnl::memory::data_type::f32) { - std::cout << "ConvOnednn3" << std::endl; ComputeFP32(dev_ctx, input, filter, @@ -366,7 +321,6 @@ void ConvOnednn(const Context& dev_ctx, force_fp32_output, out); } else if (dst_dt == dnnl::memory::data_type::bf16) { - std::cout << "ConvOnednn4" << std::endl; ComputeFP32(dev_ctx, input, filter, @@ -387,7 +341,6 @@ void ConvOnednn(const Context& dev_ctx, } } else { if (dst_dt == dnnl::memory::data_type::f32) { - std::cout << "ConvOnednn5" << std::endl; ComputeINT8(dev_ctx, input, filter, @@ -406,7 +359,6 @@ void ConvOnednn(const Context& dev_ctx, force_fp32_output, out); } else if (dst_dt == dnnl::memory::data_type::u8) { - std::cout << "ConvOnednn6" << std::endl; ComputeINT8(dev_ctx, input, filter, @@ -425,7 +377,6 @@ void ConvOnednn(const Context& dev_ctx, force_fp32_output, out); } else if (dst_dt == dnnl::memory::data_type::s8) { - std::cout << "ConvOnednn7" << std::endl; ComputeINT8(dev_ctx, input, filter, @@ -445,7 +396,6 @@ void ConvOnednn(const Context& dev_ctx, out); } } - std::cout << "ConvOnednn8" << std::endl; } } // namespace phi diff --git a/paddle/phi/kernels/onednn/conv_handler.h b/paddle/phi/kernels/onednn/conv_handler.h index 0d570e9e84fe1..3d41c274de24e 100644 --- a/paddle/phi/kernels/onednn/conv_handler.h +++ b/paddle/phi/kernels/onednn/conv_handler.h @@ -69,7 +69,6 @@ class ConvOneDNNHandlerT cpu_place, funcs::CreateKey( dev_ctx, common::vectorize(input->dims()), unique_name)) { - std::cout << "ConvOneDNNHandlerT" << std::endl; if (unlikely(!this->isCached())) { PADDLE_ENFORCE_EQ( input->layout(), @@ -143,10 +142,8 @@ class ConvOneDNNHandlerT std::vector strides(begin(strides_in), end(strides_in)); std::vector paddings(begin(paddings_in), end(paddings_in)); std::vector dilations(begin(dilations_in), end(dilations_in)); - std::cout << "ConvOneDNNHandlerT2" << std::endl; UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, data_dims, strides, ksize); - std::cout << "ConvOneDNNHandlerT3" << std::endl; std::transform( dilations.begin(), dilations.end(), dilations.begin(), [](int64_t i) { return i - 1; @@ -158,11 +155,9 @@ class ConvOneDNNHandlerT funcs::GetGroupConvWeightsTz(weights_tz, groups); const auto dst_tz = common::vectorize(output->dims()); - std::cout << "ConvOneDNNHandlerT4" << std::endl; + const dnnl::memory::dims stride_dims = strides; - std::cout << "ConvOneDNNHandlerT5" << std::endl; const auto onednn_paddings = funcs::ToOneDNNPadding(paddings); - std::cout << "ConvOneDNNHandlerT6" << std::endl; const dnnl::memory::dims dilations_dims = dilations; /* create memory descriptor for convolution without specified format * ('any') which lets a primitive (convolution in this case) choose @@ -176,46 +171,34 @@ class ConvOneDNNHandlerT dnnl::memory::desc src_md, weights_md; if (funcs::is_int8()) { - std::cout << "ConvOneDNNHandlerT7" << std::endl; src_md = funcs::OneDNNMemDesc(src_tz, funcs::ToOneDNNDataType(input->dtype()), chosen_memory_format); - std::cout << "ConvOneDNNHandlerT8" << std::endl; weights_md = funcs::OneDNNMemDesc( weights_tz, dnnl::memory::data_type::s8, chosen_memory_format); - std::cout << "ConvOneDNNHandlerT9" << std::endl; } else { - std::cout << "ConvOneDNNHandlerT10" << std::endl; src_md = funcs::OneDNNMemDesc(src_tz, data_type, chosen_memory_format); - std::cout << "ConvOneDNNHandlerT11" << std::endl; weights_md = funcs::OneDNNMemDesc( weights_tz, data_type, 
funcs::OneDNNMemoryFormat::any); - std::cout << "ConvOneDNNHandlerT12" << std::endl; } if (input->dims().size() == 4 && input->dims()[1] <= 4) { chosen_memory_format = funcs::OneDNNMemoryFormat::nhwc; } - std::cout << "ConvOneDNNHandlerT13" << std::endl; const auto dst_md = funcs::OneDNNMemDesc( dst_tz, funcs::OneDNNGetDataType(), chosen_memory_format); - std::cout << "ConvOneDNNHandlerT14" << std::endl; const auto fwd_prop_kind = dnnl::prop_kind::forward_inference; - std::cout << "ConvOneDNNHandlerT15" << std::endl; const dnnl::primitive_attr conv_attr = CreateConvAttrs(filter, groups, force_fp32_output, fuse_residual_conn, fuse_activation); - std::cout << "ConvOneDNNHandlerT16" << std::endl; if (bias) { auto bias_tz = common::vectorize(bias->dims()); - std::cout << "ConvOneDNNHandlerT17" << std::endl; dnnl::memory::desc bias_md = funcs::OneDNNMemDesc(bias_tz, dnnl::memory::data_type::f32, funcs::OneDNNMemoryFormat::x); - std::cout << "ConvOneDNNHandlerT18" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, @@ -229,9 +212,7 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); - std::cout << "ConvOneDNNHandlerT19" << std::endl; } else { - std::cout << "ConvOneDNNHandlerT20" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, fwd_prop_kind, @@ -243,7 +224,6 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); - std::cout << "ConvOneDNNHandlerT21" << std::endl; } } } @@ -273,7 +253,6 @@ class ConvOneDNNHandlerT cpu_place, funcs::CreateKey( dev_ctx, common::vectorize(in->dims()), unique_name)) { - std::cout << "ConvOneDNNHandlerT22" << std::endl; if (unlikely(!this->isBwdCached())) { PADDLE_ENFORCE_EQ( in->layout(), @@ -315,11 +294,9 @@ class ConvOneDNNHandlerT auto filter_data_dims = common::slice_ddim(filter_dims, 2, filter_dims.size()); auto ksize = common::vectorize(filter_data_dims); - std::cout << "ConvOneDNNHandlerT23" << std::endl; UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, data_dims, strides, ksize); - std::cout << "ConvOneDNNHandlerT24" << std::endl; auto src_tz = common::vectorize(in->dims()); auto weights_tz = common::vectorize(filter->dims()); @@ -334,7 +311,6 @@ class ConvOneDNNHandlerT */ const auto chosen_memory_format = funcs::OneDNNMemoryFormat::any; const auto weights_format = funcs::OneDNNMemoryFormat::any; - std::cout << "ConvOneDNNHandlerT25" << std::endl; auto src_md = funcs::OneDNNMemDesc( src_tz, funcs::OneDNNGetDataType(), chosen_memory_format); @@ -348,7 +324,6 @@ class ConvOneDNNHandlerT weights_tz, funcs::OneDNNGetDataType(), weights_format); auto diff_dst_md = funcs::OneDNNMemDesc( dst_tz, funcs::OneDNNGetDataType(), chosen_memory_format); - std::cout << "ConvOneDNNHandlerT26" << std::endl; auto onednn_paddings = funcs::ToOneDNNPadding(paddings); std::transform( @@ -362,12 +337,10 @@ class ConvOneDNNHandlerT dnnl::primitive_attr conv_attr; if (bias) { auto bias_tz = common::vectorize(bias->dims()); - std::cout << "ConvOneDNNHandlerT27" << std::endl; dnnl::memory::desc bias_md = funcs::OneDNNMemDesc(bias_tz, dnnl::memory::data_type::f32, funcs::OneDNNMemoryFormat::x); - std::cout << "ConvOneDNNHandlerT28" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, @@ -381,9 +354,7 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); - std::cout << "ConvOneDNNHandlerT29" << std::endl; } else { - std::cout << "ConvOneDNNHandlerT30" << std::endl; this->AcquireForwardPrimitiveDescriptor( conv_attr, 
dnnl::prop_kind::forward_inference, @@ -395,9 +366,7 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); - std::cout << "ConvOneDNNHandlerT31" << std::endl; } - std::cout << "ConvOneDNNHandlerT32" << std::endl; this->AcquireBackwardPrimitiveDescriptor( dnnl::algorithm::convolution_direct, @@ -408,7 +377,6 @@ class ConvOneDNNHandlerT dilations_dims, onednn_paddings[0], onednn_paddings[1]); - std::cout << "ConvOneDNNHandlerT33" << std::endl; this->AcquireBackwardWeightsPrimitiveDescriptor( dnnl::algorithm::convolution_direct, @@ -420,7 +388,6 @@ class ConvOneDNNHandlerT onednn_paddings[0], onednn_paddings[1]); } - std::cout << "ConvOneDNNHandlerT34" << std::endl; } dnnl::primitive_attr CreateConvAttrs(const DenseTensor* filter, diff --git a/test/cpp/inference/infer_ut/run.sh b/test/cpp/inference/infer_ut/run.sh index 93ad591df3a74..6f967eb0aa6c7 100755 --- a/test/cpp/inference/infer_ut/run.sh +++ b/test/cpp/inference/infer_ut/run.sh @@ -31,7 +31,6 @@ test_suite_list="cpu_tester*" # init test suite list, pass to --gtest_filter export RED='\033[0;31m' # red color export NC='\033[0m' # no color export YELLOW='\033[33m' # yellow color -export DNNL_VERBOSE=1 cd `dirname $0` current_dir=`pwd` From 4e254082ff1c129071379133f458a38a64ec5d8f Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Mon, 18 Dec 2023 10:59:43 +0000 Subject: [PATCH 07/12] refine --- paddle/phi/core/dense_tensor_impl.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paddle/phi/core/dense_tensor_impl.cc b/paddle/phi/core/dense_tensor_impl.cc index 0f6d059bfa012..8a3d7db993ad7 100644 --- a/paddle/phi/core/dense_tensor_impl.cc +++ b/paddle/phi/core/dense_tensor_impl.cc @@ -384,6 +384,8 @@ const dnnl::memory::desc& DenseTensor::mem_desc() const { *storage_properties_ptr = std::make_unique(); static_cast(storage_properties_ptr->get()) ->mem_desc = dnnl::memory::desc(); + static_cast(storage_properties_ptr->get()) + ->format = dnnl::memory::format_tag::undef; } return this->storage_properties().mem_desc; } From b126c280310cba638c63b2b0db85deb49f91a27c Mon Sep 17 00:00:00 2001 From: Wang Huan Date: Fri, 29 Dec 2023 02:35:25 +0000 Subject: [PATCH 08/12] refine --- paddle/fluid/inference/api/analysis_predictor.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index c2806ebbbfcc9..5c5c0d01dec74 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -387,6 +387,7 @@ AnalysisPredictor::AnalysisPredictor(const AnalysisConfig &config) } else { predictor_id_ = inference::GetUniqueId(); } + root_predictor_id_ = predictor_id_; } bool AnalysisPredictor::Init( @@ -401,10 +402,6 @@ bool AnalysisPredictor::Init( } #endif - if (!status_is_cloned_) { - root_predictor_id_ = predictor_id_; - } - // no matter with or without MKLDNN paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); @@ -417,6 +414,7 @@ bool AnalysisPredictor::Init( if (!CreateExecutor()) { return false; } + if (!PrepareProgram(program)) { return false; } @@ -467,6 +465,7 @@ bool AnalysisPredictor::Init( #endif inference::DisplayMemoryInfo(place_, "Init predictor"); + return true; } @@ -1832,7 +1831,6 @@ void AnalysisPredictor::OptimizeInferenceProgram() { }); // The config and argument take a lot of storage, // when the predictor settings are complete, we release these stores. 
From b126c280310cba638c63b2b0db85deb49f91a27c Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Fri, 29 Dec 2023 02:35:25 +0000
Subject: [PATCH 08/12] refine: set root_predictor_id_ in the constructor

---
 paddle/fluid/inference/api/analysis_predictor.cc | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index c2806ebbbfcc9..5c5c0d01dec74 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -387,6 +387,7 @@ AnalysisPredictor::AnalysisPredictor(const AnalysisConfig &config)
   } else {
     predictor_id_ = inference::GetUniqueId();
   }
+  root_predictor_id_ = predictor_id_;
 }
 
 bool AnalysisPredictor::Init(
@@ -401,10 +402,6 @@ bool AnalysisPredictor::Init(
   }
 #endif
 
-  if (!status_is_cloned_) {
-    root_predictor_id_ = predictor_id_;
-  }
-
   // no matter with or without MKLDNN
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
 
@@ -417,6 +414,7 @@ bool AnalysisPredictor::Init(
   if (!CreateExecutor()) {
     return false;
   }
+
   if (!PrepareProgram(program)) {
     return false;
   }
@@ -467,6 +465,7 @@ bool AnalysisPredictor::Init(
 #endif
 
   inference::DisplayMemoryInfo(place_, "Init predictor");
+
   return true;
 }
 
@@ -1832,7 +1831,6 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   });
   // The config and argument take a lot of storage,
   // when the predictor settings are complete, we release these stores.
-  config_.PartiallyRelease();
 #if defined(PADDLE_WITH_TESTING)
   fusion_statis_ = *argument_->fusion_statis_ptr();
 #endif
@@ -2800,7 +2798,7 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
   VLOG(3) << "AnalysisPredictor::Clone";
   std::lock_guard<std::mutex> lk(clone_mutex_);
   auto *x = new AnalysisPredictor(config_);
-  x->status_is_cloned_ = true;
+  x->root_predictor_id_ = this->root_predictor_id_;
   x->config_.apply_optim_ = false;
   if (config_.use_external_stream_ && stream == nullptr) {
@@ -2813,7 +2811,9 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
         "function has received a stream parameter."));
   }
   x->predictor_stream_ = stream;
-  x->Init(scope_, inference_program_);
+  x->Init(nullptr);
+  x->status_is_cloned_ = true;
+
 #ifdef PADDLE_WITH_TENSORRT
   x->executor_->ResetTrtOps(++AnalysisPredictor::clone_num_);
 #endif
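PATCH 08 moves the root-id assignment into the constructor, has Clone() propagate root_predictor_id_ explicitly, and switches the clone from Init(scope_, inference_program_) to Init(nullptr), i.e. a fresh scope rather than one shared with the parent. A stripped-down sketch of that bookkeeping, where Predictor is a hypothetical stand-in for AnalysisPredictor:

    #include <memory>

    struct Predictor {
      int id = NextId();
      int root_id = id;        // the constructor decides the root, as in the patch
      bool cloned = false;

      std::unique_ptr<Predictor> Clone() {
        auto p = std::make_unique<Predictor>();
        p->root_id = root_id;  // every clone keeps pointing at its root
        p->cloned = true;
        return p;
      }

      static int NextId() {
        static int counter = 0;  // unique id source, like inference::GetUniqueId()
        return ++counter;
      }
    };

Note that PATCH 09 below reverts most of this, restoring the shared Init(scope_, inference_program_) path; the series keeps the fix on the executor side instead.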
From 12caf1c29c0cdc4a935ed6e43cce7fdacb448277 Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Fri, 29 Dec 2023 08:57:56 +0000
Subject: [PATCH 09/12] refine: optionally pre-create OneDNN mem_desc in
 CreateVariables

---
 paddle/fluid/framework/naive_executor.cc      | 24 ++++++++++-
 paddle/fluid/framework/naive_executor.h       |  3 +-
 .../fluid/inference/api/analysis_predictor.cc | 41 ++++++++++++-------
 3 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc
index 3bfacc950325c..afd61f8e21586 100644
--- a/paddle/fluid/framework/naive_executor.cc
+++ b/paddle/fluid/framework/naive_executor.cc
@@ -152,7 +152,8 @@ void NaiveExecutor::Run() {
 void NaiveExecutor::CreateVariables(const ProgramDesc &desc,
                                     int block_id,
                                     bool persistable,
-                                    Scope *scope) {
+                                    Scope *scope,
+                                    bool init_mkldnn_memdesc) {
   PADDLE_ENFORCE_NOT_NULL(scope,
                           platform::errors::InvalidArgument(
                               "The Scope to hold variables is nullptr."));
@@ -174,7 +175,16 @@ void NaiveExecutor::CreateVariables(const ProgramDesc &desc,
       continue;
     }
     num_vars++;
-
+#ifdef PADDLE_WITH_DNNL
+    auto init_mkldnn_memdesc_func = [&](Variable *var,
+                                        proto::VarType::Type var_type) {
+      if (var_type == proto::VarType::LOD_TENSOR) {
+        var->GetMutable<phi::DenseTensor>()->mem_desc();
+      } else if (var_type == proto::VarType::SELECTED_ROWS) {
+        var->GetMutable<phi::SelectedRows>()->mutable_value()->mem_desc();
+      }
+    };
+#endif
     if (persistable == var->Persistable()) {
       if (persistable) {
         if (!anc->FindVar(var->Name())) {
@@ -182,12 +192,22 @@ void NaiveExecutor::CreateVariables(const ProgramDesc &desc,
           VLOG(3) << scope << " Create persistable variable " << var->Name()
                   << ", which pointer is " << ptr;
           InitializeVariable(ptr, var->GetType());
+#ifdef PADDLE_WITH_DNNL
+          if (init_mkldnn_memdesc) {
+            init_mkldnn_memdesc_func(ptr, var->GetType());
+          }
+#endif
         }
       } else {
         auto *ptr = const_cast<Scope *>(scope)->Var(var->Name());
         VLOG(3) << scope << " Create variable " << var->Name()
                 << ", which pointer is " << ptr;
         InitializeVariable(ptr, var->GetType());
+#ifdef PADDLE_WITH_DNNL
+        if (init_mkldnn_memdesc) {
+          init_mkldnn_memdesc_func(ptr, var->GetType());
+        }
+#endif
       }
     }
   }
diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h
index 5a558f3bd6921..d88c5d7f02827 100644
--- a/paddle/fluid/framework/naive_executor.h
+++ b/paddle/fluid/framework/naive_executor.h
@@ -71,7 +71,8 @@ class NaiveExecutor {
   void CreateVariables(const ProgramDesc& desc,
                        int block_id,
                        bool persistable,
-                       Scope* scope);
+                       Scope* scope,
+                       bool init_mkldnn_memdesc = false);
 
   // Run all the operators.
   void Run();
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index a022884c13cb4..893ead4d29514 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -389,7 +389,6 @@ AnalysisPredictor::AnalysisPredictor(const AnalysisConfig &config)
   } else {
     predictor_id_ = inference::GetUniqueId();
   }
-  root_predictor_id_ = predictor_id_;
 }
 
 bool AnalysisPredictor::Init(
@@ -404,6 +403,10 @@ bool AnalysisPredictor::Init(
   }
 #endif
 
+  if (!status_is_cloned_) {
+    root_predictor_id_ = predictor_id_;
+  }
+
   // no matter with or without MKLDNN
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
 
@@ -416,7 +419,6 @@ bool AnalysisPredictor::Init(
   if (!CreateExecutor()) {
     return false;
   }
-
   if (!PrepareProgram(program)) {
     return false;
   }
@@ -467,7 +469,6 @@ bool AnalysisPredictor::Init(
 #endif
 
   inference::DisplayMemoryInfo(place_, "Init predictor");
-
   return true;
 }
 
@@ -681,14 +682,21 @@ bool AnalysisPredictor::PrepareProgram(
     const std::shared_ptr<framework::ProgramDesc> &program) {
   if (!program) {
     if (!LoadProgramDesc()) return false;
-    // If not cloned, the parameters should be loaded.
-    // If config_.ir_optim() is True, parameters is loaded in
-    // OptimizeInferenceProgram(), but other persistable variables
-    // (like RAW type var) are not created in scope.
-    // If config_.ir_optim() is False, parameters is loaded in LoadParameters(),
-    // still need to create other persistable variables.
-    // So in both case, create persistable variables at first.
+    // If not cloned, the parameters should be loaded.
+    // If config_.ir_optim() is True, parameters is loaded in
+    // OptimizeInferenceProgram(), but other persistable variables
+    // (like RAW type var) are not created in scope.
+    // If config_.ir_optim() is False, parameters is loaded in
+    // LoadParameters(), still need to create other persistable variables. So
+    // in both case, create persistable variables at first.
+#ifdef PADDLE_WITH_DNNL
+    if (config_.use_mkldnn_) {
+      executor_->CreateVariables(
+          *inference_program_, 0, true, sub_scope_, true);
+    }
+#else
     executor_->CreateVariables(*inference_program_, 0, true, sub_scope_);
+#endif
 
     // if enable_ir_optim_ is false,
    // the analysis pass(op fuse, graph analysis, trt subgraph, mkldnn etc) will
@@ -951,7 +959,13 @@ bool AnalysisPredictor::CommInit() {
     order += 1;
   }
   framework::NaiveExecutor e(place_);
+#ifdef PADDLE_WITH_DNNL
+  if (config_.use_mkldnn_) {
+    e.CreateVariables(*comm_init_program, 0, true, scope_.get(), true);
+  }
+#else
   e.CreateVariables(*comm_init_program, 0, true, scope_.get());
+#endif
   e.Prepare(scope_.get(), *comm_init_program, 0);
   e.Run();
   VLOG(3) << "Comm init successful.";
@@ -1848,6 +1862,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   });
   // The config and argument take a lot of storage,
   // when the predictor settings are complete, we release these stores.
+  config_.PartiallyRelease();
 #if defined(PADDLE_WITH_TESTING)
   fusion_statis_ = *argument_->fusion_statis_ptr();
 #endif
@@ -2818,7 +2833,7 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
   VLOG(3) << "AnalysisPredictor::Clone";
   std::lock_guard<std::mutex> lk(clone_mutex_);
   auto *x = new AnalysisPredictor(config_);
-
+  x->status_is_cloned_ = true;
   x->root_predictor_id_ = this->root_predictor_id_;
   x->config_.apply_optim_ = false;
   if (config_.use_external_stream_ && stream == nullptr) {
@@ -2831,9 +2846,7 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
         "function has received a stream parameter."));
   }
   x->predictor_stream_ = stream;
-  x->Init(nullptr);
-  x->status_is_cloned_ = true;
-
+  x->Init(scope_, inference_program_);
 #ifdef PADDLE_WITH_TENSORRT
 x->executor_->ResetTrtOps(++AnalysisPredictor::clone_num_);
 #endif
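The heart of PATCH 09 is the new init_mkldnn_memdesc flag: while variables are being created, still single-threaded, each fresh DenseTensor (or the value tensor of a SelectedRows) has its mem_desc() touched once so the lazily created OneDNN storage properties already exist before concurrent inference starts. A self-contained sketch of this touch-to-preinitialize idiom; Lazy and CreateAll are illustrative names, not Paddle types:

    #include <memory>
    #include <vector>

    struct Lazy {
      std::unique_ptr<int> state;  // created on first access
      const int& Get() {
        if (!state) state = std::make_unique<int>(0);  // lazy init
        return *state;
      }
    };

    // Mirrors CreateVariables(..., init_mkldnn_memdesc): optionally force the
    // lazy member into existence while execution is still single-threaded.
    void CreateAll(std::vector<Lazy>& vars, bool preinit) {
      for (auto& v : vars) {
        if (preinit) v.Get();  // touch once now, so readers never race later
      }
    }

The design point: first access to a lazily built object is a write, so moving that first access to a known single-threaded phase sidesteps the data race without adding locks to the hot path.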
From 37c698a509237f9ab2dc7f7229b155dd326c7ab1 Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Fri, 29 Dec 2023 09:22:48 +0000
Subject: [PATCH 10/12] refine: drop the use_mkldnn_ guard for mem_desc
 pre-creation

---
 paddle/fluid/inference/api/analysis_predictor.cc | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 893ead4d29514..d87902d30116f 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -690,10 +690,7 @@ bool AnalysisPredictor::PrepareProgram(
     // LoadParameters(), still need to create other persistable variables. So
     // in both case, create persistable variables at first.
 #ifdef PADDLE_WITH_DNNL
-    if (config_.use_mkldnn_) {
-      executor_->CreateVariables(
-          *inference_program_, 0, true, sub_scope_, true);
-    }
+    executor_->CreateVariables(*inference_program_, 0, true, sub_scope_, true);
 #else
     executor_->CreateVariables(*inference_program_, 0, true, sub_scope_);
 #endif
@@ -960,9 +957,7 @@ bool AnalysisPredictor::CommInit() {
   }
   framework::NaiveExecutor e(place_);
 #ifdef PADDLE_WITH_DNNL
-  if (config_.use_mkldnn_) {
-    e.CreateVariables(*comm_init_program, 0, true, scope_.get(), true);
-  }
+  e.CreateVariables(*comm_init_program, 0, true, scope_.get(), true);
 #else
   e.CreateVariables(*comm_init_program, 0, true, scope_.get());
 #endif
   e.Prepare(scope_.get(), *comm_init_program, 0);
   e.Run();
   VLOG(3) << "Comm init successful.";

From 2f846fec33c407a152ef0f26a32a659b829248b0 Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Tue, 2 Jan 2024 01:25:50 +0000
Subject: [PATCH 11/12] refine: return a static undefined mem_desc for plain
 tensors

---
 .../fluid/inference/api/analysis_predictor.cc | 22 ++++++-------------
 paddle/phi/core/dense_tensor_impl.cc          | 15 +++++--------
 2 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index d87902d30116f..4af55a7c6c933 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -682,18 +682,14 @@ bool AnalysisPredictor::PrepareProgram(
     const std::shared_ptr<framework::ProgramDesc> &program) {
   if (!program) {
     if (!LoadProgramDesc()) return false;
-    // If not cloned, the parameters should be loaded.
-    // If config_.ir_optim() is True, parameters is loaded in
-    // OptimizeInferenceProgram(), but other persistable variables
-    // (like RAW type var) are not created in scope.
-    // If config_.ir_optim() is False, parameters is loaded in
-    // LoadParameters(), still need to create other persistable variables. So
-    // in both case, create persistable variables at first.
-#ifdef PADDLE_WITH_DNNL
-    executor_->CreateVariables(*inference_program_, 0, true, sub_scope_, true);
-#else
+    // If not cloned, the parameters should be loaded.
+    // If config_.ir_optim() is True, parameters is loaded in
+    // OptimizeInferenceProgram(), but other persistable variables
+    // (like RAW type var) are not created in scope.
+    // If config_.ir_optim() is False, parameters is loaded in LoadParameters(),
+    // still need to create other persistable variables.
+    // So in both case, create persistable variables at first.
     executor_->CreateVariables(*inference_program_, 0, true, sub_scope_);
-#endif
 
     // if enable_ir_optim_ is false,
     // the analysis pass(op fuse, graph analysis, trt subgraph, mkldnn etc) will
@@ -956,7 +952,7 @@ bool AnalysisPredictor::CommInit() {
     order += 1;
   }
   framework::NaiveExecutor e(place_);
-#ifdef PADDLE_WITH_DNNL
-  e.CreateVariables(*comm_init_program, 0, true, scope_.get(), true);
-#else
   e.CreateVariables(*comm_init_program, 0, true, scope_.get());
-#endif
   e.Prepare(scope_.get(), *comm_init_program, 0);
   e.Run();
   VLOG(3) << "Comm init successful.";
diff --git a/paddle/phi/core/dense_tensor_impl.cc b/paddle/phi/core/dense_tensor_impl.cc
index 8a3d7db993ad7..39efb048e7432 100644
--- a/paddle/phi/core/dense_tensor_impl.cc
+++ b/paddle/phi/core/dense_tensor_impl.cc
@@ -379,13 +379,8 @@ std::vector<DenseTensor> DenseTensor::Chunk(int64_t chunks,
 #ifdef PADDLE_WITH_DNNL
 const dnnl::memory::desc& DenseTensor::mem_desc() const {
   if (storage_properties_ == nullptr) {
-    std::unique_ptr<StorageProperties>* storage_properties_ptr =
-        const_cast<std::unique_ptr<StorageProperties>*>(&storage_properties_);
-    *storage_properties_ptr = std::make_unique<OneDNNStorageProperties>();
-    static_cast<OneDNNStorageProperties*>(storage_properties_ptr->get())
-        ->mem_desc = dnnl::memory::desc();
-    static_cast<OneDNNStorageProperties*>(storage_properties_ptr->get())
-        ->format = dnnl::memory::format_tag::undef;
+    static dnnl::memory::desc undef_desc = dnnl::memory::desc();
+    return undef_desc;
   }
   return this->storage_properties().mem_desc;
 }
@@ -393,8 +388,10 @@ const dnnl::memory::desc& DenseTensor::mem_desc() const {
 void DenseTensor::set_mem_desc(const dnnl::memory::desc& mem_desc) {
   if (storage_properties_ == nullptr) {
     storage_properties_ = std::make_unique<OneDNNStorageProperties>();
-  }
-  if (OneDNNStorageProperties::classof(storage_properties_.get())) {
+    static_cast<OneDNNStorageProperties*>(storage_properties_.get())->mem_desc =
+        mem_desc;
+    meta_.layout = DataLayout::ONEDNN;
+  } else if (OneDNNStorageProperties::classof(storage_properties_.get())) {
     static_cast<OneDNNStorageProperties*>(storage_properties_.get())->mem_desc =
         mem_desc;
     meta_.layout = DataLayout::ONEDNN;
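PATCH 11 abandons the const_cast approach entirely: when a tensor has no storage properties, mem_desc() now returns a reference to a function-local static default descriptor. Since C++11, such statics are guaranteed to be initialized exactly once and thread-safely, so the const read path no longer mutates anything, and set_mem_desc() remains the single mutating entry point. A compact sketch of the pattern, with Desc standing in for dnnl::memory::desc:

    #include <memory>

    struct Desc { int tag = -1; };   // stands in for dnnl::memory::desc

    struct Tensor {
      std::unique_ptr<Desc> props;   // stands in for storage_properties_

      const Desc& mem_desc() const {
        if (!props) {
          static const Desc undef{};  // one shared "undefined" default:
          return undef;               // thread-safe init, no const_cast
        }
        return *props;
      }

      void set_mem_desc(const Desc& d) {
        if (!props) props = std::make_unique<Desc>();
        *props = d;                   // the only mutating path, and non-const
      }
    };

Returning a reference to a static is safe here because the default object is immutable and lives for the whole program; the trade-off is that every descriptor-less tensor shares the same "undefined" instance.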
From 8c422076640dca861ff46e19ad5e4325f741f6e9 Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Tue, 2 Jan 2024 01:28:41 +0000
Subject: [PATCH 12/12] refine: revert the NaiveExecutor::CreateVariables change

---
 paddle/fluid/framework/naive_executor.cc | 24 ++----------------------
 paddle/fluid/framework/naive_executor.h  |  3 +--
 2 files changed, 3 insertions(+), 24 deletions(-)

diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc
index afd61f8e21586..3bfacc950325c 100644
--- a/paddle/fluid/framework/naive_executor.cc
+++ b/paddle/fluid/framework/naive_executor.cc
@@ -152,8 +152,7 @@ void NaiveExecutor::Run() {
 void NaiveExecutor::CreateVariables(const ProgramDesc &desc,
                                     int block_id,
                                     bool persistable,
-                                    Scope *scope,
-                                    bool init_mkldnn_memdesc) {
+                                    Scope *scope) {
   PADDLE_ENFORCE_NOT_NULL(scope,
                           platform::errors::InvalidArgument(
                               "The Scope to hold variables is nullptr."));
@@ -175,16 +174,7 @@ void NaiveExecutor::CreateVariables(const ProgramDesc &desc,
       continue;
     }
     num_vars++;
-#ifdef PADDLE_WITH_DNNL
-    auto init_mkldnn_memdesc_func = [&](Variable *var,
-                                        proto::VarType::Type var_type) {
-      if (var_type == proto::VarType::LOD_TENSOR) {
-        var->GetMutable<phi::DenseTensor>()->mem_desc();
-      } else if (var_type == proto::VarType::SELECTED_ROWS) {
-        var->GetMutable<phi::SelectedRows>()->mutable_value()->mem_desc();
-      }
-    };
-#endif
+
     if (persistable == var->Persistable()) {
       if (persistable) {
         if (!anc->FindVar(var->Name())) {
@@ -192,22 +182,12 @@ void NaiveExecutor::CreateVariables(const ProgramDesc &desc,
           VLOG(3) << scope << " Create persistable variable " << var->Name()
                   << ", which pointer is " << ptr;
           InitializeVariable(ptr, var->GetType());
-#ifdef PADDLE_WITH_DNNL
-          if (init_mkldnn_memdesc) {
-            init_mkldnn_memdesc_func(ptr, var->GetType());
-          }
-#endif
         }
       } else {
         auto *ptr = const_cast<Scope *>(scope)->Var(var->Name());
         VLOG(3) << scope << " Create variable " << var->Name()
                 << ", which pointer is " << ptr;
         InitializeVariable(ptr, var->GetType());
-#ifdef PADDLE_WITH_DNNL
-        if (init_mkldnn_memdesc) {
-          init_mkldnn_memdesc_func(ptr, var->GetType());
-        }
-#endif
       }
     }
   }
diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h
index d88c5d7f02827..5a558f3bd6921 100644
--- a/paddle/fluid/framework/naive_executor.h
+++ b/paddle/fluid/framework/naive_executor.h
@@ -71,8 +71,7 @@ class NaiveExecutor {
   void CreateVariables(const ProgramDesc& desc,
                        int block_id,
                        bool persistable,
-                       Scope* scope,
-                       bool init_mkldnn_memdesc = false);
+                       Scope* scope);
 
   // Run all the operators.
   void Run();
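Net effect of the series: PATCH 12 restores NaiveExecutor to its original signature, so the eager pre-creation from PATCH 09/10 is gone and everything OneDNN-specific ends up confined to DenseTensor's storage properties, as left by PATCH 11. The expected call pattern after the series, sketched under the assumption of a PADDLE_WITH_DNNL build with the post-series headers (illustrative usage, not a test from the patches):

    #include "dnnl.hpp"
    #include "paddle/phi/core/dense_tensor.h"

    void example() {
      phi::DenseTensor t;
      // With no storage properties yet, mem_desc() returns the shared
      // "undefined" descriptor introduced in PATCH 11 instead of
      // materializing anything behind a const_cast.
      const dnnl::memory::desc& before = t.mem_desc();
      (void)before;

      dnnl::memory::desc md({1, 3, 8, 8},
                            dnnl::memory::data_type::f32,
                            dnnl::memory::format_tag::nchw);
      // set_mem_desc() creates OneDNNStorageProperties on demand and
      // switches the tensor layout to DataLayout::ONEDNN.
      t.set_mem_desc(md);
    }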