From 72553a2c89c2be1276be248a92712b10227466a5 Mon Sep 17 00:00:00 2001 From: DannyIsFunny <912790387@qq.com> Date: Mon, 6 Jun 2022 07:25:20 +0000 Subject: [PATCH 1/8] code --- paddle/fluid/framework/operator.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 7dc885f54ab6c..ec0acf2a63340 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -1535,7 +1535,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope, new CacheImpl(new phi::KernelContext(), new RuntimeInferShapeContext(*this, *runtime_ctx)); BuildPhiKernelContext(*runtime_ctx, dev_ctx, impl_->getKernelContext()); - (*pt_kernel_)(impl_->getKernelContext()); } else { phi::KernelContext pt_kernel_context; @@ -2398,7 +2397,6 @@ void OperatorWithKernel::BuildPhiKernelContext( // calcute the start and end index of the input tensors size_t start_idx = (i == 0 ? 0 : pt_kernel_context->InputRangeAt(i - 1).second); - // deal with optional here if ((it == ctx.inputs.end() || it->second.size() == 0) && (input_defs[i].type_index == @@ -2412,6 +2410,7 @@ void OperatorWithKernel::BuildPhiKernelContext( auto end_idx = start_idx + 1; pt_kernel_context->AssignInputRange(std::make_pair(start_idx, end_idx), i); + continue; } auto ins_vector = it->second; @@ -2426,6 +2425,7 @@ void OperatorWithKernel::BuildPhiKernelContext( tensor_in = &(var->Get()); pt_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in); } else if (var->IsType()) { + need_prepare_phi_data_ = true; paddle::small_vector tensor_vector; auto& tensor_array = var->Get(); for (auto& t : tensor_array) { @@ -2526,6 +2526,7 @@ void OperatorWithKernel::BuildPhiKernelContext( attr_names[i])); } } else { // scalar is in the input + need_prepare_phi_data_ = true; auto& ins_vector = ctx.inputs.at(attr_names[i]); pt_kernel_context->EmplaceBackAttr(std::move( experimental::MakePhiScalarFromVar(*ins_vector.front()))); @@ -2557,6 +2558,7 @@ void OperatorWithKernel::BuildPhiKernelContext( attr_names[i])); } } else { // shape is in the input + need_prepare_phi_data_ = true; auto& ins_vector = ctx.inputs.at(attr_names[i]); if (ins_vector.size() == 1) { // ShapeTensor pt_kernel_context->EmplaceBackAttr(std::move( From c49e2672f4954382128cf99448b4aed71e87ec24 Mon Sep 17 00:00:00 2001 From: DannyIsFunny <912790387@qq.com> Date: Wed, 22 Jun 2022 09:22:44 +0000 Subject: [PATCH 2/8] cuda graph --- paddle/fluid/framework/naive_executor.cc | 30 ++++++++++++++++++++---- paddle/fluid/framework/naive_executor.h | 2 ++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc index 1c2740c2b2ee7..ef50a078825a0 100644 --- a/paddle/fluid/framework/naive_executor.cc +++ b/paddle/fluid/framework/naive_executor.cc @@ -46,11 +46,33 @@ void NaiveExecutor::Run() { platform::RegisterModelLayout(ops_, place_); #endif platform::ScopedFlushDenormal flush; + platform::CUDADeviceContext *ctx = static_cast( + platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0))); + auto stream = ctx->stream(); + for (auto &op : ops_) { - VLOG(4) << std::this_thread::get_id() << " run " - << op->DebugStringEx(scope_) << " on scope " << scope_; - op->SetIsCalledByExecutor(false); - op->Run(*scope_, place_); + if(std::count(graphed_ops.begin(), graphed_ops.end(), op->Type())) { + if(graph_instances_.count(op.get())){ + cudaGraphLaunch(graph_instances_[op.get()], stream); + cudaStreamSynchronize(stream); + } else { + cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal); + VLOG(4) << std::this_thread::get_id() << " run " + << op->DebugStringEx(scope_) << " on scope " << scope_; + op->SetIsCalledByExecutor(false); + op->Run(*scope_, place_); + cudaGraph_t graph_; + cudaGraphExec_t instance_; + cudaStreamEndCapture(stream, &graph_); + cudaGraphInstantiate(&instance_, graph_, NULL, NULL, 0); + graph_instances_[op.get()] = instance_; + } + } else { + VLOG(4) << std::this_thread::get_id() << " run " + << op->DebugStringEx(scope_) << " on scope " << scope_; + op->SetIsCalledByExecutor(false); + op->Run(*scope_, place_); + } } } diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h index 498ad1d058827..2756208c530f0 100644 --- a/paddle/fluid/framework/naive_executor.h +++ b/paddle/fluid/framework/naive_executor.h @@ -76,6 +76,8 @@ class NaiveExecutor { const platform::Place place_; // Catch the required resource to avoid recreate. std::vector> ops_; + std::map graph_instances_; + std::vector graphed_ops {"conv2d"}; Scope* scope_; }; From 14a73886b1d9cca4e63289807743b15858bc4eb0 Mon Sep 17 00:00:00 2001 From: DannyIsFunny <912790387@qq.com> Date: Thu, 23 Jun 2022 03:16:19 +0000 Subject: [PATCH 3/8] code --- paddle/fluid/framework/operator.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index dbf6bec676c90..8a73557d78483 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -2398,6 +2398,7 @@ void OperatorWithKernel::BuildPhiKernelContext( // calcute the start and end index of the input tensors size_t start_idx = (i == 0 ? 0 : pt_kernel_context->InputRangeAt(i - 1).second); + // deal with optional here if ((it == ctx.inputs.end() || it->second.size() == 0) && (input_defs[i].type_index == @@ -2411,7 +2412,6 @@ void OperatorWithKernel::BuildPhiKernelContext( auto end_idx = start_idx + 1; pt_kernel_context->AssignInputRange(std::make_pair(start_idx, end_idx), i); - continue; } auto ins_vector = it->second; @@ -2426,7 +2426,6 @@ void OperatorWithKernel::BuildPhiKernelContext( tensor_in = &(var->Get()); pt_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in); } else if (var->IsType()) { - need_prepare_phi_data_ = true; paddle::small_vector tensor_vector; auto& tensor_array = var->Get(); for (auto& t : tensor_array) { @@ -2527,7 +2526,6 @@ void OperatorWithKernel::BuildPhiKernelContext( attr_names[i])); } } else { // scalar is in the input - need_prepare_phi_data_ = true; auto& ins_vector = ctx.inputs.at(attr_names[i]); pt_kernel_context->EmplaceBackAttr(std::move( experimental::MakePhiScalarFromVar(*ins_vector.front()))); @@ -2559,7 +2557,6 @@ void OperatorWithKernel::BuildPhiKernelContext( attr_names[i])); } } else { // shape is in the input - need_prepare_phi_data_ = true; auto& ins_vector = ctx.inputs.at(attr_names[i]); if (ins_vector.size() == 1) { // ShapeTensor pt_kernel_context->EmplaceBackAttr(std::move( From 53cf34c89aa31d8027a16c334dff2afa07404053 Mon Sep 17 00:00:00 2001 From: DannyIsFunny <912790387@qq.com> Date: Thu, 23 Jun 2022 03:38:34 +0000 Subject: [PATCH 4/8] code --- paddle/fluid/framework/operator.cc | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 8a73557d78483..7395a8e0da8e8 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -1529,20 +1529,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope, // Do data transform before building KernelContext // TODO(zhiqiu): support TransferInplaceVarsBack PreparePhiData(exec_scope, *pt_kernel_, *kernel_signature_, runtime_ctx); - if (enable_cache_runtime_context_ && !need_prepare_phi_data_ && - !need_prepare_data_) { - impl_ = - new CacheImpl(new phi::KernelContext(), - new RuntimeInferShapeContext(*this, *runtime_ctx)); - BuildPhiKernelContext(*runtime_ctx, dev_ctx, impl_->getKernelContext()); - (*pt_kernel_)(impl_->getKernelContext()); - } else { - phi::KernelContext pt_kernel_context; - // Do data transform before building KernelContext - // TODO(zhiqiu): support TransferInplaceVarsBack - BuildPhiKernelContext(*runtime_ctx, dev_ctx, &pt_kernel_context); - (*pt_kernel_)(&pt_kernel_context); - } + BuildPhiKernelContext(*runtime_ctx, dev_ctx, &pt_kernel_context); + (*pt_kernel_)(&pt_kernel_context); } else { (*kernel_func_)( ExecutionContext(*this, exec_scope, *dev_ctx, *runtime_ctx)); From 59a4de80eae7b95b8ab3d97f3c0bd947636effe4 Mon Sep 17 00:00:00 2001 From: DannyIsFunny <912790387@qq.com> Date: Thu, 23 Jun 2022 03:45:52 +0000 Subject: [PATCH 5/8] code --- paddle/fluid/framework/naive_executor.cc | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc index ef50a078825a0..e451cc172e84c 100644 --- a/paddle/fluid/framework/naive_executor.cc +++ b/paddle/fluid/framework/naive_executor.cc @@ -45,17 +45,19 @@ void NaiveExecutor::Run() { platform::AttachPointerHashToMKLDNNKey(this, place_); platform::RegisterModelLayout(ops_, place_); #endif + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) platform::ScopedFlushDenormal flush; platform::CUDADeviceContext *ctx = static_cast( platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0))); auto stream = ctx->stream(); for (auto &op : ops_) { - if(std::count(graphed_ops.begin(), graphed_ops.end(), op->Type())) { - if(graph_instances_.count(op.get())){ - cudaGraphLaunch(graph_instances_[op.get()], stream); - cudaStreamSynchronize(stream); - } else { + if (std::count(graphed_ops.begin(), graphed_ops.end(), op->Type())) { + if (graph_instances_.count(op.get())) { + cudaGraphLaunch(graph_instances_[op.get()], stream); + cudaStreamSynchronize(stream); + } else { cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal); VLOG(4) << std::this_thread::get_id() << " run " << op->DebugStringEx(scope_) << " on scope " << scope_; @@ -67,13 +69,21 @@ void NaiveExecutor::Run() { cudaGraphInstantiate(&instance_, graph_, NULL, NULL, 0); graph_instances_[op.get()] = instance_; } - } else { + } else { VLOG(4) << std::this_thread::get_id() << " run " << op->DebugStringEx(scope_) << " on scope " << scope_; op->SetIsCalledByExecutor(false); op->Run(*scope_, place_); } } +#else + for (auto &op : ops_) { + VLOG(4) << std::this_thread::get_id() << " run " + << op->DebugStringEx(scope_) << " on scope " << scope_; + op->SetIsCalledByExecutor(false); + op->Run(*scope_, place_); + } +#endif } void NaiveExecutor::CreateVariables(const ProgramDesc &desc, int block_id, From 4a07287cbe412f73d7be4b9efe507f2856b7f32b Mon Sep 17 00:00:00 2001 From: DannyIsFunny <912790387@qq.com> Date: Thu, 23 Jun 2022 03:48:35 +0000 Subject: [PATCH 6/8] code --- paddle/fluid/framework/naive_executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc index e451cc172e84c..b4e64a0935f97 100644 --- a/paddle/fluid/framework/naive_executor.cc +++ b/paddle/fluid/framework/naive_executor.cc @@ -46,8 +46,8 @@ void NaiveExecutor::Run() { platform::RegisterModelLayout(ops_, place_); #endif -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) platform::ScopedFlushDenormal flush; +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) platform::CUDADeviceContext *ctx = static_cast( platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0))); auto stream = ctx->stream(); From 3599cf573b6734ecc486feb475dc89975cfd9caf Mon Sep 17 00:00:00 2001 From: DannyIsFunny <912790387@qq.com> Date: Mon, 27 Jun 2022 06:41:50 +0000 Subject: [PATCH 7/8] code --- paddle/fluid/framework/operator.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 7395a8e0da8e8..002adb83796e8 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -1261,6 +1261,11 @@ void OperatorWithKernel::RunImpl(const Scope& scope, RuntimeContext ctx(Inputs(), Outputs(), scope); RunImpl(scope, place, &ctx); pre_scope_ = cur_scope; + } else if (run_phi_kernel_ && impl_ != nullptr && !need_prepare_data_ && + !need_prepare_phi_data_) { + if (!all_kernels_must_compute_runtime_shape_) + this->Info().infer_shape_(impl_->getRuntimeInferShapeContext()); + (*pt_kernel_)(impl_->getKernelContext()); } else { if (runtime_ctx_.get() == nullptr || pre_scope_ != cur_scope) { std::lock_guard lock(cache_update_mutex_); From 11cce8ce57fbab536a5dab2e320a8f5ee1e4d16d Mon Sep 17 00:00:00 2001 From: DannyIsFunny <912790387@qq.com> Date: Mon, 27 Jun 2022 06:46:19 +0000 Subject: [PATCH 8/8] code --- paddle/fluid/framework/naive_executor.cc | 32 ------------------------ paddle/fluid/framework/naive_executor.h | 2 -- 2 files changed, 34 deletions(-) diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc index b4e64a0935f97..1c2740c2b2ee7 100644 --- a/paddle/fluid/framework/naive_executor.cc +++ b/paddle/fluid/framework/naive_executor.cc @@ -45,45 +45,13 @@ void NaiveExecutor::Run() { platform::AttachPointerHashToMKLDNNKey(this, place_); platform::RegisterModelLayout(ops_, place_); #endif - platform::ScopedFlushDenormal flush; -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - platform::CUDADeviceContext *ctx = static_cast( - platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0))); - auto stream = ctx->stream(); - - for (auto &op : ops_) { - if (std::count(graphed_ops.begin(), graphed_ops.end(), op->Type())) { - if (graph_instances_.count(op.get())) { - cudaGraphLaunch(graph_instances_[op.get()], stream); - cudaStreamSynchronize(stream); - } else { - cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal); - VLOG(4) << std::this_thread::get_id() << " run " - << op->DebugStringEx(scope_) << " on scope " << scope_; - op->SetIsCalledByExecutor(false); - op->Run(*scope_, place_); - cudaGraph_t graph_; - cudaGraphExec_t instance_; - cudaStreamEndCapture(stream, &graph_); - cudaGraphInstantiate(&instance_, graph_, NULL, NULL, 0); - graph_instances_[op.get()] = instance_; - } - } else { - VLOG(4) << std::this_thread::get_id() << " run " - << op->DebugStringEx(scope_) << " on scope " << scope_; - op->SetIsCalledByExecutor(false); - op->Run(*scope_, place_); - } - } -#else for (auto &op : ops_) { VLOG(4) << std::this_thread::get_id() << " run " << op->DebugStringEx(scope_) << " on scope " << scope_; op->SetIsCalledByExecutor(false); op->Run(*scope_, place_); } -#endif } void NaiveExecutor::CreateVariables(const ProgramDesc &desc, int block_id, diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h index 2756208c530f0..498ad1d058827 100644 --- a/paddle/fluid/framework/naive_executor.h +++ b/paddle/fluid/framework/naive_executor.h @@ -76,8 +76,6 @@ class NaiveExecutor { const platform::Place place_; // Catch the required resource to avoid recreate. std::vector> ops_; - std::map graph_instances_; - std::vector graphed_ops {"conv2d"}; Scope* scope_; };