From 9961d370615cdcc755d449782b96bf2c17c0a14c Mon Sep 17 00:00:00 2001 From: cyber-pioneer Date: Fri, 22 Dec 2023 06:40:34 +0000 Subject: [PATCH 01/13] inference support decomp --- .../fluid/inference/api/analysis_predictor.cc | 10 ++ .../test_decomp_inference_predictor_run.py | 147 ++++++++++++++++++ 2 files changed, 157 insertions(+) create mode 100644 test/ir/inference/test_decomp_inference_predictor_run.py diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 78a38ef175ef1..3be03df0f9fef 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -56,6 +56,7 @@ #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" +#include "paddle/fluid/primitive/base/decomp_trans.h" #include "paddle/phi/api/include/context_pool.h" #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/common/backend.h" @@ -64,6 +65,7 @@ #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/generator.h" #include "paddle/phi/kernels/funcs/data_type_transform.h" +#include "paddle/pir/core/op_result.h" #include "paddle/utils/string/split.h" #if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) @@ -786,6 +788,14 @@ bool AnalysisPredictor::PrepareExecutor() { pir_program_ = std::move( paddle::TranslateLegacyProgramToProgram(*inference_program_)); + std::vector src_vars; + std::set blacklist; + std::set whitelist; + VLOG(4) << "[Prim] Bind Decomp sinking_decomp begin."; + DecompProgram decomp_object( + pir_program_.get(), src_vars, blacklist, whitelist); + decomp_object.decomp_program(); + if (config_.use_gpu()) { ::pir::PassManager gpu_pm(::pir::IrContext::Instance(), 2); //----------------------------------------------------------------------------------------------// diff --git a/test/ir/inference/test_decomp_inference_predictor_run.py 
b/test/ir/inference/test_decomp_inference_predictor_run.py new file mode 100644 index 0000000000000..9f93abdf65e75 --- /dev/null +++ b/test/ir/inference/test_decomp_inference_predictor_run.py @@ -0,0 +1,147 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import tempfile +import unittest + +import numpy as np + +import paddle +from paddle.inference import Config, create_predictor + + +class TestNet(paddle.nn.Layer): + def __init__(self): + super().__init__() + self.fc1 = paddle.nn.Linear(4, 4) + self.fc2 = paddle.nn.Linear(4, 4) + + def forward(self, x1, x2): + y1 = self.fc1(x1) + y2 = self.fc2(x2) + y = paddle.nn.functional.relu(y1 + y2) + return y + + +@unittest.skipIf( + not paddle.is_compiled_with_cuda(), 'should compile with cuda.' 
+) +class TestPredictorRunWithTensor(unittest.TestCase): + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + net = TestNet() + model = paddle.jit.to_static( + net, + input_spec=[ + paddle.static.InputSpec( + shape=[2, 4], dtype='float32', name='input0' + ), + paddle.static.InputSpec( + shape=[2, 4], dtype='float32', name='input1' + ), + ], + ) + paddle.jit.save( + model, + os.path.join( + self.temp_dir.name, 'test_predictor_run_model/inference' + ), + ) + + def tearDown(self): + self.temp_dir.cleanup() + + def enable_pir(self, flag: bool): + paddle.set_flags({'FLAGS_enable_pir_in_executor': flag}) + + def init_predictor(self): + config = Config( + os.path.join( + self.temp_dir.name, + 'test_predictor_run_model/inference.pdmodel', + ), + os.path.join( + self.temp_dir.name, + 'test_predictor_run_model/inference.pdiparams', + ), + ) + config.enable_use_gpu(256, 0) + config.switch_ir_optim(False) + # config.enable_memory_optim() + config.enable_new_executor() + predictor = create_predictor(config) + return predictor + + def get_inputs(self): + input0 = np.array([[1, 2, 3, 4], [2, 3, 4, 5]]).astype(np.float32) + input1 = np.array([[0.1, 0.2, 0.3, 0.4], [1.2, 1.3, 1.4, 1.5]]).astype( + np.float32 + ) + + input0_tensor = paddle.to_tensor(input0) + input1_tensor = paddle.to_tensor(input1) + + return [input0_tensor, input1_tensor] + + def get_disorder_output(self, predictor): + [input0_tensor, input1_tensor] = self.get_inputs() + + input_names = predictor.get_input_names() + input0_tensor.name = input_names[0] + input1_tensor.name = input_names[1] + + # disorder + inputs = [input1_tensor, input0_tensor] + outputs = predictor.run(inputs) + + return outputs[0] + + def get_inorder_output(self, predictor): + [input0_tensor, input1_tensor] = self.get_inputs() + + # inorder + inputs = [input0_tensor, input1_tensor] + outputs = predictor.run(inputs) + + return outputs[0] + + def test_output(self): + self.enable_pir(False) + predictor = self.init_predictor() + 
output = self.get_inorder_output(predictor) + self.enable_pir(True) + pir_predictor = self.init_predictor() + pir_output = self.get_disorder_output(pir_predictor) + + np.testing.assert_allclose( + output.numpy().flatten(), pir_output.numpy().flatten() + ) + + def test_output_prim(self): + paddle.core._set_prim_all_enabled(True) + self.enable_pir(False) + predictor = self.init_predictor() + output = self.get_inorder_output(predictor) + self.enable_pir(True) + pir_predictor = self.init_predictor() + pir_output = self.get_disorder_output(pir_predictor) + + np.testing.assert_allclose( + output.numpy().flatten(), pir_output.numpy().flatten() + ) + + +if __name__ == '__main__': + unittest.main() From b0dcabae07fefa0765815b5955a144587e6952e9 Mon Sep 17 00:00:00 2001 From: cyber-pioneer Date: Mon, 25 Dec 2023 03:19:14 +0000 Subject: [PATCH 02/13] polish code --- .../fluid/inference/api/analysis_predictor.cc | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 3be03df0f9fef..6bb0af75c9097 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -56,6 +56,7 @@ #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" +#include "paddle/fluid/prim/utils/utils.h" #include "paddle/fluid/primitive/base/decomp_trans.h" #include "paddle/phi/api/include/context_pool.h" #include "paddle/phi/api/include/tensor.h" @@ -788,13 +789,15 @@ bool AnalysisPredictor::PrepareExecutor() { pir_program_ = std::move( paddle::TranslateLegacyProgramToProgram(*inference_program_)); - std::vector src_vars; - std::set blacklist; - std::set whitelist; - VLOG(4) << "[Prim] Bind Decomp sinking_decomp begin."; - DecompProgram decomp_object( - pir_program_.get(), src_vars, blacklist, whitelist); - decomp_object.decomp_program(); + if 
(paddle::prim::PrimCommonUtils::IsFwdPrimEnabled()) { + VLOG(4) << "[Prim] Decomp program in predictor begin."; + std::vector src_vars; + std::set blacklist; + std::set whitelist; + DecompProgram decomp_object( + pir_program_.get(), src_vars, blacklist, whitelist); + decomp_object.decomp_program(); + } if (config_.use_gpu()) { ::pir::PassManager gpu_pm(::pir::IrContext::Instance(), 2); From fce1a00ee62b19703381fe695ed45ac04c65e1d3 Mon Sep 17 00:00:00 2001 From: cyber-pioneer Date: Tue, 26 Dec 2023 03:03:40 +0000 Subject: [PATCH 03/13] add decomp base define --- paddle/fluid/primitive/base/decomp_trans.cc | 13 ++----------- paddle/fluid/primitive/base/decomp_trans.h | 17 ++++++++++++++++- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/primitive/base/decomp_trans.cc b/paddle/fluid/primitive/base/decomp_trans.cc index 6dde6c8b94002..23feb33a09f62 100644 --- a/paddle/fluid/primitive/base/decomp_trans.cc +++ b/paddle/fluid/primitive/base/decomp_trans.cc @@ -124,8 +124,8 @@ void DecompProgram::check_decomp_outputs( for (size_t i = 0; i < orig_outs.size(); i++) { if (skip_invalid_op_check && paddle::dialect::IsEmptyValue(decomp_outs[i])) { - VLOG(0) << "[Prim] Decomp op skip check of output index " << i - << " of op " << op_name; + VLOG(4) << "[Prim] Decomp op skip check of " << i + << "-index output of op " << op_name; } else { PADDLE_ENFORCE( !paddle::dialect::IsEmptyValue(orig_outs[i]), @@ -266,15 +266,6 @@ std::vector> call_decomp_rule(pir::Operation* op) { return decomp_res; } -DecompProgram::DecompProgram(pir::Program* program, - const std::vector& src_vars, - const std::set& blacklist, - const std::set& whitelist) - : program_(program), - src_vars_(src_vars), - blacklist_(blacklist), - whitelist_(whitelist) {} - std::vector DecompProgram::decomp_program() { std::ostringstream orig_prog_stream; std::unordered_map orig_vars_dict; diff --git a/paddle/fluid/primitive/base/decomp_trans.h b/paddle/fluid/primitive/base/decomp_trans.h index 
550d8beab8031..ca54b26fd95f8 100644 --- a/paddle/fluid/primitive/base/decomp_trans.h +++ b/paddle/fluid/primitive/base/decomp_trans.h @@ -29,7 +29,13 @@ class DecompProgram { DecompProgram(pir::Program* program, const std::vector& src_vars, const std::set& blacklist, - const std::set& whitelist); + const std::set& whitelist) + : program_(program), + src_vars_(src_vars), + blacklist_(blacklist), + whitelist_(whitelist) {} + + explict DecompProgram(pir::Program* program) : program_(program) {} std::vector decomp_program(); bool check_decomp_dynamic_shape(pir::Operation* op); @@ -46,6 +52,15 @@ class DecompProgram { const std::vector& decomp_outs, std::unordered_map orig_vars_dict); bool enable_decomp_by_filter(const std::string& op_name); + void set_src_vars(const std::vector& src_vars) { + src_vars_ = src_vars; + } + void set_blacklist(const std::set& blacklist) { + blacklist_ = blacklist; + } + void set_whitelist(const std::set& whitelist) { + whitelist_ = whitelist; + } private: pir::Program* program_; From 9a4f1195673ff39661347aaa530c7235905cfca4 Mon Sep 17 00:00:00 2001 From: cyber-pioneer Date: Tue, 26 Dec 2023 03:12:28 +0000 Subject: [PATCH 04/13] add decomp base define2 --- paddle/fluid/primitive/base/decomp_trans.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/primitive/base/decomp_trans.h b/paddle/fluid/primitive/base/decomp_trans.h index ca54b26fd95f8..f0ff56e81bbed 100644 --- a/paddle/fluid/primitive/base/decomp_trans.h +++ b/paddle/fluid/primitive/base/decomp_trans.h @@ -26,6 +26,8 @@ namespace paddle { class DecompProgram { public: + explicit DecompProgram(pir::Program* program) : program_(program) {} + DecompProgram(pir::Program* program, const std::vector& src_vars, const std::set& blacklist, @@ -35,8 +37,6 @@ class DecompProgram { blacklist_(blacklist), whitelist_(whitelist) {} - explict DecompProgram(pir::Program* program) : program_(program) {} - std::vector decomp_program(); bool 
check_decomp_dynamic_shape(pir::Operation* op); void check_decomp_outputs(const std::string& op_name, From 28648e1ba10be0c4f2108b807e78fe8ed3e2d7aa Mon Sep 17 00:00:00 2001 From: cyber-pioneer Date: Tue, 26 Dec 2023 05:20:48 +0000 Subject: [PATCH 05/13] change decomp infer --- paddle/fluid/inference/api/analysis_predictor.cc | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 6bb0af75c9097..6ccbc978ad382 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -791,11 +791,7 @@ bool AnalysisPredictor::PrepareExecutor() { if (paddle::prim::PrimCommonUtils::IsFwdPrimEnabled()) { VLOG(4) << "[Prim] Decomp program in predictor begin."; - std::vector src_vars; - std::set blacklist; - std::set whitelist; - DecompProgram decomp_object( - pir_program_.get(), src_vars, blacklist, whitelist); + DecompProgram decomp_object(pir_program_.get()); decomp_object.decomp_program(); } From df1fc179cacf7f4550215b2bdf545b228157e0af Mon Sep 17 00:00:00 2001 From: cyber-pioneer Date: Tue, 26 Dec 2023 08:18:03 +0000 Subject: [PATCH 06/13] fix symbol overload --- .../fluid/inference/api/analysis_predictor.cc | 1 - .../tensor_operants_gen.py | 23 +++++++-------- .../test_decomp_inference_predictor_run.py | 28 +++++++++---------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 6ccbc978ad382..fb50b4a715be7 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -66,7 +66,6 @@ #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/generator.h" #include "paddle/phi/kernels/funcs/data_type_transform.h" -#include "paddle/pir/core/op_result.h" #include "paddle/utils/string/split.h" #if defined(PADDLE_WITH_DISTRIBUTE) && 
defined(PADDLE_WITH_PSCORE) diff --git a/paddle/fluid/prim/api/auto_code_generated/tensor_operants_gen.py b/paddle/fluid/prim/api/auto_code_generated/tensor_operants_gen.py index 378f57a468cd4..6cf6615075282 100644 --- a/paddle/fluid/prim/api/auto_code_generated/tensor_operants_gen.py +++ b/paddle/fluid/prim/api/auto_code_generated/tensor_operants_gen.py @@ -216,6 +216,7 @@ class TEST_API StaticTensorOperants : public TensorOperantsBase { #include "paddle/fluid/primitive/type/lazy_tensor.h" PHI_DECLARE_bool(enable_pir_api); +PHI_DECLARE_bool(enable_pir_in_executor); """ @@ -228,7 +229,7 @@ class TEST_API StaticTensorOperants : public TensorOperantsBase { using LazyTensor = paddle::primitive::LazyTensor; Tensor StaticTensorOperants::add(const Tensor& x, const Scalar& y) { - if (FLAGS_enable_pir_api) { + if (FLAGS_enable_pir_api || FLAGS_enable_pir_in_executor) { return paddle::primitive::backend::add(x, paddle::primitive::backend::full(x.shape(), y, x.dtype(), x.place())); } else { return paddle::prim::add(x, paddle::prim::full(x.shape(), y, x.dtype(), x.place())); @@ -236,7 +237,7 @@ class TEST_API StaticTensorOperants : public TensorOperantsBase { } Tensor StaticTensorOperants::subtract(const Tensor& x, const Scalar& y) { - if (FLAGS_enable_pir_api) { + if (FLAGS_enable_pir_api || FLAGS_enable_pir_in_executor) { return paddle::primitive::backend::subtract(x, paddle::primitive::backend::full(x.shape(), y, x.dtype(), x.place())); } else { return paddle::prim::subtract(x, paddle::prim::full(x.shape(), y, x.dtype(), x.place())); @@ -244,7 +245,7 @@ class TEST_API StaticTensorOperants : public TensorOperantsBase { } Tensor StaticTensorOperants::multiply(const Tensor& x, const Scalar& y) { - if (FLAGS_enable_pir_api) { + if (FLAGS_enable_pir_api || FLAGS_enable_pir_in_executor) { return paddle::primitive::backend::scale(x, y, 0.0f, true); } else { return paddle::prim::scale(x, y, 0.0f, true); @@ -252,7 +253,7 @@ class TEST_API StaticTensorOperants : public 
TensorOperantsBase { } Tensor StaticTensorOperants::divide(const Tensor& x, const Scalar& y) { - if (FLAGS_enable_pir_api) { + if (FLAGS_enable_pir_api || FLAGS_enable_pir_in_executor) { return paddle::primitive::backend::divide(x, paddle::primitive::backend::full(x.shape(), y, x.dtype(), x.place())); } else { return paddle::prim::divide(x, paddle::prim::full(x.shape(), y, x.dtype(), x.place())); @@ -260,7 +261,7 @@ class TEST_API StaticTensorOperants : public TensorOperantsBase { } Tensor StaticTensorOperants::add(const Scalar& x, const Tensor& y) { - if (FLAGS_enable_pir_api) { + if (FLAGS_enable_pir_api || FLAGS_enable_pir_in_executor) { return paddle::primitive::backend::add(paddle::primitive::backend::full(y.shape(), x, y.dtype(), y.place()), y); } else { return paddle::prim::add(paddle::prim::full(y.shape(), x, y.dtype(), y.place()), y); @@ -269,7 +270,7 @@ class TEST_API StaticTensorOperants : public TensorOperantsBase { Tensor StaticTensorOperants::subtract(const Scalar& x, const Tensor& y) { - if (FLAGS_enable_pir_api) { + if (FLAGS_enable_pir_api || FLAGS_enable_pir_in_executor) { return paddle::primitive::backend::subtract(paddle::primitive::backend::full(y.shape(), x, y.dtype(), y.place()), y); } else { return paddle::prim::subtract(paddle::prim::full(y.shape(), x, y.dtype(), y.place()), y); @@ -277,7 +278,7 @@ class TEST_API StaticTensorOperants : public TensorOperantsBase { } Tensor StaticTensorOperants::multiply(const Scalar& x, const Tensor& y) { - if (FLAGS_enable_pir_api) { + if (FLAGS_enable_pir_api || FLAGS_enable_pir_in_executor) { return paddle::primitive::backend::scale(y, x, 0.0f, true); } else { return paddle::prim::scale(y, x, 0.0f, true); @@ -285,7 +286,7 @@ class TEST_API StaticTensorOperants : public TensorOperantsBase { } Tensor StaticTensorOperants::divide(const Scalar& x, const Tensor& y) { - if (FLAGS_enable_pir_api) { + if (FLAGS_enable_pir_api || FLAGS_enable_pir_in_executor) { return 
paddle::primitive::backend::divide(paddle::primitive::backend::full(y.shape(), x, y.dtype(), y.place()), y); } else { return paddle::prim::divide(paddle::prim::full(y.shape(), x, y.dtype(), y.place()), y); @@ -293,7 +294,7 @@ class TEST_API StaticTensorOperants : public TensorOperantsBase { } Tensor StaticTensorOperants::pow(const Tensor& x, const Tensor& y) { - if (FLAGS_enable_pir_api) { + if (FLAGS_enable_pir_api || FLAGS_enable_pir_in_executor) { return paddle::primitive::backend::elementwise_pow(x, y); } else { return paddle::prim::elementwise_pow(x, y); @@ -301,7 +302,7 @@ class TEST_API StaticTensorOperants : public TensorOperantsBase { } Tensor StaticTensorOperants::pow(const Tensor& x, const Scalar& y) { - if (FLAGS_enable_pir_api) { + if (FLAGS_enable_pir_api || FLAGS_enable_pir_in_executor) { return paddle::primitive::backend::elementwise_pow(x, paddle::primitive::backend::full(x.shape(), y, x.dtype(), x.place())); } else { return paddle::prim::elementwise_pow(x, paddle::prim::full(x.shape(), y, x.dtype(), x.place())); @@ -394,7 +395,7 @@ def gene_static_tensor_func_call(self): ) static_func_parameters = self.get_func_args() - static_tensor_func_call = f"""if (FLAGS_enable_pir_api) {{ + static_tensor_func_call = f"""if (FLAGS_enable_pir_api || FLAGS_enable_pir_in_executor) {{ return {backend_static_func_name}({static_func_parameters}); }} else {{ return {prim_static_func_name}({static_func_parameters}); diff --git a/test/ir/inference/test_decomp_inference_predictor_run.py b/test/ir/inference/test_decomp_inference_predictor_run.py index 9f93abdf65e75..a0e38977d0d76 100644 --- a/test/ir/inference/test_decomp_inference_predictor_run.py +++ b/test/ir/inference/test_decomp_inference_predictor_run.py @@ -21,17 +21,19 @@ import paddle from paddle.inference import Config, create_predictor +np.random.seed(2023) + class TestNet(paddle.nn.Layer): def __init__(self): super().__init__() - self.fc1 = paddle.nn.Linear(4, 4) - self.fc2 = paddle.nn.Linear(4, 4) + 
self.fc1 = paddle.nn.Linear(64, 32) + self.fc2 = paddle.nn.Linear(64, 32) def forward(self, x1, x2): y1 = self.fc1(x1) y2 = self.fc2(x2) - y = paddle.nn.functional.relu(y1 + y2) + y = paddle.nn.functional.softmax(y1 + y2) return y @@ -40,16 +42,19 @@ def forward(self, x1, x2): ) class TestPredictorRunWithTensor(unittest.TestCase): def setUp(self): + self.shape = [4, 8, 16, 64] + self.x = np.random.random(self.shape).astype(np.float32) + self.y = np.random.random(self.shape).astype(np.float32) self.temp_dir = tempfile.TemporaryDirectory() net = TestNet() model = paddle.jit.to_static( net, input_spec=[ paddle.static.InputSpec( - shape=[2, 4], dtype='float32', name='input0' + shape=self.shape, dtype='float32', name='input0' ), paddle.static.InputSpec( - shape=[2, 4], dtype='float32', name='input1' + shape=self.shape, dtype='float32', name='input1' ), ], ) @@ -85,13 +90,8 @@ def init_predictor(self): return predictor def get_inputs(self): - input0 = np.array([[1, 2, 3, 4], [2, 3, 4, 5]]).astype(np.float32) - input1 = np.array([[0.1, 0.2, 0.3, 0.4], [1.2, 1.3, 1.4, 1.5]]).astype( - np.float32 - ) - - input0_tensor = paddle.to_tensor(input0) - input1_tensor = paddle.to_tensor(input1) + input0_tensor = paddle.to_tensor(self.x) + input1_tensor = paddle.to_tensor(self.y) return [input0_tensor, input1_tensor] @@ -126,7 +126,7 @@ def test_output(self): pir_output = self.get_disorder_output(pir_predictor) np.testing.assert_allclose( - output.numpy().flatten(), pir_output.numpy().flatten() + output.numpy().flatten(), pir_output.numpy().flatten(), rtol=1e-6 ) def test_output_prim(self): @@ -139,7 +139,7 @@ def test_output_prim(self): pir_output = self.get_disorder_output(pir_predictor) np.testing.assert_allclose( - output.numpy().flatten(), pir_output.numpy().flatten() + output.numpy().flatten(), pir_output.numpy().flatten(), rtol=1e-6 ) From 1c94e0133e41740ed0a9409451db4156fb0321bc Mon Sep 17 00:00:00 2001 From: cyber-pioneer Date: Tue, 26 Dec 2023 08:51:40 +0000 Subject: 
[PATCH 07/13] fix test case --- .../test_decomp_inference_predictor_run.py | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/test/ir/inference/test_decomp_inference_predictor_run.py b/test/ir/inference/test_decomp_inference_predictor_run.py index a0e38977d0d76..687f28c1bcf15 100644 --- a/test/ir/inference/test_decomp_inference_predictor_run.py +++ b/test/ir/inference/test_decomp_inference_predictor_run.py @@ -21,8 +21,6 @@ import paddle from paddle.inference import Config, create_predictor -np.random.seed(2023) - class TestNet(paddle.nn.Layer): def __init__(self): @@ -33,15 +31,16 @@ def __init__(self): def forward(self, x1, x2): y1 = self.fc1(x1) y2 = self.fc2(x2) - y = paddle.nn.functional.softmax(y1 + y2) - return y + y3 = y1 + y2 + y4 = paddle.nn.functional.layer_norm(y3, y3.shape[1:]) + z = paddle.nn.functional.softmax(y4) + return z -@unittest.skipIf( - not paddle.is_compiled_with_cuda(), 'should compile with cuda.' -) class TestPredictorRunWithTensor(unittest.TestCase): def setUp(self): + self.use_gpu = paddle.is_compiled_with_cuda() + np.random.seed(2023) self.shape = [4, 8, 16, 64] self.x = np.random.random(self.shape).astype(np.float32) self.y = np.random.random(self.shape).astype(np.float32) @@ -82,9 +81,9 @@ def init_predictor(self): 'test_predictor_run_model/inference.pdiparams', ), ) - config.enable_use_gpu(256, 0) + if self.use_gpu: + config.enable_use_gpu(256, 0) config.switch_ir_optim(False) - # config.enable_memory_optim() config.enable_new_executor() predictor = create_predictor(config) return predictor @@ -117,29 +116,38 @@ def get_inorder_output(self, predictor): return outputs[0] - def test_output(self): + def test_output_prim_inorder(self): self.enable_pir(False) predictor = self.init_predictor() output = self.get_inorder_output(predictor) self.enable_pir(True) + paddle.core._set_prim_all_enabled(True) pir_predictor = self.init_predictor() - pir_output = self.get_disorder_output(pir_predictor) + 
pir_output = self.get_inorder_output(pir_predictor) + paddle.core._set_prim_all_enabled(False) np.testing.assert_allclose( - output.numpy().flatten(), pir_output.numpy().flatten(), rtol=1e-6 + output.numpy().flatten(), + pir_output.numpy().flatten(), + rtol=1e-6, + atol=1e-6, ) - def test_output_prim(self): - paddle.core._set_prim_all_enabled(True) + def test_output_prim_disorder(self): self.enable_pir(False) predictor = self.init_predictor() - output = self.get_inorder_output(predictor) + output = self.get_disorder_output(predictor) self.enable_pir(True) + paddle.core._set_prim_all_enabled(True) pir_predictor = self.init_predictor() pir_output = self.get_disorder_output(pir_predictor) + paddle.core._set_prim_all_enabled(False) np.testing.assert_allclose( - output.numpy().flatten(), pir_output.numpy().flatten(), rtol=1e-6 + output.numpy().flatten(), + pir_output.numpy().flatten(), + rtol=1e-6, + atol=1e-6, ) From ed442b5c05c3db4963ac075a60d2e301aa336754 Mon Sep 17 00:00:00 2001 From: cyber-pioneer Date: Wed, 27 Dec 2023 02:49:22 +0000 Subject: [PATCH 08/13] debug --- paddle/fluid/primitive/base/decomp_trans.cc | 15 ++++++++++++--- paddle/fluid/primitive/base/decomp_trans.h | 4 +++- paddle/fluid/pybind/pybind.cc | 3 ++- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/primitive/base/decomp_trans.cc b/paddle/fluid/primitive/base/decomp_trans.cc index 23feb33a09f62..df0111d56f8af 100644 --- a/paddle/fluid/primitive/base/decomp_trans.cc +++ b/paddle/fluid/primitive/base/decomp_trans.cc @@ -238,6 +238,14 @@ std::vector DecompProgram::construct_dst_vars( return tar_vars; } +std::vector DecompProgram::get_dst_vars() { + if (!paddle::prim::PrimCommonUtils::IsFwdPrimEnabled()) { + return src_vars_; + } else { + return dst_vars_; + } +} + bool DecompProgram::enable_decomp_by_filter(const std::string& op_name) { bool flag = true; @@ -266,7 +274,7 @@ std::vector> call_decomp_rule(pir::Operation* op) { return decomp_res; } -std::vector 
DecompProgram::decomp_program() { +void DecompProgram::decomp_program() { std::ostringstream orig_prog_stream; std::unordered_map orig_vars_dict; for (size_t i = 0; i < src_vars_.size(); i++) { @@ -276,7 +284,7 @@ std::vector DecompProgram::decomp_program() { VLOG(4) << "[Prim] Origin program bofore decomp :\n" << orig_prog_stream.str(); if (!paddle::prim::PrimCommonUtils::IsFwdPrimEnabled()) { - return src_vars_; + return; } std::vector tar_vars(src_vars_.size()); pir::Block* block = program_->block(); @@ -329,7 +337,8 @@ std::vector DecompProgram::decomp_program() { std::ostringstream decomp_prog_stream; program_->Print(decomp_prog_stream); VLOG(4) << "[Prim] New program after decomp :\n" << decomp_prog_stream.str(); - return tar_vars; + dst_vars_ = tar_vars; + return; } } // namespace paddle diff --git a/paddle/fluid/primitive/base/decomp_trans.h b/paddle/fluid/primitive/base/decomp_trans.h index f0ff56e81bbed..4f3a83d326b33 100644 --- a/paddle/fluid/primitive/base/decomp_trans.h +++ b/paddle/fluid/primitive/base/decomp_trans.h @@ -37,7 +37,7 @@ class DecompProgram { blacklist_(blacklist), whitelist_(whitelist) {} - std::vector decomp_program(); + void decomp_program(); bool check_decomp_dynamic_shape(pir::Operation* op); void check_decomp_outputs(const std::string& op_name, const std::vector& orig_outs, @@ -61,10 +61,12 @@ class DecompProgram { void set_whitelist(const std::set& whitelist) { whitelist_ = whitelist; } + std::vector get_dst_vars(); private: pir::Program* program_; std::vector src_vars_; + std::vector dst_vars_; std::set blacklist_; std::set whitelist_; }; diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index efeeb4855205e..53df4c25034ab 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -785,7 +785,8 @@ void BindDecomp(pybind11::module *m) { VLOG(4) << "[Prim] Bind Decomp sinking_decomp begin."; py::list res; DecompProgram decomp_object(program, src_vars, blacklist, whitelist); - auto 
tar_vars = decomp_object.decomp_program(); + decomp_object.decomp_program(); + std::vector tar_vars = decomp_object.get_dst_vars(); for (size_t i = 0; i < tar_vars.size(); ++i) { if (!tar_vars[i]) { res.append(nullptr); From f14766ff6d0d17027e4be42e73432251bccfcdc3 Mon Sep 17 00:00:00 2001 From: cyber-pioneer Date: Wed, 27 Dec 2023 08:09:06 +0000 Subject: [PATCH 09/13] debug --- paddle/fluid/primitive/base/decomp_trans.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/paddle/fluid/primitive/base/decomp_trans.cc b/paddle/fluid/primitive/base/decomp_trans.cc index df0111d56f8af..412e6e5535248 100644 --- a/paddle/fluid/primitive/base/decomp_trans.cc +++ b/paddle/fluid/primitive/base/decomp_trans.cc @@ -313,6 +313,22 @@ void DecompProgram::decomp_program() { tar_vars = construct_dst_vars( op->name(), orig_outs, standard_decomp_res, orig_vars_dict); + std::vector tmp_ops_list; + for (auto& tmp_op : *block) { + tmp_ops_list.push_back(&tmp_op); + } + for (size_t j = 0; j < tmp_ops_list.size(); j++) { + auto sub_op = tmp_ops_list[i]; + VLOG(0) << "[prim] sub op " << sub_op->name() << " index " << j; + if (sub_op == op) { + VLOG(0) << "[prim] sub op start ================ "; + } + + if (sub_op == ops_list[i + 1] || sub_op == ops_list[-1]) { + VLOG(0) << "[prim] sub op end ================ "; + } + } + op->ReplaceAllUsesWith(standard_decomp_res); bool remove_op = true; for (auto& item : op->results()) { From b5766400d2280cbe12072112168acc48955c4775 Mon Sep 17 00:00:00 2001 From: cyber-pioneer Date: Tue, 2 Jan 2024 02:59:50 +0000 Subject: [PATCH 10/13] decomp add debug info --- paddle/fluid/primitive/base/decomp_trans.cc | 31 +++++++++++++++------ 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/primitive/base/decomp_trans.cc b/paddle/fluid/primitive/base/decomp_trans.cc index 412e6e5535248..0dbdbc5f16bcf 100644 --- a/paddle/fluid/primitive/base/decomp_trans.cc +++ b/paddle/fluid/primitive/base/decomp_trans.cc @@ -26,6 +26,8 @@ 
PHI_DECLARE_bool(prim_skip_dynamic); using paddle::dialect::DenseTensorType; using paddle::dialect::SelectedRowsType; +constexpr char kPrimBelongTo[] = "prim_belong_to"; + namespace paddle { using Program = pir::Program; @@ -292,6 +294,8 @@ void DecompProgram::decomp_program() { for (auto& op : *block) { ops_list.push_back(&op); } + size_t decomp_end_index = 0; + size_t sub_op_start_index = 0; for (size_t i = 0; i < ops_list.size(); i++) { auto op = ops_list[i]; bool enable_prim = @@ -313,23 +317,34 @@ void DecompProgram::decomp_program() { tar_vars = construct_dst_vars( op->name(), orig_outs, standard_decomp_res, orig_vars_dict); + op->ReplaceAllUsesWith(standard_decomp_res); + std::vector tmp_ops_list; for (auto& tmp_op : *block) { tmp_ops_list.push_back(&tmp_op); } - for (size_t j = 0; j < tmp_ops_list.size(); j++) { - auto sub_op = tmp_ops_list[i]; - VLOG(0) << "[prim] sub op " << sub_op->name() << " index " << j; - if (sub_op == op) { - VLOG(0) << "[prim] sub op start ================ "; + bool attach_belong_op_info = false; + + for (size_t j = sub_op_start_index; j < tmp_ops_list.size(); j++) { + auto sub_op = tmp_ops_list[j]; + auto it = std::find(ops_list.begin(), ops_list.end(), sub_op); + if (((it != ops_list.end()) && sub_op == ops_list[i - 1]) || + ((it == ops_list.end()) && j == decomp_end_index)) { + attach_belong_op_info = true; + continue; } - if (sub_op == ops_list[i + 1] || sub_op == ops_list[-1]) { - VLOG(0) << "[prim] sub op end ================ "; + if (sub_op == ops_list[i]) { + decomp_end_index = j - 1; + attach_belong_op_info = false; + sub_op_start_index = decomp_end_index; + } + if (attach_belong_op_info) { + pir::Attribute attribute(builder.str_attr(op->name())); + sub_op->set_attribute(kPrimBelongTo, attribute); } } - op->ReplaceAllUsesWith(standard_decomp_res); bool remove_op = true; for (auto& item : op->results()) { if (item.HasOneUse()) { From 570110c0c0e8ef519a5b01f1ea5e848b2963bcdf Mon Sep 17 00:00:00 2001 From: cyber-pioneer Date: 
Tue, 2 Jan 2024 11:41:32 +0000 Subject: [PATCH 11/13] add cpp flag --- paddle/fluid/prim/utils/utils.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/paddle/fluid/prim/utils/utils.cc b/paddle/fluid/prim/utils/utils.cc index 80721ccf1049d..87a2fef943f41 100644 --- a/paddle/fluid/prim/utils/utils.cc +++ b/paddle/fluid/prim/utils/utils.cc @@ -17,8 +17,11 @@ #include "paddle/fluid/prim/utils/static/static_global_utils.h" PADDLE_DEFINE_EXPORTED_bool(prim_enabled, false, "enable_prim or not"); +PADDLE_DEFINE_EXPORTED_string(prim_blacklist, "", "prim ops blacklist"); + namespace paddle { namespace prim { + bool PrimCommonUtils::IsBwdPrimEnabled() { return StaticCompositeContext::Instance().IsBwdPrimEnabled(); } @@ -40,10 +43,12 @@ bool PrimCommonUtils::IsFwdPrimEnabled() { } void PrimCommonUtils::SetFwdPrimEnabled(bool enable_prim) { + VLOG(0) << "FLAGS_prim_enabled ====================== " << FLAGS_prim_enabled; StaticCompositeContext::Instance().SetFwdPrimEnabled(enable_prim); } void PrimCommonUtils::SetAllPrimEnabled(bool enable_prim) { + VLOG(0) << "FLAGS_prim_enabled ====================== " << FLAGS_prim_enabled; StaticCompositeContext::Instance().SetAllPrimEnabled(enable_prim); } From ff4ec98d360a563cee11ba75ae3f7009619111d8 Mon Sep 17 00:00:00 2001 From: cyber-pioneer Date: Tue, 2 Jan 2024 11:44:47 +0000 Subject: [PATCH 12/13] revert --- paddle/fluid/primitive/base/decomp_trans.cc | 31 --------------------- 1 file changed, 31 deletions(-) diff --git a/paddle/fluid/primitive/base/decomp_trans.cc b/paddle/fluid/primitive/base/decomp_trans.cc index 0dbdbc5f16bcf..df0111d56f8af 100644 --- a/paddle/fluid/primitive/base/decomp_trans.cc +++ b/paddle/fluid/primitive/base/decomp_trans.cc @@ -26,8 +26,6 @@ PHI_DECLARE_bool(prim_skip_dynamic); using paddle::dialect::DenseTensorType; using paddle::dialect::SelectedRowsType; -constexpr char kPrimBelongTo[] = "prim_belong_to"; - namespace paddle { using Program = pir::Program; @@ -294,8 +292,6 @@ void 
DecompProgram::decomp_program() { for (auto& op : *block) { ops_list.push_back(&op); } - size_t decomp_end_index = 0; - size_t sub_op_start_index = 0; for (size_t i = 0; i < ops_list.size(); i++) { auto op = ops_list[i]; bool enable_prim = @@ -318,33 +314,6 @@ void DecompProgram::decomp_program() { op->name(), orig_outs, standard_decomp_res, orig_vars_dict); op->ReplaceAllUsesWith(standard_decomp_res); - - std::vector tmp_ops_list; - for (auto& tmp_op : *block) { - tmp_ops_list.push_back(&tmp_op); - } - bool attach_belong_op_info = false; - - for (size_t j = sub_op_start_index; j < tmp_ops_list.size(); j++) { - auto sub_op = tmp_ops_list[j]; - auto it = std::find(ops_list.begin(), ops_list.end(), sub_op); - if (((it != ops_list.end()) && sub_op == ops_list[i - 1]) || - ((it == ops_list.end()) && j == decomp_end_index)) { - attach_belong_op_info = true; - continue; - } - - if (sub_op == ops_list[i]) { - decomp_end_index = j - 1; - attach_belong_op_info = false; - sub_op_start_index = decomp_end_index; - } - if (attach_belong_op_info) { - pir::Attribute attribute(builder.str_attr(op->name())); - sub_op->set_attribute(kPrimBelongTo, attribute); - } - } - bool remove_op = true; for (auto& item : op->results()) { if (item.HasOneUse()) { From 93c02ba6c0cca005c2ea6b6d2c0ac89278736ae1 Mon Sep 17 00:00:00 2001 From: cyber-pioneer Date: Tue, 2 Jan 2024 13:27:53 +0000 Subject: [PATCH 13/13] remove unused flag --- paddle/fluid/prim/utils/utils.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/prim/utils/utils.cc b/paddle/fluid/prim/utils/utils.cc index 87a2fef943f41..fcee9301a9aa7 100644 --- a/paddle/fluid/prim/utils/utils.cc +++ b/paddle/fluid/prim/utils/utils.cc @@ -17,13 +17,15 @@ #include "paddle/fluid/prim/utils/static/static_global_utils.h" PADDLE_DEFINE_EXPORTED_bool(prim_enabled, false, "enable_prim or not"); -PADDLE_DEFINE_EXPORTED_string(prim_blacklist, "", "prim ops blacklist"); +PADDLE_DEFINE_EXPORTED_bool(prim_all, 
false, "enable prim_all or not"); +PADDLE_DEFINE_EXPORTED_bool(prim_forward, false, "enable prim_forward or not"); +PADDLE_DEFINE_EXPORTED_bool(prim_backward, false, "enable prim_backward or not"); namespace paddle { namespace prim { - bool PrimCommonUtils::IsBwdPrimEnabled() { - return StaticCompositeContext::Instance().IsBwdPrimEnabled(); + bool res = StaticCompositeContext::Instance().IsBwdPrimEnabled(); + return res || FLAGS_prim_all || FLAGS_prim_backward; } void PrimCommonUtils::SetBwdPrimEnabled(bool enable_prim) { @@ -39,16 +41,15 @@ void PrimCommonUtils::SetEagerPrimEnabled(bool enable_prim) { } bool PrimCommonUtils::IsFwdPrimEnabled() { - return StaticCompositeContext::Instance().IsFwdPrimEnabled(); + bool res = StaticCompositeContext::Instance().IsFwdPrimEnabled(); + return res || FLAGS_prim_all || FLAGS_prim_forward; } void PrimCommonUtils::SetFwdPrimEnabled(bool enable_prim) { - VLOG(0) << "FLAGS_prim_enabled ====================== " << FLAGS_prim_enabled; StaticCompositeContext::Instance().SetFwdPrimEnabled(enable_prim); } void PrimCommonUtils::SetAllPrimEnabled(bool enable_prim) { - VLOG(0) << "FLAGS_prim_enabled ====================== " << FLAGS_prim_enabled; StaticCompositeContext::Instance().SetAllPrimEnabled(enable_prim); }