From 8f5d8796563ec7d5c9280389ee54b11bf024b568 Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Sun, 2 Jun 2024 14:36:08 +0000
Subject: [PATCH 01/19] support model conversion from fp32 to fp16

---
 paddle2onnx/parser/parser.cc |  3 +-
 tests/test_resnet_fp16.py    | 75 ++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_resnet_fp16.py

diff --git a/paddle2onnx/parser/parser.cc b/paddle2onnx/parser/parser.cc
index d2c743a2d..c2c03019c 100755
--- a/paddle2onnx/parser/parser.cc
+++ b/paddle2onnx/parser/parser.cc
@@ -815,7 +815,6 @@ void PaddleParser::GetGlobalBlockInputOutputInfo() {
 }
 
 int32_t PaddleDataTypeSize(int32_t paddle_dtype) {
-  Assert(paddle_dtype != FP16, "Float16 is not supported.");
   if (paddle_dtype == P2ODataType::BOOL) {
     return sizeof(bool);
   } else if (paddle_dtype == P2ODataType::INT8) {
     return sizeof(int8_t);
@@ -828,6 +827,8 @@ int32_t PaddleDataTypeSize(int32_t paddle_dtype) {
     return sizeof(int64_t);
   } else if (paddle_dtype == P2ODataType::FP32) {
     return sizeof(float);
+  } else if (paddle_dtype == P2ODataType::FP16) {
+    return sizeof(int16_t);
   } else if (paddle_dtype == P2ODataType::FP64) {
     return sizeof(double);
   } else if (paddle_dtype == P2ODataType::UINT8) {
diff --git a/tests/test_resnet_fp16.py b/tests/test_resnet_fp16.py
new file mode 100644
index 000000000..a8b095a0d
--- /dev/null
+++ b/tests/test_resnet_fp16.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import numpy as np
+import onnxruntime
+
+import paddle
+from onnxbase import APIOnnx, randtool
+import paddle2onnx
+
+import unittest
+
+class TestFP32ToFP16(unittest.TestCase):
+    def test(self):
+        pass
+
+if __name__ == "__main__":
+    # download resnet model
+    if not os.path.exists("ResNet50_infer"):
+        os.system("wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet50_infer.tar && tar -xf ResNet50_infer.tar && rm -rf ResNet50_infer.tar")
+
+    # generate fp16 model
+    path = "ResNet50_infer/inference"
+    # paddle.set_device("gpu")
+    model = paddle.jit.load(path)
+    model.float16()
+    input_spec = [paddle.static.InputSpec(shape=[-1, 3, 224, 224], dtype='float16', name='inputs')]
+    paddle.jit.save(model, 'ResNet50_infer/inference_fp16', input_spec=input_spec)
+
+    # model.eval()
+    # x = paddle.randn([1, 3, 224, 224], "float16")
+    # output = model(x)
+    # print(f"output: {output}")
+
+    # convert to onnx
+    os.system("paddle2onnx --model_dir ResNet50_infer --model_filename inference_fp16.pdmodel --params_filename inference_fp16.pdiparams --export_fp16_model True --save_file ResNet50_infer/resnet_fp16.onnx")
+    # os.system("paddle2onnx --model_dir ResNet50_infer --model_filename inference.pdmodel --params_filename inference.pdiparams --export_fp16_model True --save_file ResNet50_infer/resnet_fp16.onnx")
+
+    # validate precision
+    # np.random.seed(10)
+    # input_img = np.random.rand(1, 3, 224, 224).astype("float32")
+
+    # onnx_file_name = "/wuzp/Paddle2ONNX/model/ResNet50_infer/resnet_fp32.onnx"
+    # # providers = [("CUDAExecutionProvider")]
+    # ort_session = onnxruntime.InferenceSession(onnx_file_name)
+
+    # ort_inputs = {ort_session.get_inputs()[0].name: input_img}
+    # ort_outputs = ort_session.run(None, ort_inputs)
+    # print(f"ort_output: {ort_outputs}")
+    # # print(onnxruntime.get_device())
+
+    # onnx_file_name_fp16 = "/wuzp/Paddle2ONNX/model/resnet_fp16.onnx"
+    # # providers = [("CUDAExecutionProvider")]
+    # ort_session_fp16 = onnxruntime.InferenceSession(onnx_file_name_fp16)
+    # input_img_fp16 = input_img.astype("float16")
+    # ort_inputs_fp16 = {ort_session_fp16.get_inputs()[0].name: input_img_fp16}
+    # ort_outputs_fp16 = ort_session_fp16.run(None, ort_inputs_fp16)
+    # print(f"ort_outputs_fp16: {ort_outputs_fp16}")
+
+    # # assert
+    # np.testing.assert_allclose(
+    #     ort_outputs_fp16[0], ort_outputs[0], rtol=1e-03, atol=1e-05
+    # )
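Patch 01 sizes FP16 elements as `sizeof(int16_t)` because C++ has no built-in half-precision scalar; only the 2-byte width matters for the parser's buffer arithmetic. A quick sanity check of that layout with numpy's float16 (not part of the patch series):

```python
import numpy as np

# float16 elements occupy 2 bytes each, the same width the parser now
# reports for P2ODataType::FP16 via sizeof(int16_t).
weights = np.ones((3, 224), dtype=np.float16)
assert weights.itemsize == 2
assert len(weights.tobytes()) == weights.size * 2
```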
From 53f9286f58691c0df7fdcc684afd040a66121e05 Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Tue, 4 Jun 2024 15:43:21 +0000
Subject: [PATCH 02/19] support model conversion from fp32 to fp16

---
 paddle2onnx/converter.cc  | 10 +++++++++
 tests/test_resnet_fp16.py | 47 ++++++++++++++-------------------------
 2 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/paddle2onnx/converter.cc b/paddle2onnx/converter.cc
index 40cc63a58..957ad0f89 100644
--- a/paddle2onnx/converter.cc
+++ b/paddle2onnx/converter.cc
@@ -168,6 +168,16 @@ PADDLE2ONNX_DECL bool Export(
       disable_op_types.push_back(disable_op_type);
     }
   }
+
+  // convert output to fp16
+  if (export_fp16_model || (parser.inputs[0].dtype != parser.outputs[0].dtype))
+  {
+    for (auto &output : parser.outputs)
+    {
+      output.dtype = P2ODataType::FP16;
+    }
+  }
+
   std::string calibration_str;
   std::string result = me.Run(
       parser, opset_version, auto_upgrade_opset, verbose, enable_onnx_checker,
diff --git a/tests/test_resnet_fp16.py b/tests/test_resnet_fp16.py
index a8b095a0d..6a2a6729e 100644
--- a/tests/test_resnet_fp16.py
+++ b/tests/test_resnet_fp16.py
@@ -17,8 +17,6 @@
 import onnxruntime
 
 import paddle
-from onnxbase import APIOnnx, randtool
-import paddle2onnx
 
 import unittest
@@ -36,40 +34,29 @@ def test(self):
     # paddle.set_device("gpu")
     model = paddle.jit.load(path)
     model.float16()
+    model.eval()
     input_spec = [paddle.static.InputSpec(shape=[-1, 3, 224, 224], dtype='float16', name='inputs')]
-    paddle.jit.save(model, 'ResNet50_infer/inference_fp16', input_spec=input_spec)
-
-    # model.eval()
-    # x = paddle.randn([1, 3, 224, 224], "float16")
-    # output = model(x)
-    # print(f"output: {output}")
+    # paddle.jit.save(model, 'ResNet50_infer/inference_fp16', input_spec=input_spec)
 
     # convert to onnx
-    os.system("paddle2onnx --model_dir ResNet50_infer --model_filename inference_fp16.pdmodel --params_filename inference_fp16.pdiparams --export_fp16_model True --save_file ResNet50_infer/resnet_fp16.onnx")
-    # os.system("paddle2onnx --model_dir ResNet50_infer --model_filename inference.pdmodel --params_filename inference.pdiparams --export_fp16_model True --save_file ResNet50_infer/resnet_fp16.onnx")
+    paddle.onnx.export(model, "./resnet_fp16", input_spec=input_spec, export_fp16_model=True)  # export the ONNX model
 
     # validate precision
-    # np.random.seed(10)
-    # input_img = np.random.rand(1, 3, 224, 224).astype("float32")
+    np.random.seed(10)
+    input_img = np.random.rand(1, 3, 224, 224).astype("float16")
 
-    # onnx_file_name = "/wuzp/Paddle2ONNX/model/ResNet50_infer/resnet_fp32.onnx"
-    # # providers = [("CUDAExecutionProvider")]
-    # ort_session = onnxruntime.InferenceSession(onnx_file_name)
+    onnx_file_name = "./resnet_fp16.onnx"
+    ort_session = onnxruntime.InferenceSession(onnx_file_name)
 
-    # ort_inputs = {ort_session.get_inputs()[0].name: input_img}
-    # ort_outputs = ort_session.run(None, ort_inputs)
-    # print(f"ort_output: {ort_outputs}")
-    # # print(onnxruntime.get_device())
+    ort_inputs = {ort_session.get_inputs()[0].name: input_img}
+    ort_outputs = ort_session.run(None, ort_inputs)
 
-    # onnx_file_name_fp16 = "/wuzp/Paddle2ONNX/model/resnet_fp16.onnx"
-    # # providers = [("CUDAExecutionProvider")]
-    # ort_session_fp16 = onnxruntime.InferenceSession(onnx_file_name_fp16)
-    # input_img_fp16 = input_img.astype("float16")
-    # ort_inputs_fp16 = {ort_session_fp16.get_inputs()[0].name: input_img_fp16}
-    # ort_outputs_fp16 = ort_session_fp16.run(None, ort_inputs_fp16)
-    # print(f"ort_outputs_fp16: {ort_outputs_fp16}")
+    # ResNet50 cannot be run in half precision?
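With patch 02, an fp16 export also forces the parser's output dtypes to FP16, so both ends of the exported graph should be half precision. One hedged way to confirm that from the ONNX side, assuming an export already exists at the path the test uses:

```python
import onnx
from onnx import TensorProto

model = onnx.load("ResNet50_infer/resnet_fp16.onnx")  # path from the test above
for value_info in list(model.graph.input) + list(model.graph.output):
    elem_type = value_info.type.tensor_type.elem_type
    # Expect FLOAT16 on graph inputs and outputs alike.
    print(value_info.name, TensorProto.DataType.Name(elem_type))
```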
+    model.float()
+    paddle_input = paddle.to_tensor(input_img, dtype="float32")
+    paddle_output = model(paddle_input)
 
-    # # assert
-    # np.testing.assert_allclose(
-    #     ort_outputs_fp16[0], ort_outputs[0], rtol=1e-03, atol=1e-05
-    # )
+    # assert
+    np.testing.assert_allclose(
+        paddle_output.numpy(), ort_outputs[0], rtol=1e-03, atol=1e-05
+    )

From ed4f7c24177eddf6d53b09668094b6d37209d7a4 Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Thu, 6 Jun 2024 14:32:48 +0000
Subject: [PATCH 03/19] support model conversion from fp32 to fp16

---
 paddle2onnx/mapper/tensor/matmul.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/paddle2onnx/mapper/tensor/matmul.cc b/paddle2onnx/mapper/tensor/matmul.cc
index fd8bb1264..034aa04c6 100644
--- a/paddle2onnx/mapper/tensor/matmul.cc
+++ b/paddle2onnx/mapper/tensor/matmul.cc
@@ -43,7 +43,11 @@ void MatmulMapper::Opset7() {
   if (transpose_Y_) {
     input_y = GetTrans(input_y_info);
   }
-  if (fabs(alpha_ - 1.0) < 1e-6) {
+  if (P2ODataType::FP16 == input_x_info[0].dtype)
+  {
+    auto node = helper_->MakeNode("MatMul", {input_x, input_y}, {output_info[0].name});
+  }
+  else if (fabs(alpha_ - 1.0) < 1e-6) {
     auto node = helper_->MakeNode("MatMul", {input_x, input_y});
     helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
                       input_y_info[0].dtype);

From 17e6980a01c26dc846ac383c3e43f8a1275dffde Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Wed, 12 Jun 2024 15:41:54 +0000
Subject: [PATCH 04/19] support model conversion from fp32 to fp16

---
 paddle2onnx/mapper/nn/pool2d.cc            | 22 ++++++----------------
 paddle2onnx/mapper/nn/pool3d.cc            | 19 ++++---------------
 paddle2onnx/mapper/tensor/fill_constant.cc | 10 +++-------
 paddle2onnx/mapper/tensor/matmul.cc        | 14 +++++---------
 paddle2onnx/mapper/tensor/matmul_v2.cc     |  4 +---
 tests/test_resnet_fp16.py                  |  2 +-
 6 files changed, 20 insertions(+), 51 deletions(-)

diff --git a/paddle2onnx/mapper/nn/pool2d.cc b/paddle2onnx/mapper/nn/pool2d.cc
index 0996bca8a..6b52785c9 100755
--- a/paddle2onnx/mapper/nn/pool2d.cc
+++ b/paddle2onnx/mapper/nn/pool2d.cc
@@ -117,11 +117,7 @@ void Pool2dMapper::AdaptivePool(const std::vector<TensorInfo>& input_info,
   }
 
   std::shared_ptr<ONNX_NAMESPACE::NodeProto>* node_ptr;
-  auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
-                                 P2ODataType::FP32);
-  auto node = helper_->MakeNode(onnx_pool_type, {input});
-  helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
-                    output_info[0].dtype);
+  auto node = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
   std::vector<int64_t> kernel_size = {kernel_h, kernel_w};
   AddAttribute(node, "kernel_shape", kernel_size);
   std::vector<int64_t> strides = {stride_h, stride_w};
@@ -165,12 +161,11 @@ void Pool2dMapper::NoAdaptivePool(const std::vector<TensorInfo>& input_info,
   int64_t max_ksize = *std::max_element(std::begin(k_size_), std::end(k_size_));
   int64_t max_pads = *std::max_element(std::begin(pads_), std::end(pads_));
-  auto input_x = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
-                                   P2ODataType::FP32);
+  std::string input_x = input_info[0].name;
   if (max_ksize <= max_pads) {
     std::vector<int64_t> onnx_paddings = {0, 0, pads_[0], pads_[1],
                                           0, 0, pads_[2], pads_[3]};
-    std::vector<std::string> inputs_names = {input_x};
+    std::vector<std::string> inputs_names = {input_info[0].name};
     if (helper_->GetOpsetVersion() >= 11) {
       std::string paddings_node =
           helper_->Constant(GetOnnxDtype(P2ODataType::INT64), onnx_paddings);
@@ -199,9 +194,7 @@
     auto iter = op_mapper_.find(pooling_type_);
     onnx_pool_type = iter->second[0];
   }
-  auto node = helper_->MakeNode(onnx_pool_type, {input_x});
-  helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
-                    output_info[0].dtype);
+  auto node = helper_->MakeNode(onnx_pool_type, {input_x}, {output_info[0].name});
 
   AddAttribute(node, "kernel_shape", k_size_);
   AddAttribute(node, "strides", strides_);
@@ -317,11 +310,8 @@ void Pool2dMapper::Opset7() {
       auto iter = op_mapper_.find(pooling_type_);
       onnx_pool_type = iter->second[1];
     }
-    auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
-                                   P2ODataType::FP32);
-    auto output = helper_->MakeNode(onnx_pool_type, {input})->output(0);
-    helper_->AutoCast(output, output_info[0].name, P2ODataType::FP32,
-                      output_info[0].dtype);
+
+    auto output = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
   } else if (adaptive_) {
     AdaptivePool(input_info, output_info);
   } else {
diff --git a/paddle2onnx/mapper/nn/pool3d.cc b/paddle2onnx/mapper/nn/pool3d.cc
index fb6916fa1..66ee53a54 100644
--- a/paddle2onnx/mapper/nn/pool3d.cc
+++ b/paddle2onnx/mapper/nn/pool3d.cc
@@ -57,11 +57,7 @@ void Pool3dMapper::AdaptivePool(const std::vector<TensorInfo>& input_info,
     onnx_pool_type = iter->second[0];
   }
   std::shared_ptr<ONNX_NAMESPACE::NodeProto>* node_ptr;
-  auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
-                                 P2ODataType::FP32);
-  auto node = helper_->MakeNode(onnx_pool_type, {input});
-  helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
-                    output_info[0].dtype);
+  auto node = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
   std::vector<int64_t> kernel_size = {kernel_d, kernel_h, kernel_w};
   AddAttribute(node, "kernel_shape", kernel_size);
   std::vector<int64_t> strides = {stride_d, stride_h, stride_w};
@@ -109,8 +105,7 @@ void Pool3dMapper::NoAdaptivePool(const std::vector<TensorInfo>& input_info,
   int64_t max_ksize = *std::max_element(std::begin(k_size_), std::end(k_size_));
   int64_t max_pads = *std::max_element(std::begin(pads_), std::end(pads_));
-  auto input_x = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
-                                   P2ODataType::FP32);
+  auto input_x = input_info[0].name;
   if (max_ksize <= max_pads) {
     std::vector<int64_t> onnx_paddings = {0, 0, pads_[0], pads_[1], pads_[2],
                                           0, 0, pads_[3], pads_[4], pads_[5]};
@@ -143,9 +138,7 @@
     auto iter = op_mapper_.find(pooling_type_);
     onnx_pool_type = iter->second[0];
   }
-  auto node = helper_->MakeNode(onnx_pool_type, {input_x});
-  helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
-                    output_info[0].dtype);
+  auto node = helper_->MakeNode(onnx_pool_type, {input_x}, {output_info[0].name});
 
   AddAttribute(node, "kernel_shape", k_size_);
   AddAttribute(node, "strides", strides_);
@@ -247,11 +240,7 @@ void Pool3dMapper::Opset7() {
       auto iter = op_mapper_.find(pooling_type_);
       onnx_pool_type = iter->second[1];
     }
-    auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
-                                   P2ODataType::FP32);
-    auto output = helper_->MakeNode(onnx_pool_type, {input})->output(0);
-    helper_->AutoCast(output, output_info[0].name, P2ODataType::FP32,
-                      output_info[0].dtype);
+    auto output = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
   } else if (adaptive_) {
     AdaptivePool(input_info, output_info);
   } else {
diff --git a/paddle2onnx/mapper/tensor/fill_constant.cc b/paddle2onnx/mapper/tensor/fill_constant.cc
index 4a79019a3..06fbf21e6 100644
--- a/paddle2onnx/mapper/tensor/fill_constant.cc
+++ b/paddle2onnx/mapper/tensor/fill_constant.cc
@@ -79,9 +79,8 @@ void FillConstantMapper::Opset7() {
   float value = GetFillValue();
   if (HasInput("ValueTensor")) {
     auto value_info = GetInput("ValueTensor");
-    auto value_tensor = helper_->AutoCast(value_info[0].name, value_info[0].dtype, out_info[0].dtype);
     auto out = helper_->Constant(shape, GetOnnxDtype(out_info[0].dtype), float(0.0));
-    helper_->MakeNode("Add", {out, value_tensor}, {out_info[0].name});
+    helper_->MakeNode("Add", {out, value_info[0].name}, {out_info[0].name});
   } else {
     helper_->Constant(out_info[0].name, shape, GetOnnxDtype(out_info[0].dtype), value);
   }
@@ -101,8 +100,7 @@ void FillConstantMapper::Opset9() {
   std::string shape_name;
   if (HasInput("ShapeTensor")) {
     auto shape_info = GetInput("ShapeTensor");
-    shape_name = helper_->AutoCast(shape_info[0].name, shape_info[0].dtype,
-                                   P2ODataType::INT64);
+    shape_name = shape_info[0].name;
   } else {
     auto shape_info = GetInput("ShapeTensorList");
     shape_name = helper_->ConcatIndices(shape_info);
@@ -149,9 +147,7 @@
   }
   if (value_is_tensor) {
     auto value_info = GetInput("ValueTensor");
-    std::string cast_value = helper_->AutoCast(
-        value_info[0].name, value_info[0].dtype, out_info[0].dtype);
-    helper_->MakeNode("Add", {out, cast_value}, {out_info[0].name});
+    helper_->MakeNode("Add", {out, value_info[0].name}, {out_info[0].name});
   } else {
     helper_->MakeNode("Identity", {out}, {out_info[0].name});
   }
diff --git a/paddle2onnx/mapper/tensor/matmul.cc b/paddle2onnx/mapper/tensor/matmul.cc
index 034aa04c6..e8c41f620 100644
--- a/paddle2onnx/mapper/tensor/matmul.cc
+++ b/paddle2onnx/mapper/tensor/matmul.cc
@@ -43,22 +43,18 @@ void MatmulMapper::Opset7() {
   if (transpose_Y_) {
     input_y = GetTrans(input_y_info);
   }
-  if (P2ODataType::FP16 == input_x_info[0].dtype)
+
+  if (fabs(alpha_ - 1.0) < 1e-6)
   {
     auto node = helper_->MakeNode("MatMul", {input_x, input_y}, {output_info[0].name});
   }
-  else if (fabs(alpha_ - 1.0) < 1e-6) {
-    auto node = helper_->MakeNode("MatMul", {input_x, input_y});
-    helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
-                      input_y_info[0].dtype);
-  } else {
+  else
+  {
     auto mutmul_node = helper_->MakeNode("MatMul", {input_x, input_y});
     std::string scale_node =
         helper_->Constant({1}, GetOnnxDtype(input_x_info[0].dtype), alpha_);
     auto mul_node =
-        helper_->MakeNode("Mul", {mutmul_node->output(0), scale_node});
-    helper_->AutoCast(mul_node->output(0), output_info[0].name,
-                      P2ODataType::FP32, input_y_info[0].dtype);
+        helper_->MakeNode("Mul", {mutmul_node->output(0), scale_node}, {output_info[0].name});
   }
 }
diff --git a/paddle2onnx/mapper/tensor/matmul_v2.cc b/paddle2onnx/mapper/tensor/matmul_v2.cc
index f78a26f42..7df31cd48 100644
--- a/paddle2onnx/mapper/tensor/matmul_v2.cc
+++ b/paddle2onnx/mapper/tensor/matmul_v2.cc
@@ -43,9 +43,7 @@ void MatmulV2Mapper::Opset7() {
   if (trans_y_) {
     input_y = GetTrans(input_y_info);
   }
-  auto node = helper_->MakeNode("MatMul", {input_x, input_y});
-  helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
-                    input_y_info[0].dtype);
+  auto node = helper_->MakeNode("MatMul", {input_x, input_y}, {output_info[0].name});
 }
 
 }  // namespace paddle2onnx
diff --git a/tests/test_resnet_fp16.py b/tests/test_resnet_fp16.py
index 6a2a6729e..fe2ee112b 100644
--- a/tests/test_resnet_fp16.py
+++ b/tests/test_resnet_fp16.py
@@ -58,5 +58,5 @@ def test(self):
 
     # assert
     np.testing.assert_allclose(
-        paddle_output.numpy(), ort_outputs[0], rtol=1e-03, atol=1e-05
+        paddle_output.numpy(), ort_outputs[0], rtol=2e-02, atol=1e-04
     )
From 0b89e74580ac53cdc878d0fd84982c69d17dfa1b Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Wed, 12 Jun 2024 15:44:28 +0000
Subject: [PATCH 05/19] support model conversion from fp32 to fp16

---
 tests/test_resnet_fp16.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_resnet_fp16.py b/tests/test_resnet_fp16.py
index fe2ee112b..1675b5c56 100644
--- a/tests/test_resnet_fp16.py
+++ b/tests/test_resnet_fp16.py
@@ -58,5 +58,5 @@ def test(self):
 
     # assert
     np.testing.assert_allclose(
-        paddle_output.numpy(), ort_outputs[0], rtol=2e-02, atol=1e-04
+        paddle_output.numpy(), ort_outputs[0], rtol=1e-02, atol=1e-05
     )

From 96f6c02e98a525e0601eff7f7a3ec565e771c9a7 Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Mon, 17 Jun 2024 09:36:50 +0000
Subject: [PATCH 06/19] support model conversion from fp32 to fp16

---
 paddle2onnx/__init__.py   |  2 ++
 paddle2onnx/converter.cc  | 19 +++++++++++++++++--
 pyproject.toml            |  1 +
 tests/test_resnet_fp16.py |  8 +-------
 4 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/paddle2onnx/__init__.py b/paddle2onnx/__init__.py
index e09fd8865..d35c1510c 100755
--- a/paddle2onnx/__init__.py
+++ b/paddle2onnx/__init__.py
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from .convert import dygraph2onnx
 from .version import version
 
 __version__ = version
diff --git a/paddle2onnx/converter.cc b/paddle2onnx/converter.cc
index 957ad0f89..0c1432bfa 100644
--- a/paddle2onnx/converter.cc
+++ b/paddle2onnx/converter.cc
@@ -169,12 +169,27 @@ PADDLE2ONNX_DECL bool Export(
     }
   }
 
+  // lambda to check whether any input is fp16
+  auto judge_input_fp16 = [](const std::vector<TensorInfo> &inputs_info)
+  {
+    for (const auto &info : inputs_info)
+    {
+      if (P2ODataType::FP16 == info.dtype)
+      {
+        return true;
+      }
+    }
+    return false;
+  };
+
   // convert output to fp16
-  if (export_fp16_model || (parser.inputs[0].dtype != parser.outputs[0].dtype))
+  if (export_fp16_model || judge_input_fp16(parser.inputs))
   {
     for (auto &output : parser.outputs)
     {
-      output.dtype = P2ODataType::FP16;
+      // if the output dtype is fp32 or fp64, convert it to fp16
+      output.dtype =
+          (output.dtype == P2ODataType::FP32 || output.dtype == P2ODataType::FP64)
+              ? P2ODataType::FP16 : output.dtype;
     }
   }
 
diff --git a/pyproject.toml b/pyproject.toml
index 68876acb3..3d3b158d8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,7 @@ license = {text = "Apache License v2.0"}
 requires-python = ">=3.8"
 dependencies = [
     "onnxruntime>=1.10.0",
+    "numpy<2.0.0",  # paddle2onnx does not support numpy 2.0.0 yet
 ]
 
 [project.scripts]
diff --git a/tests/test_resnet_fp16.py b/tests/test_resnet_fp16.py
index 1675b5c56..e1a9f94b8 100644
--- a/tests/test_resnet_fp16.py
+++ b/tests/test_resnet_fp16.py
@@ -18,13 +18,7 @@
 
 import paddle
 
-import unittest
-
-class TestFP32ToFP16(unittest.TestCase):
-    def test(self):
-        pass
-
-if __name__ == "__main__":
+def test_resnet_fp16_convert():
     # download resnet model
     if not os.path.exists("ResNet50_infer"):
         os.system("wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet50_infer.tar && tar -xf ResNet50_infer.tar && rm -rf ResNet50_infer.tar")
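The `judge_input_fp16` lambda in patch 06 just asks whether any parser input is half precision. The same check can be expressed against a saved ONNX model; a sketch (the path is illustrative):

```python
import onnx
from onnx import TensorProto

def has_fp16_input(model_path: str) -> bool:
    # Python analogue of judge_input_fp16: true if any graph input is fp16.
    model = onnx.load(model_path)
    return any(
        graph_input.type.tensor_type.elem_type == TensorProto.FLOAT16
        for graph_input in model.graph.input
    )

print(has_fp16_input("ResNet50_infer/resnet_fp16.onnx"))
```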
From 04e830e5690a5ee3b504e2c054432d43f70fa998 Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Mon, 17 Jun 2024 10:40:44 +0000
Subject: [PATCH 07/19] support model conversion from fp32 to fp16

---
 paddle2onnx/mapper/nn/pool2d.cc | 9 ++++++---
 paddle2onnx/mapper/nn/pool3d.cc | 7 +++++--
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/paddle2onnx/mapper/nn/pool2d.cc b/paddle2onnx/mapper/nn/pool2d.cc
index 6b52785c9..6edf6147c 100755
--- a/paddle2onnx/mapper/nn/pool2d.cc
+++ b/paddle2onnx/mapper/nn/pool2d.cc
@@ -161,11 +161,12 @@ void Pool2dMapper::NoAdaptivePool(const std::vector<TensorInfo>& input_info,
   int64_t max_ksize = *std::max_element(std::begin(k_size_), std::end(k_size_));
   int64_t max_pads = *std::max_element(std::begin(pads_), std::end(pads_));
-  std::string input_x = input_info[0].name;
+  auto input_x = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
+                                   P2ODataType::FP32);
   if (max_ksize <= max_pads) {
     std::vector<int64_t> onnx_paddings = {0, 0, pads_[0], pads_[1],
                                           0, 0, pads_[2], pads_[3]};
-    std::vector<std::string> inputs_names = {input_info[0].name};
+    std::vector<std::string> inputs_names = {input_x};
     if (helper_->GetOpsetVersion() >= 11) {
       std::string paddings_node =
           helper_->Constant(GetOnnxDtype(P2ODataType::INT64), onnx_paddings);
@@ -194,7 +195,9 @@
     auto iter = op_mapper_.find(pooling_type_);
     onnx_pool_type = iter->second[0];
   }
-  auto node = helper_->MakeNode(onnx_pool_type, {input_x}, {output_info[0].name});
+  auto node = helper_->MakeNode(onnx_pool_type, {input_x});
+  helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
+                    output_info[0].dtype);
 
   AddAttribute(node, "kernel_shape", k_size_);
   AddAttribute(node, "strides", strides_);
diff --git a/paddle2onnx/mapper/nn/pool3d.cc b/paddle2onnx/mapper/nn/pool3d.cc
index 66ee53a54..b13f5d9f6 100644
--- a/paddle2onnx/mapper/nn/pool3d.cc
+++ b/paddle2onnx/mapper/nn/pool3d.cc
@@ -105,7 +105,8 @@ void Pool3dMapper::NoAdaptivePool(const std::vector<TensorInfo>& input_info,
   int64_t max_ksize = *std::max_element(std::begin(k_size_), std::end(k_size_));
   int64_t max_pads = *std::max_element(std::begin(pads_), std::end(pads_));
-  auto input_x = input_info[0].name;
+  auto input_x = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
+                                   P2ODataType::FP32);
   if (max_ksize <= max_pads) {
     std::vector<int64_t> onnx_paddings = {0, 0, pads_[0], pads_[1], pads_[2],
                                           0, 0, pads_[3], pads_[4], pads_[5]};
@@ -138,7 +139,9 @@
     auto iter = op_mapper_.find(pooling_type_);
     onnx_pool_type = iter->second[0];
   }
-  auto node = helper_->MakeNode(onnx_pool_type, {input_x}, {output_info[0].name});
+  auto node = helper_->MakeNode(onnx_pool_type, {input_x});
+  helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
+                    output_info[0].dtype);
 
   AddAttribute(node, "kernel_shape", k_size_);
   AddAttribute(node, "strides", strides_);

From 92e9a58abaa1df163656150cf6df7fe57b923376 Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Mon, 17 Jun 2024 14:04:18 +0000
Subject: [PATCH 08/19] support model conversion from fp32 to fp16

---
 paddle2onnx/mapper/nn/pool2d.cc            | 13 ++++++++++---
 paddle2onnx/mapper/nn/pool3d.cc            | 12 ++++++++++--
 paddle2onnx/mapper/tensor/fill_constant.cc | 10 +++++++---
 3 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/paddle2onnx/mapper/nn/pool2d.cc b/paddle2onnx/mapper/nn/pool2d.cc
index 6edf6147c..0996bca8a 100755
--- a/paddle2onnx/mapper/nn/pool2d.cc
+++ b/paddle2onnx/mapper/nn/pool2d.cc
@@ -117,7 +117,11 @@ void Pool2dMapper::AdaptivePool(const std::vector<TensorInfo>& input_info,
   }
 
   std::shared_ptr<ONNX_NAMESPACE::NodeProto>* node_ptr;
-  auto node = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
+  auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
+                                 P2ODataType::FP32);
+  auto node = helper_->MakeNode(onnx_pool_type, {input});
+  helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
+                    output_info[0].dtype);
   std::vector<int64_t> kernel_size = {kernel_h, kernel_w};
   AddAttribute(node, "kernel_shape", kernel_size);
   std::vector<int64_t> strides = {stride_h, stride_w};
@@ -313,8 +317,11 @@ void Pool2dMapper::Opset7() {
       auto iter = op_mapper_.find(pooling_type_);
       onnx_pool_type = iter->second[1];
     }
-
-    auto output = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
+    auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
+                                   P2ODataType::FP32);
+    auto output = helper_->MakeNode(onnx_pool_type, {input})->output(0);
+    helper_->AutoCast(output, output_info[0].name, P2ODataType::FP32,
+                      output_info[0].dtype);
   } else if (adaptive_) {
     AdaptivePool(input_info, output_info);
   } else {
diff --git a/paddle2onnx/mapper/nn/pool3d.cc b/paddle2onnx/mapper/nn/pool3d.cc
index b13f5d9f6..fb6916fa1 100644
--- a/paddle2onnx/mapper/nn/pool3d.cc
+++ b/paddle2onnx/mapper/nn/pool3d.cc
@@ -57,7 +57,11 @@ void Pool3dMapper::AdaptivePool(const std::vector<TensorInfo>& input_info,
     onnx_pool_type = iter->second[0];
   }
   std::shared_ptr<ONNX_NAMESPACE::NodeProto>* node_ptr;
-  auto node = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
+  auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
+                                 P2ODataType::FP32);
+  auto node = helper_->MakeNode(onnx_pool_type, {input});
+  helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
+                    output_info[0].dtype);
   std::vector<int64_t> kernel_size = {kernel_d, kernel_h, kernel_w};
   AddAttribute(node, "kernel_shape", kernel_size);
   std::vector<int64_t> strides = {stride_d, stride_h, stride_w};
@@ -243,7 +247,11 @@ void Pool3dMapper::Opset7() {
       auto iter = op_mapper_.find(pooling_type_);
       onnx_pool_type = iter->second[1];
     }
-    auto output = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
+    auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
+                                   P2ODataType::FP32);
+    auto output = helper_->MakeNode(onnx_pool_type, {input})->output(0);
+    helper_->AutoCast(output, output_info[0].name, P2ODataType::FP32,
+                      output_info[0].dtype);
   } else if (adaptive_) {
     AdaptivePool(input_info, output_info);
   } else {
diff --git a/paddle2onnx/mapper/tensor/fill_constant.cc b/paddle2onnx/mapper/tensor/fill_constant.cc
index 06fbf21e6..4a79019a3 100644
--- a/paddle2onnx/mapper/tensor/fill_constant.cc
+++ b/paddle2onnx/mapper/tensor/fill_constant.cc
@@ -79,8 +79,9 @@ void FillConstantMapper::Opset7() {
   float value = GetFillValue();
   if (HasInput("ValueTensor")) {
     auto value_info = GetInput("ValueTensor");
+    auto value_tensor = helper_->AutoCast(value_info[0].name, value_info[0].dtype, out_info[0].dtype);
     auto out = helper_->Constant(shape, GetOnnxDtype(out_info[0].dtype), float(0.0));
-    helper_->MakeNode("Add", {out, value_info[0].name}, {out_info[0].name});
+    helper_->MakeNode("Add", {out, value_tensor}, {out_info[0].name});
   } else {
     helper_->Constant(out_info[0].name, shape, GetOnnxDtype(out_info[0].dtype), value);
   }
@@ -100,7 +101,8 @@ void FillConstantMapper::Opset9() {
   std::string shape_name;
   if (HasInput("ShapeTensor")) {
     auto shape_info = GetInput("ShapeTensor");
-    shape_name = shape_info[0].name;
+    shape_name = helper_->AutoCast(shape_info[0].name, shape_info[0].dtype,
+                                   P2ODataType::INT64);
   } else {
     auto shape_info = GetInput("ShapeTensorList");
     shape_name = helper_->ConcatIndices(shape_info);
@@ -147,7 +149,9 @@
   }
   if (value_is_tensor) {
     auto value_info = GetInput("ValueTensor");
-    helper_->MakeNode("Add", {out, value_info[0].name}, {out_info[0].name});
+    std::string cast_value = helper_->AutoCast(
+        value_info[0].name, value_info[0].dtype, out_info[0].dtype);
+    helper_->MakeNode("Add", {out, cast_value}, {out_info[0].name});
   } else {
     helper_->MakeNode("Identity", {out}, {out_info[0].name});
   }

From 9af77786537951b7be5036ba44ebed3fe083ab2e Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Wed, 19 Jun 2024 08:13:25 +0000
Subject: [PATCH 09/19] support model conversion from fp32 to fp16

---
 paddle2onnx/mapper/mapper.h                |  7 ++-
 paddle2onnx/mapper/nn/pool2d.cc            | 59 ++++++++++++++------
 paddle2onnx/mapper/nn/pool3d.cc            | 62 ++++++++++++++++------
 paddle2onnx/mapper/tensor/assign.cc        |  5 ++
 paddle2onnx/mapper/tensor/fill_constant.cc | 24 +++++++--
 paddle2onnx/mapper/tensor/matmul.cc        | 27 ++++++++--
 paddle2onnx/mapper/tensor/matmul_v2.cc     | 20 +++++--
 tests/run.sh                               |  3 +-
 tests/test_auto_scan_assign.py             |  2 +-
 tests/test_resnet_fp16.py                  |  1 -
 10 files changed, 162 insertions(+), 48 deletions(-)

diff --git a/paddle2onnx/mapper/mapper.h b/paddle2onnx/mapper/mapper.h
index 4c910f517..56311f359 100755
--- a/paddle2onnx/mapper/mapper.h
+++ b/paddle2onnx/mapper/mapper.h
@@ -13,6 +13,7 @@
 // limitations under the License.
 #pragma once
 #include
+#include <unordered_set>
 
 #include "paddle2onnx/mapper/data_helper.h"
 #include "paddle2onnx/mapper/onnx_helper.h"
@@ -20,7 +21,11 @@
 #include "paddle2onnx/parser/parser.h"
 
 namespace paddle2onnx {
-class Mapper {
+
+  static const std::unordered_set<int32_t> kNoNeedCastTypes{P2ODataType::INT8, P2ODataType::FP16, P2ODataType::FP32};  // 0: float32, 3: int8, 6: float16
+
+  class Mapper
+  {
  public:
   Mapper() {
   }
diff --git a/paddle2onnx/mapper/nn/pool2d.cc b/paddle2onnx/mapper/nn/pool2d.cc
index 0996bca8a..8ad8f30b7 100755
--- a/paddle2onnx/mapper/nn/pool2d.cc
+++ b/paddle2onnx/mapper/nn/pool2d.cc
@@ -116,12 +116,20 @@ void Pool2dMapper::AdaptivePool(const std::vector<TensorInfo>& input_info,
     onnx_pool_type = iter->second[0];
   }
 
-  std::shared_ptr<ONNX_NAMESPACE::NodeProto>* node_ptr;
-  auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
-                                 P2ODataType::FP32);
-  auto node = helper_->MakeNode(onnx_pool_type, {input});
-  helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
-                    output_info[0].dtype);
+  std::shared_ptr<ONNX_NAMESPACE::NodeProto> node(nullptr);
+  if (kNoNeedCastTypes.find(input_info[0].dtype) != kNoNeedCastTypes.end())
+  {
+    node = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
+  }
+  else
+  {
+    auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
+                                   P2ODataType::FP32);
+    node = helper_->MakeNode(onnx_pool_type, {input});
+    helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
+                      output_info[0].dtype);
+  }
+
   std::vector<int64_t> kernel_size = {kernel_h, kernel_w};
   AddAttribute(node, "kernel_shape", kernel_size);
   std::vector<int64_t> strides = {stride_h, stride_w};
@@ -165,8 +173,12 @@ void Pool2dMapper::NoAdaptivePool(const std::vector<TensorInfo>& input_info,
   int64_t max_ksize = *std::max_element(std::begin(k_size_), std::end(k_size_));
   int64_t max_pads = *std::max_element(std::begin(pads_), std::end(pads_));
-  auto input_x = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
-                                   P2ODataType::FP32);
+  std::string input_x = input_info[0].name;
+  if (kNoNeedCastTypes.find(input_info[0].dtype) == kNoNeedCastTypes.end())
+  {
+    input_x = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
+                                P2ODataType::FP32);
+  }
   if (max_ksize <= max_pads) {
     std::vector<int64_t> onnx_paddings = {0, 0, pads_[0], pads_[1],
                                           0, 0, pads_[2], pads_[3]};
@@ -199,9 +211,17 @@
     auto iter = op_mapper_.find(pooling_type_);
     onnx_pool_type = iter->second[0];
   }
-  auto node = helper_->MakeNode(onnx_pool_type, {input_x});
-  helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
-                    output_info[0].dtype);
+  std::shared_ptr<ONNX_NAMESPACE::NodeProto> node(nullptr);
+  if (kNoNeedCastTypes.find(input_info[0].dtype) != kNoNeedCastTypes.end())
+  {
+    node = helper_->MakeNode(onnx_pool_type, {input_x}, {output_info[0].name});
+  }
+  else
+  {
+    node = helper_->MakeNode(onnx_pool_type, {input_x});
+    helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
+                      output_info[0].dtype);
+  }
 
   AddAttribute(node, "kernel_shape", k_size_);
   AddAttribute(node, "strides", strides_);
@@ -317,11 +337,18 @@ void Pool2dMapper::Opset7() {
       auto iter = op_mapper_.find(pooling_type_);
       onnx_pool_type = iter->second[1];
     }
-    auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
-                                   P2ODataType::FP32);
-    auto output = helper_->MakeNode(onnx_pool_type, {input})->output(0);
-    helper_->AutoCast(output, output_info[0].name, P2ODataType::FP32,
-                      output_info[0].dtype);
+    if (kNoNeedCastTypes.find(input_info[0].dtype) != kNoNeedCastTypes.end())
+    {
+      auto output = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
+    }
+    else
+    {
+      auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
+                                     P2ODataType::FP32);
+      auto output = helper_->MakeNode(onnx_pool_type, {input})->output(0);
+      helper_->AutoCast(output, output_info[0].name, P2ODataType::FP32,
+                        output_info[0].dtype);
+    }
   } else if (adaptive_) {
     AdaptivePool(input_info, output_info);
   } else {
diff --git a/paddle2onnx/mapper/nn/pool3d.cc b/paddle2onnx/mapper/nn/pool3d.cc
index fb6916fa1..201245bf4 100644
--- a/paddle2onnx/mapper/nn/pool3d.cc
+++ b/paddle2onnx/mapper/nn/pool3d.cc
@@ -56,12 +56,21 @@ void Pool3dMapper::AdaptivePool(const std::vector<TensorInfo>& input_info,
     auto iter = op_mapper_.find(pooling_type_);
     onnx_pool_type = iter->second[0];
   }
-  std::shared_ptr<ONNX_NAMESPACE::NodeProto>* node_ptr;
-  auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
-                                 P2ODataType::FP32);
-  auto node = helper_->MakeNode(onnx_pool_type, {input});
-  helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
-                    output_info[0].dtype);
+
+  std::shared_ptr<ONNX_NAMESPACE::NodeProto> node;
+  if (kNoNeedCastTypes.find(input_info[0].dtype) != kNoNeedCastTypes.end())
+  {
+    node = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
+  }
+  else
+  {
+    auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
+                                   P2ODataType::FP32);
+    node = helper_->MakeNode(onnx_pool_type, {input});
+    helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
+                      output_info[0].dtype);
+  }
+
   std::vector<int64_t> kernel_size = {kernel_d, kernel_h, kernel_w};
   AddAttribute(node, "kernel_shape", kernel_size);
   std::vector<int64_t> strides = {stride_d, stride_h, stride_w};
@@ -109,8 +118,13 @@ void Pool3dMapper::NoAdaptivePool(const std::vector<TensorInfo>& input_info,
   int64_t max_ksize = *std::max_element(std::begin(k_size_), std::end(k_size_));
   int64_t max_pads = *std::max_element(std::begin(pads_), std::end(pads_));
-  auto input_x = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
-                                   P2ODataType::FP32);
+  auto input_x = input_info[0].name;
+  if (kNoNeedCastTypes.find(input_info[0].dtype) == kNoNeedCastTypes.end())
+  {
+    input_x = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
+                                P2ODataType::FP32);
+  }
+
   if (max_ksize <= max_pads) {
     std::vector<int64_t> onnx_paddings = {0, 0, pads_[0], pads_[1], pads_[2],
                                           0, 0, pads_[3], pads_[4], pads_[5]};
@@ -143,9 +157,17 @@
     auto iter = op_mapper_.find(pooling_type_);
     onnx_pool_type = iter->second[0];
   }
-  auto node = helper_->MakeNode(onnx_pool_type, {input_x});
-  helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
-                    output_info[0].dtype);
+  std::shared_ptr<ONNX_NAMESPACE::NodeProto> node(nullptr);
+  if (kNoNeedCastTypes.find(input_info[0].dtype) != kNoNeedCastTypes.end())
+  {
+    node = helper_->MakeNode(onnx_pool_type, {input_x}, {output_info[0].name});
+  }
+  else
+  {
+    node = helper_->MakeNode(onnx_pool_type, {input_x});
+    helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
+                      output_info[0].dtype);
+  }
 
   AddAttribute(node, "kernel_shape", k_size_);
   AddAttribute(node, "strides", strides_);
@@ -247,11 +269,19 @@ void Pool3dMapper::Opset7() {
       auto iter = op_mapper_.find(pooling_type_);
       onnx_pool_type = iter->second[1];
     }
-    auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
-                                   P2ODataType::FP32);
-    auto output = helper_->MakeNode(onnx_pool_type, {input})->output(0);
-    helper_->AutoCast(output, output_info[0].name, P2ODataType::FP32,
-                      output_info[0].dtype);
+
+    if (kNoNeedCastTypes.find(input_info[0].dtype) != kNoNeedCastTypes.end())
+    {
+      auto output = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
+    }
+    else
+    {
+      auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
+                                     P2ODataType::FP32);
+      auto output = helper_->MakeNode(onnx_pool_type, {input})->output(0);
+      helper_->AutoCast(output, output_info[0].name, P2ODataType::FP32,
+                        output_info[0].dtype);
+    }
   } else if (adaptive_) {
     AdaptivePool(input_info, output_info);
   } else {
diff --git a/paddle2onnx/mapper/tensor/assign.cc b/paddle2onnx/mapper/tensor/assign.cc
index a4d0d3553..4507a7b04 100644
--- a/paddle2onnx/mapper/tensor/assign.cc
+++ b/paddle2onnx/mapper/tensor/assign.cc
@@ -21,6 +21,8 @@ REGISTER_MAPPER(share_data, AssignMapper)
 void AssignMapper::Opset7() {
   auto input_info = GetInput("X");
   auto output_info = GetOutput("Out");
+
+
   if (block_idx_ != 0 && OpType() != "share_data") {
     // Here's a trick for tensorrt
     // Consider remove this trick
@@ -43,6 +45,9 @@ void AssignMapper::Opset7() {
   } else {
     helper_->MakeNode("Identity", {input_info[0].name}, {output_info[0].name});
   }
+  std::cout << "use assign...\n";
+  std::cout << "use input_info dtype: " << input_info[0].dtype << std::endl;;
+  std::cout << "use output_info dtype: " << output_info[0].dtype << std::endl;
 }
 
 }  // namespace paddle2onnx
diff --git a/paddle2onnx/mapper/tensor/fill_constant.cc b/paddle2onnx/mapper/tensor/fill_constant.cc
index 4a79019a3..ced00db1d 100644
--- a/paddle2onnx/mapper/tensor/fill_constant.cc
+++ b/paddle2onnx/mapper/tensor/fill_constant.cc
@@ -79,9 +79,16 @@ void FillConstantMapper::Opset7() {
   float value = GetFillValue();
   if (HasInput("ValueTensor")) {
     auto value_info = GetInput("ValueTensor");
-    auto value_tensor = helper_->AutoCast(value_info[0].name, value_info[0].dtype, out_info[0].dtype);
     auto out = helper_->Constant(shape, GetOnnxDtype(out_info[0].dtype), float(0.0));
-    helper_->MakeNode("Add", {out, value_tensor}, {out_info[0].name});
+    if (kNoNeedCastTypes.find(value_info[0].dtype) != kNoNeedCastTypes.end())
+    {
+      helper_->MakeNode("Add", {out, value_info[0].name}, {out_info[0].name});
+    }
+    else
+    {
+      auto value_tensor = helper_->AutoCast(value_info[0].name, value_info[0].dtype, out_info[0].dtype);
+      helper_->MakeNode("Add", {out, value_tensor}, {out_info[0].name});
+    }
   } else {
     helper_->Constant(out_info[0].name, shape, GetOnnxDtype(out_info[0].dtype), value);
   }
@@ -149,9 +156,16 @@
   }
   if (value_is_tensor) {
     auto value_info = GetInput("ValueTensor");
-    std::string cast_value = helper_->AutoCast(
-        value_info[0].name, value_info[0].dtype, out_info[0].dtype);
-    helper_->MakeNode("Add", {out, cast_value}, {out_info[0].name});
+    if (kNoNeedCastTypes.find(value_info[0].dtype) != kNoNeedCastTypes.end())
+    {
+      helper_->MakeNode("Add", {out, value_info[0].name}, {out_info[0].name});
+    }
+    else
+    {
+      std::string cast_value = helper_->AutoCast(
+          value_info[0].name, value_info[0].dtype, out_info[0].dtype);
+      helper_->MakeNode("Add", {out, cast_value}, {out_info[0].name});
+    }
   } else {
     helper_->MakeNode("Identity", {out}, {out_info[0].name});
   }
diff --git a/paddle2onnx/mapper/tensor/matmul.cc b/paddle2onnx/mapper/tensor/matmul.cc
index e8c41f620..26a238b63 100644
--- a/paddle2onnx/mapper/tensor/matmul.cc
+++ b/paddle2onnx/mapper/tensor/matmul.cc
@@ -20,7 +20,7 @@ REGISTER_MAPPER(matmul, MatmulMapper)
 
 std::string MatmulMapper::GetTrans(std::vector<TensorInfo>& input_info) {
   std::string castd_name = input_info[0].name;
-  if (input_info[0].dtype == P2ODataType::FP64) {
+  if (kNoNeedCastTypes.find(input_info[0].dtype) != kNoNeedCastTypes.end()) {
     castd_name = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
                                    P2ODataType::FP32);
   }
@@ -44,9 +44,26 @@ void MatmulMapper::Opset7() {
     input_y = GetTrans(input_y_info);
   }
 
-  if (fabs(alpha_ - 1.0) < 1e-6)
+  if (kNoNeedCastTypes.find(input_x_info[0].dtype) != kNoNeedCastTypes.end())
   {
-    auto node = helper_->MakeNode("MatMul", {input_x, input_y}, {output_info[0].name});
+    if (fabs(alpha_ - 1.0) < 1e-6)
+    {
+      auto node = helper_->MakeNode("MatMul", {input_x, input_y}, {output_info[0].name});
+    }
+    else
+    {
+      auto mutmul_node = helper_->MakeNode("MatMul", {input_x, input_y});
+      std::string scale_node =
+          helper_->Constant({1}, GetOnnxDtype(input_x_info[0].dtype), alpha_);
+      auto mul_node =
+          helper_->MakeNode("Mul", {mutmul_node->output(0), scale_node}, {output_info[0].name});
+    }
+  }
+  else if (fabs(alpha_ - 1.0) < 1e-6)
+  {
+    auto node = helper_->MakeNode("MatMul", {input_x, input_y});
+    helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
+                      input_y_info[0].dtype);
   }
   else
   {
@@ -54,7 +71,9 @@
     std::string scale_node =
         helper_->Constant({1}, GetOnnxDtype(input_x_info[0].dtype), alpha_);
     auto mul_node =
-        helper_->MakeNode("Mul", {mutmul_node->output(0), scale_node}, {output_info[0].name});
+        helper_->MakeNode("Mul", {mutmul_node->output(0), scale_node});
+    helper_->AutoCast(mul_node->output(0), output_info[0].name,
+                      P2ODataType::FP32, input_y_info[0].dtype);
   }
 }
diff --git a/paddle2onnx/mapper/tensor/matmul_v2.cc b/paddle2onnx/mapper/tensor/matmul_v2.cc
index 7df31cd48..2fa91d733 100644
--- a/paddle2onnx/mapper/tensor/matmul_v2.cc
+++ b/paddle2onnx/mapper/tensor/matmul_v2.cc
@@ -22,8 +22,13 @@ namespace paddle2onnx {
 REGISTER_MAPPER(matmul_v2, MatmulV2Mapper)
 
 std::string MatmulV2Mapper::GetTrans(std::vector<TensorInfo>& input_info) {
-  std::string castd_name = helper_->AutoCast(
-      input_info[0].name, input_info[0].dtype, P2ODataType::FP32);
+  std::string castd_name = input_info[0].name;
+  if (kNoNeedCastTypes.find(input_info[0].dtype) == kNoNeedCastTypes.end())
+  {
+    castd_name = helper_->AutoCast(
+        input_info[0].name, input_info[0].dtype, P2ODataType::FP32);
+  }
+
   std::vector<int64_t> perm = Arange(0, input_info[0].Rank());
   std::swap(perm[perm.size() - 1], perm[perm.size() - 2]);
   auto transpose_node = helper_->MakeNode("Transpose", {castd_name});
@@ -43,7 +48,16 @@ void MatmulV2Mapper::Opset7() {
   if (trans_y_) {
     input_y = GetTrans(input_y_info);
   }
-  auto node = helper_->MakeNode("MatMul", {input_x, input_y}, {output_info[0].name});
+  if (kNoNeedCastTypes.find(input_y_info[0].dtype) != kNoNeedCastTypes.end())
+  {
+    auto node = helper_->MakeNode("MatMul", {input_x, input_y}, {output_info[0].name});
+  }
+  else
+  {
+    auto node = helper_->MakeNode("MatMul", {input_x, input_y});
+    helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
+                      input_y_info[0].dtype);
+  }
 }
 
 }  // namespace paddle2onnx
diff --git a/tests/run.sh b/tests/run.sh
index fe36c05fc..0b2c7f57c 100755
--- a/tests/run.sh
+++ b/tests/run.sh
@@ -60,7 +60,8 @@ ignore="test_auto_scan_multiclass_nms.py
         test_unsqueeze.py \
         test_quantize_model.py \
         test_quantize_model_minist.py \
-        test_quantize_model_speedup.py"
+        test_quantize_model_speedup.py \
+        test_resnet_fp16.py"
 bug=0
 
 # Install Python Packet
diff --git a/tests/test_auto_scan_assign.py b/tests/test_auto_scan_assign.py
index cfce31dde..f28aca499 100644
--- a/tests/test_auto_scan_assign.py
+++ b/tests/test_auto_scan_assign.py
@@ -56,7 +56,7 @@ def sample_convert_config(self, draw):
 
         dtype = draw(
             st.sampled_from(
-                ["float16", "float32", "float64", "int32", "int64"]))
+                ["float32", "float64", "int32", "int64"]))
 
         # "list" has a bug
         input_dtype = draw(st.sampled_from(["tensor", "ndarray"]))
diff --git a/tests/test_resnet_fp16.py b/tests/test_resnet_fp16.py
index e1a9f94b8..034f7841f 100644
--- a/tests/test_resnet_fp16.py
+++ b/tests/test_resnet_fp16.py
@@ -45,7 +45,6 @@ def test_resnet_fp16_convert():
     ort_inputs = {ort_session.get_inputs()[0].name: input_img}
     ort_outputs = ort_session.run(None, ort_inputs)
 
-    # ResNet50 cannot be run in half precision?
     model.float()
     paddle_input = paddle.to_tensor(input_img, dtype="float32")
     paddle_output = model(paddle_input)
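Patch 09's `kNoNeedCastTypes` lets the pool and matmul mappers feed int8/fp16/fp32 tensors straight into the ONNX op instead of wrapping them in Cast-to-fp32/Cast-back pairs. One way to observe the effect is to count the Cast nodes that survive in an export; a sketch with illustrative file names:

```python
import onnx

def count_cast_nodes(model_path: str) -> int:
    # Fewer Cast nodes should remain once fp16 inputs skip the fp32 round-trip.
    model = onnx.load(model_path)
    return sum(1 for node in model.graph.node if node.op_type == "Cast")

for path in ("resnet_fp32.onnx", "resnet_fp16.onnx"):  # hypothetical exports
    print(path, count_cast_nodes(path))
```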
From cfe4a638b255b406d18dfb367e656bf322d642f6 Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Wed, 19 Jun 2024 08:14:24 +0000
Subject: [PATCH 10/19] support model conversion from fp32 to fp16

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 19e604524..da6e9253b 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@ Paddle2ONNX 本身不依赖其他组件，但是我们建议您在以下环境
 
 - PaddlePaddle == 2.6.0
 - onnxruntime >= 1.10.0
+- numpy < 2.0.0
 
 # 3 安装 Paddle2ONNX

From 1120032ba7a3db84e1239a81e2827a10dbc0e8f4 Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Thu, 20 Jun 2024 06:50:06 +0000
Subject: [PATCH 11/19] support model conversion from fp32 to fp16

---
 paddle2onnx/__init__.py        |  1 -
 tests/test_auto_scan_assign.py |  2 +-
 tests/test_resnet_fp16.py      | 54 ++++++++++++++++++++++++----------
 3 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/paddle2onnx/__init__.py b/paddle2onnx/__init__.py
index d35c1510c..13bba31a4 100755
--- a/paddle2onnx/__init__.py
+++ b/paddle2onnx/__init__.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .convert import dygraph2onnx
 from .version import version
 
 __version__ = version
diff --git a/tests/test_auto_scan_assign.py b/tests/test_auto_scan_assign.py
index f28aca499..cfce31dde 100644
--- a/tests/test_auto_scan_assign.py
+++ b/tests/test_auto_scan_assign.py
@@ -56,7 +56,7 @@ def sample_convert_config(self, draw):
 
         dtype = draw(
             st.sampled_from(
-                ["float32", "float64", "int32", "int64"]))
+                ["float16", "float32", "float64", "int32", "int64"]))
 
         # "list" has a bug
         input_dtype = draw(st.sampled_from(["tensor", "ndarray"]))
diff --git a/tests/test_resnet_fp16.py b/tests/test_resnet_fp16.py
index 034f7841f..82988b957 100644
--- a/tests/test_resnet_fp16.py
+++ b/tests/test_resnet_fp16.py
@@ -17,6 +17,8 @@
 import onnxruntime
 
 import paddle
+import paddle2onnx
+from paddle.inference import PrecisionType, PlaceType, convert_to_mixed_precision
 
 def test_resnet_fp16_convert():
     # download resnet model
     if not os.path.exists("ResNet50_infer"):
         os.system("wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet50_infer.tar && tar -xf ResNet50_infer.tar && rm -rf ResNet50_infer.tar")
 
     # generate fp16 model
-    path = "ResNet50_infer/inference"
+    path = "ResNet50_infer"
+    src_model = os.path.join(path, "inference.pdmodel")
+    src_params = os.path.join(path, "inference.pdiparams")
+    dst_model = os.path.join(path, "inference_fp16.pdmodel")
+    dst_params = os.path.join(path, "inference_fp16.pdiparams")
+
+    convert_to_mixed_precision(
+        src_model,   # fp32 model path
+        src_params,  # fp32 params path
+        dst_model,   # mixed-precision model path
+        dst_params,  # mixed-precision params path
+        PrecisionType.Half,
+        PlaceType.GPU,
+        False
+    )
+
     # paddle.set_device("gpu")
-    model = paddle.jit.load(path)
-    model.float16()
-    model.eval()
-    input_spec = [paddle.static.InputSpec(shape=[-1, 3, 224, 224], dtype='float16', name='inputs')]
-    # paddle.jit.save(model, 'ResNet50_infer/inference_fp16', input_spec=input_spec)
+    paddle.enable_static()
+    path_fp16 = os.path.join(path, "inference_fp16")
+    exe = paddle.static.Executor(paddle.CUDAPlace(0))
+    [inference_program, feed_target_names, fetch_targets] = paddle.static.load_inference_model(path_fp16, exe)
+
+    # infer paddle fp16
+    np.random.seed(10)
+    tensor_img = np.array(np.random.random((1, 3, 224, 224)), dtype=np.float16)
+    results = exe.run(inference_program,
+                      feed={feed_target_names[0]: tensor_img},
+                      fetch_list=fetch_targets)
 
     # convert to onnx
-    paddle.onnx.export(model, "./resnet_fp16", input_spec=input_spec, export_fp16_model=True)  # export the ONNX model
+    input_spec = [paddle.static.InputSpec(shape=[-1, 3, 224, 224], dtype='float16', name='inputs')]
+    model_file = path_fp16 + ".pdmodel"
+    params_file = path_fp16 + ".pdiparams"
+    paddle2onnx.export(model_file, params_file, "./resnet_fp16.onnx", export_fp16_model=True)  # export the ONNX model
 
     # validate precision
-    np.random.seed(10)
-    input_img = np.random.rand(1, 3, 224, 224).astype("float16")
-
     onnx_file_name = "./resnet_fp16.onnx"
     ort_session = onnxruntime.InferenceSession(onnx_file_name)
 
-    ort_inputs = {ort_session.get_inputs()[0].name: input_img}
+    ort_inputs = {ort_session.get_inputs()[0].name: tensor_img}
     ort_outputs = ort_session.run(None, ort_inputs)
 
-    model.float()
-    paddle_input = paddle.to_tensor(input_img, dtype="float32")
-    paddle_output = model(paddle_input)
-
     # assert
     np.testing.assert_allclose(
-        paddle_output.numpy(), ort_outputs[0], rtol=1e-02, atol=1e-05
+        results[0], ort_outputs[0], rtol=2e-02, atol=2e-05
     )
+
+if __name__ == "__main__":
+    test_resnet_fp16_convert()
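The tolerances in patch 11 (`rtol=2e-02, atol=2e-05`) are deliberately looser than fp32-style `1e-05`: half precision keeps only about 10 mantissa bits, so a deep CNN accumulates relative error well above fp32 noise. The scale of the format is easy to inspect:

```python
import numpy as np

# Machine epsilon of float16 is ~9.8e-4, versus ~1.2e-7 for float32, so
# end-to-end fp16 results can credibly differ at the 1e-3..1e-2 level.
print(np.finfo(np.float16).eps)  # 0.000977
print(np.finfo(np.float32).eps)  # 1.1920929e-07
```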
From 1b2e6f03da093ede4ca50727143d98114963eed3 Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Thu, 20 Jun 2024 07:08:38 +0000
Subject: [PATCH 12/19] support model conversion from fp32 to fp16

---
 paddle2onnx/converter.cc | 25 -------------------------
 1 file changed, 25 deletions(-)

diff --git a/paddle2onnx/converter.cc b/paddle2onnx/converter.cc
index 0c1432bfa..40cc63a58 100644
--- a/paddle2onnx/converter.cc
+++ b/paddle2onnx/converter.cc
@@ -168,31 +168,6 @@ PADDLE2ONNX_DECL bool Export(
       disable_op_types.push_back(disable_op_type);
     }
   }
-
-  // lambda to check whether any input is fp16
-  auto judge_input_fp16 = [](const std::vector<TensorInfo> &inputs_info)
-  {
-    for (const auto &info : inputs_info)
-    {
-      if (P2ODataType::FP16 == info.dtype)
-      {
-        return true;
-      }
-    }
-    return false;
-  };
-
-  // convert output to fp16
-  if (export_fp16_model || judge_input_fp16(parser.inputs))
-  {
-    for (auto &output : parser.outputs)
-    {
-      // if the output dtype is fp32 or fp64, convert it to fp16
-      output.dtype =
-          (output.dtype == P2ODataType::FP32 || output.dtype == P2ODataType::FP64)
-              ? P2ODataType::FP16 : output.dtype;
-    }
-  }
-
   std::string calibration_str;
   std::string result = me.Run(
       parser, opset_version, auto_upgrade_opset, verbose, enable_onnx_checker,

From 710c56a7a3858c92a770387e12a3e6b4e5522827 Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Thu, 20 Jun 2024 07:13:09 +0000
Subject: [PATCH 13/19] support model conversion from fp32 to fp16

---
 paddle2onnx/__init__.py     | 1 -
 paddle2onnx/mapper/mapper.h | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/paddle2onnx/__init__.py b/paddle2onnx/__init__.py
index 13bba31a4..e09fd8865 100755
--- a/paddle2onnx/__init__.py
+++ b/paddle2onnx/__init__.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 from .version import version
 
 __version__ = version
diff --git a/paddle2onnx/mapper/mapper.h b/paddle2onnx/mapper/mapper.h
index 56311f359..b65082f29 100755
--- a/paddle2onnx/mapper/mapper.h
+++ b/paddle2onnx/mapper/mapper.h
@@ -22,7 +22,7 @@ namespace paddle2onnx {
 
-  static const std::unordered_set<int32_t> kNoNeedCastTypes{P2ODataType::INT8, P2ODataType::FP16, P2ODataType::FP32};  // 0: float32, 3: int8, 6: float16
+  static const std::unordered_set<int32_t> kNoNeedCastTypes{P2ODataType::INT8, P2ODataType::FP16, P2ODataType::FP32};
 
   class Mapper
   {
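Patches 13 and 14 keep the dtype set but strip its comment and then move it out of the shared header into each mapper. The dispatch itself stays a simple set-membership test; sketched in Python for clarity (all names here are illustrative, not paddle2onnx API):

```python
# Dtypes that may feed ONNX pool/matmul nodes directly; anything else is
# routed through an AutoCast to fp32 first, mirroring the C++ mappers.
NO_NEED_CAST = {"int8", "float16", "float32"}

def pool_input(name: str, dtype: str) -> str:
    if dtype in NO_NEED_CAST:
        return name                 # use the tensor as-is
    return f"{name}.cast_fp32"      # stand-in for helper_->AutoCast(...)

print(pool_input("x", "float16"))   # x
print(pool_input("x", "float64"))   # x.cast_fp32
```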
b/paddle2onnx/mapper/tensor/matmul.h index 16957701f..881d13bf7 100644 --- a/paddle2onnx/mapper/tensor/matmul.h +++ b/paddle2onnx/mapper/tensor/matmul.h @@ -34,6 +34,7 @@ class MatmulMapper : public Mapper { private: std::string GetTrans(std::vector& input_info); + const std::unordered_set kNoNeedCastTypes{P2ODataType::INT8, P2ODataType::FP16, P2ODataType::FP32}; bool transpose_X_ = false; bool transpose_Y_ = false; float alpha_ = 1.0; diff --git a/paddle2onnx/mapper/tensor/matmul_v2.h b/paddle2onnx/mapper/tensor/matmul_v2.h index bb3762a34..04699a90b 100644 --- a/paddle2onnx/mapper/tensor/matmul_v2.h +++ b/paddle2onnx/mapper/tensor/matmul_v2.h @@ -33,6 +33,7 @@ class MatmulV2Mapper : public Mapper { private: std::string GetTrans(std::vector& input_info); + const std::unordered_set kNoNeedCastTypes{P2ODataType::INT8, P2ODataType::FP16, P2ODataType::FP32}; bool trans_x_ = false; bool trans_y_ = false; }; From 2cfe42f6fd5f0bacb8f24e8821df224fff12ace4 Mon Sep 17 00:00:00 2001 From: xiaoyewww <641311428@qq.com> Date: Thu, 20 Jun 2024 09:07:10 +0000 Subject: [PATCH 15/19] support model convert from fp32 to fp16 --- paddle2onnx/mapper/mapper.h | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle2onnx/mapper/mapper.h b/paddle2onnx/mapper/mapper.h index de459a370..4c910f517 100755 --- a/paddle2onnx/mapper/mapper.h +++ b/paddle2onnx/mapper/mapper.h @@ -13,7 +13,6 @@ // limitations under the License. #pragma once #include -#include #include "paddle2onnx/mapper/data_helper.h" #include "paddle2onnx/mapper/onnx_helper.h" From 024143fc23960ff8f068ae7a64e72efebea171a5 Mon Sep 17 00:00:00 2001 From: xiaoyewww <641311428@qq.com> Date: Thu, 20 Jun 2024 09:21:42 +0000 Subject: [PATCH 16/19] support model convert from fp32 to fp16 --- paddle2onnx/mapper/tensor/assign.cc | 5 ----- paddle2onnx/mapper/tensor/assign.h | 3 --- 2 files changed, 8 deletions(-) diff --git a/paddle2onnx/mapper/tensor/assign.cc b/paddle2onnx/mapper/tensor/assign.cc index 4507a7b04..a4d0d3553 100644 --- a/paddle2onnx/mapper/tensor/assign.cc +++ b/paddle2onnx/mapper/tensor/assign.cc @@ -21,8 +21,6 @@ REGISTER_MAPPER(share_data, AssignMapper) void AssignMapper::Opset7() { auto input_info = GetInput("X"); auto output_info = GetOutput("Out"); - - if (block_idx_ != 0 && OpType() != "share_data") { // Here's a trick for tensorrt // Consider remove this trick @@ -45,9 +43,6 @@ void AssignMapper::Opset7() { } else { helper_->MakeNode("Identity", {input_info[0].name}, {output_info[0].name}); } - std::cout << "use assign...\n"; - std::cout << "use input_info dtype: " << input_info[0].dtype << std::endl;; - std::cout << "use output_info dtype: " << output_info[0].dtype << std::endl; } } // namespace paddle2onnx diff --git a/paddle2onnx/mapper/tensor/assign.h b/paddle2onnx/mapper/tensor/assign.h index 484c16cda..ad9585978 100644 --- a/paddle2onnx/mapper/tensor/assign.h +++ b/paddle2onnx/mapper/tensor/assign.h @@ -26,9 +26,6 @@ class AssignMapper : public Mapper { int64_t op_id) : Mapper(p, helper, block_id, op_id) {} void Opset7(); - -private: - const std::unordered_set kNoNeedCastTypes{P2ODataType::INT8, P2ODataType::FP16, P2ODataType::FP32}; }; } // namespace paddle2onnx From 3f14872458c0d1c01eef89b0910dace0008392e5 Mon Sep 17 00:00:00 2001 From: xiaoyewww <641311428@qq.com> Date: Thu, 20 Jun 2024 15:12:02 +0000 Subject: [PATCH 17/19] support model convert from fp32 to fp16 --- paddle2onnx/mapper/nn/pool2d.cc | 8 ++++---- paddle2onnx/mapper/nn/pool2d.h | 2 +- paddle2onnx/mapper/nn/pool3d.cc | 8 ++++---- paddle2onnx/mapper/nn/pool3d.h | 2 +- 
From 2cfe42f6fd5f0bacb8f24e8821df224fff12ace4 Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Thu, 20 Jun 2024 09:07:10 +0000
Subject: [PATCH 15/19] support model convert from fp32 to fp16

---
 paddle2onnx/mapper/mapper.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/paddle2onnx/mapper/mapper.h b/paddle2onnx/mapper/mapper.h
index de459a370..4c910f517 100755
--- a/paddle2onnx/mapper/mapper.h
+++ b/paddle2onnx/mapper/mapper.h
@@ -13,7 +13,6 @@
 // limitations under the License.
 #pragma once
 #include
-#include <unordered_set>
 
 #include "paddle2onnx/mapper/data_helper.h"
 #include "paddle2onnx/mapper/onnx_helper.h"

From 024143fc23960ff8f068ae7a64e72efebea171a5 Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Thu, 20 Jun 2024 09:21:42 +0000
Subject: [PATCH 16/19] support model convert from fp32 to fp16

---
 paddle2onnx/mapper/tensor/assign.cc | 5 -----
 paddle2onnx/mapper/tensor/assign.h  | 3 ---
 2 files changed, 8 deletions(-)

diff --git a/paddle2onnx/mapper/tensor/assign.cc b/paddle2onnx/mapper/tensor/assign.cc
index 4507a7b04..a4d0d3553 100644
--- a/paddle2onnx/mapper/tensor/assign.cc
+++ b/paddle2onnx/mapper/tensor/assign.cc
@@ -21,8 +21,6 @@ REGISTER_MAPPER(share_data, AssignMapper)
 void AssignMapper::Opset7() {
   auto input_info = GetInput("X");
   auto output_info = GetOutput("Out");
-
-
   if (block_idx_ != 0 && OpType() != "share_data") {
     // Here's a trick for tensorrt
     // Consider remove this trick
@@ -45,9 +43,6 @@
   } else {
     helper_->MakeNode("Identity", {input_info[0].name}, {output_info[0].name});
   }
-  std::cout << "use assign...\n";
-  std::cout << "use input_info dtype: " << input_info[0].dtype << std::endl;;
-  std::cout << "use output_info dtype: " << output_info[0].dtype << std::endl;
 }
 
 }  // namespace paddle2onnx

diff --git a/paddle2onnx/mapper/tensor/assign.h b/paddle2onnx/mapper/tensor/assign.h
index 484c16cda..ad9585978 100644
--- a/paddle2onnx/mapper/tensor/assign.h
+++ b/paddle2onnx/mapper/tensor/assign.h
@@ -26,9 +26,6 @@ class AssignMapper : public Mapper {
                int64_t op_id)
       : Mapper(p, helper, block_id, op_id) {}
   void Opset7();
-
-private:
-  const std::unordered_set<int32_t> kNoNeedCastTypes{P2ODataType::INT8, P2ODataType::FP16, P2ODataType::FP32};
 };
 
 }  // namespace paddle2onnx

From 3f14872458c0d1c01eef89b0910dace0008392e5 Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Thu, 20 Jun 2024 15:12:02 +0000
Subject: [PATCH 17/19] support model convert from fp32 to fp16

---
 paddle2onnx/mapper/nn/pool2d.cc            |  8 ++++----
 paddle2onnx/mapper/nn/pool2d.h             |  2 +-
 paddle2onnx/mapper/nn/pool3d.cc            |  8 ++++----
 paddle2onnx/mapper/nn/pool3d.h             |  2 +-
 paddle2onnx/mapper/tensor/fill_constant.cc | 24 ++++------------------
 paddle2onnx/mapper/tensor/fill_constant.h  |  1 -
 paddle2onnx/mapper/tensor/matmul.cc        |  4 ++--
 paddle2onnx/mapper/tensor/matmul.h         |  2 +-
 paddle2onnx/mapper/tensor/matmul_v2.cc     |  4 ++--
 paddle2onnx/mapper/tensor/matmul_v2.h      |  2 +-
 10 files changed, 20 insertions(+), 37 deletions(-)

diff --git a/paddle2onnx/mapper/nn/pool2d.cc b/paddle2onnx/mapper/nn/pool2d.cc
index 8ad8f30b7..6412f9f26 100755
--- a/paddle2onnx/mapper/nn/pool2d.cc
+++ b/paddle2onnx/mapper/nn/pool2d.cc
@@ -117,7 +117,7 @@ void Pool2dMapper::AdaptivePool(const std::vector<TensorInfo>& input_info,
   }
   std::shared_ptr<ONNX_NAMESPACE::NodeProto> node(nullptr);
-  if (kNoNeedCastTypes.find(input_info[0].dtype) != kNoNeedCastTypes.end())
+  if (kNeedCastTypes.find(input_info[0].dtype) == kNeedCastTypes.end())
   {
     node = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
   }
@@ -174,7 +174,7 @@ void Pool2dMapper::NoAdaptivePool(const std::vector<TensorInfo>& input_info,
   int64_t max_ksize = *std::max_element(std::begin(k_size_), std::end(k_size_));
   int64_t max_pads = *std::max_element(std::begin(pads_), std::end(pads_));
   std::string input_x = input_info[0].name;
-  if (kNoNeedCastTypes.find(input_info[0].dtype) == kNoNeedCastTypes.end())
+  if (kNeedCastTypes.find(input_info[0].dtype) != kNeedCastTypes.end())
   {
     input_x = helper_->AutoCast(input_info[0].name, input_info[0].dtype, P2ODataType::FP32);
@@ -212,7 +212,7 @@
     onnx_pool_type = iter->second[0];
   }
   std::shared_ptr<ONNX_NAMESPACE::NodeProto> node(nullptr);
-  if (kNoNeedCastTypes.find(input_info[0].dtype) != kNoNeedCastTypes.end())
+  if (kNeedCastTypes.find(input_info[0].dtype) == kNeedCastTypes.end())
   {
     node = helper_->MakeNode(onnx_pool_type, {input_x}, {output_info[0].name});
   }
@@ -337,7 +337,7 @@ void Pool2dMapper::Opset7() {
     auto iter = op_mapper_.find(pooling_type_);
     onnx_pool_type = iter->second[1];
   }
-  if (kNoNeedCastTypes.find(input_info[0].dtype) != kNoNeedCastTypes.end())
+  if (kNeedCastTypes.find(input_info[0].dtype) == kNeedCastTypes.end())
   {
     auto output = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
   }

diff --git a/paddle2onnx/mapper/nn/pool2d.h b/paddle2onnx/mapper/nn/pool2d.h
index a64fdbaa5..fb292787b 100644
--- a/paddle2onnx/mapper/nn/pool2d.h
+++ b/paddle2onnx/mapper/nn/pool2d.h
@@ -63,7 +63,7 @@ class Pool2dMapper : public Mapper {
                     const std::vector<TensorInfo>& output_info);
   void NoAdaptivePool(const std::vector<TensorInfo>& input_info,
                       const std::vector<TensorInfo>& output_info);
-  const std::unordered_set<int32_t> kNoNeedCastTypes{P2ODataType::INT8, P2ODataType::FP16, P2ODataType::FP32};
+  const std::unordered_set<int32_t> kNeedCastTypes{P2ODataType::FP64};
   bool ceil_mode_;
   bool global_pooling_;
   bool adaptive_;

diff --git a/paddle2onnx/mapper/nn/pool3d.cc b/paddle2onnx/mapper/nn/pool3d.cc
index 201245bf4..5559dbd06 100644
--- a/paddle2onnx/mapper/nn/pool3d.cc
+++ b/paddle2onnx/mapper/nn/pool3d.cc
@@ -58,7 +58,7 @@ void Pool3dMapper::AdaptivePool(const std::vector<TensorInfo>& input_info,
   }
   std::shared_ptr<ONNX_NAMESPACE::NodeProto> node;
-  if (kNoNeedCastTypes.find(input_info[0].dtype) != kNoNeedCastTypes.end())
+  if (kNeedCastTypes.find(input_info[0].dtype) == kNeedCastTypes.end())
   {
     node = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
   }
@@ -119,7 +119,7 @@ void Pool3dMapper::NoAdaptivePool(const std::vector<TensorInfo>& input_info,
   int64_t max_ksize = *std::max_element(std::begin(k_size_), std::end(k_size_));
   int64_t max_pads = *std::max_element(std::begin(pads_), std::end(pads_));
   auto input_x = input_info[0].name;
-  if (kNoNeedCastTypes.find(input_info[0].dtype) == kNoNeedCastTypes.end())
+  if (kNeedCastTypes.find(input_info[0].dtype) != kNeedCastTypes.end())
   {
     input_x = helper_->AutoCast(input_info[0].name, input_info[0].dtype, P2ODataType::FP32);
@@ -158,7 +158,7 @@
     onnx_pool_type = iter->second[0];
   }
   std::shared_ptr<ONNX_NAMESPACE::NodeProto> node(nullptr);
-  if (kNoNeedCastTypes.find(input_info[0].dtype) != kNoNeedCastTypes.end())
+  if (kNeedCastTypes.find(input_info[0].dtype) == kNeedCastTypes.end())
   {
     node = helper_->MakeNode(onnx_pool_type, {input_x}, {output_info[0].name});
   }
@@ -270,7 +270,7 @@ void Pool3dMapper::Opset7() {
     onnx_pool_type = iter->second[1];
   }
 
-  if (kNoNeedCastTypes.find(input_info[0].dtype) != kNoNeedCastTypes.end())
+  if (kNeedCastTypes.find(input_info[0].dtype) == kNeedCastTypes.end())
   {
     auto output = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
   }

diff --git a/paddle2onnx/mapper/nn/pool3d.h b/paddle2onnx/mapper/nn/pool3d.h
index a4206a264..250779c7c 100644
--- a/paddle2onnx/mapper/nn/pool3d.h
+++ b/paddle2onnx/mapper/nn/pool3d.h
@@ -50,7 +50,7 @@ class Pool3dMapper : public Mapper {
                     const std::vector<TensorInfo>& output_info);
   void NoAdaptivePool(const std::vector<TensorInfo>& input_info,
                       const std::vector<TensorInfo>& output_info);
-  const std::unordered_set<int32_t> kNoNeedCastTypes{P2ODataType::INT8, P2ODataType::FP16, P2ODataType::FP32};
+  const std::unordered_set<int32_t> kNeedCastTypes{P2ODataType::FP64};
   bool ceil_mode_;
   bool global_pooling_;
   bool adaptive_;

diff --git a/paddle2onnx/mapper/tensor/fill_constant.cc b/paddle2onnx/mapper/tensor/fill_constant.cc
index ced00db1d..d359c5072 100644
--- a/paddle2onnx/mapper/tensor/fill_constant.cc
+++ b/paddle2onnx/mapper/tensor/fill_constant.cc
@@ -25,11 +25,12 @@ int32_t FillConstantMapper::GetMinOpset(bool verbose) {
   auto onnx_dtype = GetOnnxDtype(out_info[0].dtype);
   if (onnx_dtype != ONNX_NAMESPACE::TensorProto::INT32 &&
       onnx_dtype != ONNX_NAMESPACE::TensorProto::INT64 &&
+      onnx_dtype != ONNX_NAMESPACE::TensorProto::FLOAT16 &&
       onnx_dtype != ONNX_NAMESPACE::TensorProto::FLOAT &&
       onnx_dtype != ONNX_NAMESPACE::TensorProto::DOUBLE &&
       onnx_dtype != ONNX_NAMESPACE::TensorProto::BOOL ) {
-    Error() << "Only support int32/int64/float32/float64/bool data type in "
+    Error() << "Only support int32/int64/float16/float32/float64/bool data type in "
               "fill_constant operator."
            << std::endl;
     return -1;
@@ -80,15 +81,7 @@ void FillConstantMapper::Opset7() {
   if (HasInput("ValueTensor")) {
     auto value_info = GetInput("ValueTensor");
     auto out = helper_->Constant(shape, GetOnnxDtype(out_info[0].dtype), float(0.0));
-    if (kNoNeedCastTypes.find(value_info[0].dtype) != kNoNeedCastTypes.end())
-    {
-      helper_->MakeNode("Add", {out, value_info[0].name}, {out_info[0].name});
-    }
-    else
-    {
-      auto value_tensor = helper_->AutoCast(value_info[0].name, value_info[0].dtype, out_info[0].dtype);
-      helper_->MakeNode("Add", {out, value_tensor}, {out_info[0].name});
-    }
+    helper_->MakeNode("Add", {out, value_info[0].name}, {out_info[0].name});
   } else {
     helper_->Constant(out_info[0].name, shape, GetOnnxDtype(out_info[0].dtype), value);
   }
@@ -156,16 +149,7 @@ void FillConstantMapper::Opset9() {
   }
   if (value_is_tensor) {
     auto value_info = GetInput("ValueTensor");
-    if (kNoNeedCastTypes.find(value_info[0].dtype) != kNoNeedCastTypes.end())
-    {
-      helper_->MakeNode("Add", {out, value_info[0].name}, {out_info[0].name});
-    }
-    else
-    {
-      std::string cast_value = helper_->AutoCast(
-          value_info[0].name, value_info[0].dtype, out_info[0].dtype);
-      helper_->MakeNode("Add", {out, cast_value}, {out_info[0].name});
-    }
+    helper_->MakeNode("Add", {out, value_info[0].name}, {out_info[0].name});
   } else {
     helper_->MakeNode("Identity", {out}, {out_info[0].name});
   }

diff --git a/paddle2onnx/mapper/tensor/fill_constant.h b/paddle2onnx/mapper/tensor/fill_constant.h
index c96d4ab84..231cb8402 100644
--- a/paddle2onnx/mapper/tensor/fill_constant.h
+++ b/paddle2onnx/mapper/tensor/fill_constant.h
@@ -31,7 +31,6 @@ class FillConstantMapper : public Mapper {
 
  private:
   float GetFillValue();
-  const std::unordered_set<int32_t> kNoNeedCastTypes{P2ODataType::INT8, P2ODataType::FP16, P2ODataType::FP32};
   std::string str_value_;
   float value_;
 };

diff --git a/paddle2onnx/mapper/tensor/matmul.cc b/paddle2onnx/mapper/tensor/matmul.cc
index 26a238b63..bc6ea86de 100644
--- a/paddle2onnx/mapper/tensor/matmul.cc
+++ b/paddle2onnx/mapper/tensor/matmul.cc
@@ -20,7 +20,7 @@ REGISTER_MAPPER(matmul, MatmulMapper)
 
 std::string MatmulMapper::GetTrans(std::vector<TensorInfo>& input_info) {
   std::string castd_name = input_info[0].name;
-  if (kNoNeedCastTypes.find(input_info[0].dtype) != kNoNeedCastTypes.end()) {
+  if (kNeedCastTypes.find(input_info[0].dtype) != kNeedCastTypes.end()) {
     castd_name = helper_->AutoCast(input_info[0].name, input_info[0].dtype, P2ODataType::FP32);
   }
@@ -44,7 +44,7 @@ void MatmulMapper::Opset7() {
     input_y = GetTrans(input_y_info);
   }
 
-  if (kNoNeedCastTypes.find(input_x_info[0].dtype) != kNoNeedCastTypes.end())
+  if (kNeedCastTypes.find(input_x_info[0].dtype) == kNeedCastTypes.end())
   {
     if (fabs(alpha_ - 1.0) < 1e-6)
     {

diff --git a/paddle2onnx/mapper/tensor/matmul.h b/paddle2onnx/mapper/tensor/matmul.h
index 881d13bf7..863237711 100644
--- a/paddle2onnx/mapper/tensor/matmul.h
+++ b/paddle2onnx/mapper/tensor/matmul.h
@@ -34,7 +34,7 @@ class MatmulMapper : public Mapper {
 
  private:
   std::string GetTrans(std::vector<TensorInfo>& input_info);
-  const std::unordered_set<int32_t> kNoNeedCastTypes{P2ODataType::INT8, P2ODataType::FP16, P2ODataType::FP32};
+  const std::unordered_set<int32_t> kNeedCastTypes{P2ODataType::INT8, P2ODataType::FP64};
   bool transpose_X_ = false;
   bool transpose_Y_ = false;
   float alpha_ = 1.0;

diff --git a/paddle2onnx/mapper/tensor/matmul_v2.cc b/paddle2onnx/mapper/tensor/matmul_v2.cc
index 2fa91d733..15922878d 100644
--- a/paddle2onnx/mapper/tensor/matmul_v2.cc
+++ b/paddle2onnx/mapper/tensor/matmul_v2.cc
@@ -23,7 +23,7 @@ REGISTER_MAPPER(matmul_v2, MatmulV2Mapper)
 
 std::string MatmulV2Mapper::GetTrans(std::vector<TensorInfo>& input_info) {
   std::string castd_name = input_info[0].name;
-  if (kNoNeedCastTypes.find(input_info[0].dtype) == kNoNeedCastTypes.end())
+  if (kNeedCastTypes.find(input_info[0].dtype) != kNeedCastTypes.end())
   {
     castd_name = helper_->AutoCast(
         input_info[0].name, input_info[0].dtype, P2ODataType::FP32);
@@ -48,7 +48,7 @@ void MatmulV2Mapper::Opset7() {
   if (trans_y_) {
     input_y = GetTrans(input_y_info);
   }
-  if (kNoNeedCastTypes.find(input_y_info[0].dtype) != kNoNeedCastTypes.end())
+  if (kNeedCastTypes.find(input_y_info[0].dtype) == kNeedCastTypes.end())
   {
     auto node = helper_->MakeNode("MatMul", {input_x, input_y}, {output_info[0].name});
   }

diff --git a/paddle2onnx/mapper/tensor/matmul_v2.h b/paddle2onnx/mapper/tensor/matmul_v2.h
index 04699a90b..cf1856dc9 100644
--- a/paddle2onnx/mapper/tensor/matmul_v2.h
+++ b/paddle2onnx/mapper/tensor/matmul_v2.h
@@ -33,7 +33,7 @@ class MatmulV2Mapper : public Mapper {
 
  private:
   std::string GetTrans(std::vector<TensorInfo>& input_info);
-  const std::unordered_set<int32_t> kNoNeedCastTypes{P2ODataType::INT8, P2ODataType::FP16, P2ODataType::FP32};
+  const std::unordered_set<int32_t> kNeedCastTypes{P2ODataType::INT8, P2ODataType::FP64};
   bool trans_x_ = false;
   bool trans_y_ = false;
 };
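Note: patch 17 flips the guards from the allow-list `kNoNeedCastTypes` to a deny-list `kNeedCastTypes` (FP64 for pooling; INT8/FP64 for matmul), and teaches fill_constant to emit FLOAT16 constants directly. Allow- and deny-lists only agree on the dtypes that were considered when the deny-list was written, which is the subtlety behind the next two patches (adding INT16, then returning to explicit allow-lists). A self-contained illustration, with hypothetical dtype tags standing in for the real `P2ODataType` enumerators:

```cpp
#include <cassert>
#include <cstdint>
#include <unordered_set>

// Hypothetical stand-ins for P2ODataType enumerators.
enum Dtype : int32_t { FP16, FP32, FP64, INT8, INT16, INT32, INT64 };

int main() {
  const std::unordered_set<int32_t> no_need_cast{INT8, FP16, FP32};  // patch 14 phrasing
  const std::unordered_set<int32_t> need_cast{FP64};                 // patch 17, pooling

  // On the floating-point dtypes both phrasings give the same answer...
  for (int32_t d : {FP16, FP32, FP64}) {
    assert((no_need_cast.count(d) > 0) == (need_cast.count(d) == 0));
  }
  // ...but they disagree on anything the deny-list never considered:
  // INT32 required a cast under the allow-list, yet sails through the
  // deny-list. That gap is what the follow-up patches keep chasing.
  assert((no_need_cast.count(INT32) > 0) != (need_cast.count(INT32) == 0));
}
```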
From 9cacb7b842a475ae7db5e56fbc1b3f2c53bcedab Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Mon, 24 Jun 2024 14:10:17 +0000
Subject: [PATCH 18/19] support model convert from fp32 to fp16

---
 paddle2onnx/mapper/tensor/matmul.h    | 2 +-
 paddle2onnx/mapper/tensor/matmul_v2.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle2onnx/mapper/tensor/matmul.h b/paddle2onnx/mapper/tensor/matmul.h
index 863237711..2f1d1983c 100644
--- a/paddle2onnx/mapper/tensor/matmul.h
+++ b/paddle2onnx/mapper/tensor/matmul.h
@@ -34,7 +34,7 @@ class MatmulMapper : public Mapper {
 
  private:
   std::string GetTrans(std::vector<TensorInfo>& input_info);
-  const std::unordered_set<int32_t> kNeedCastTypes{P2ODataType::INT8, P2ODataType::FP64};
+  const std::unordered_set<int32_t> kNeedCastTypes{P2ODataType::INT8, P2ODataType::INT16, P2ODataType::FP64};
   bool transpose_X_ = false;
   bool transpose_Y_ = false;
   float alpha_ = 1.0;

diff --git a/paddle2onnx/mapper/tensor/matmul_v2.h b/paddle2onnx/mapper/tensor/matmul_v2.h
index cf1856dc9..3e76fd3e5 100644
--- a/paddle2onnx/mapper/tensor/matmul_v2.h
+++ b/paddle2onnx/mapper/tensor/matmul_v2.h
@@ -33,7 +33,7 @@ class MatmulV2Mapper : public Mapper {
 
  private:
   std::string GetTrans(std::vector<TensorInfo>& input_info);
-  const std::unordered_set<int32_t> kNeedCastTypes{P2ODataType::INT8, P2ODataType::FP64};
+  const std::unordered_set<int32_t> kNeedCastTypes{P2ODataType::INT8, P2ODataType::INT16, P2ODataType::FP64};
   bool trans_x_ = false;
   bool trans_y_ = false;
 };
From f239519e92707b44d9435e019a8c2537758a1179 Mon Sep 17 00:00:00 2001
From: xiaoyewww <641311428@qq.com>
Date: Wed, 26 Jun 2024 05:50:32 +0000
Subject: [PATCH 19/19] support model convert from fp32 to fp16

---
 README.md                              | 1 -
 paddle2onnx/mapper/nn/pool2d.cc        | 8 ++++----
 paddle2onnx/mapper/nn/pool2d.h         | 2 +-
 paddle2onnx/mapper/nn/pool3d.cc        | 8 ++++----
 paddle2onnx/mapper/nn/pool3d.h         | 2 +-
 paddle2onnx/mapper/tensor/matmul.cc    | 4 ++--
 paddle2onnx/mapper/tensor/matmul.h     | 2 +-
 paddle2onnx/mapper/tensor/matmul_v2.cc | 4 ++--
 paddle2onnx/mapper/tensor/matmul_v2.h  | 2 +-
 pyproject.toml                         | 1 -
 tests/run.sh                           | 1 +
 11 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index da6e9253b..19e604524 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,6 @@ Paddle2ONNX 本身不依赖其他组件，但是我们建议您在以下环境
 
 - PaddlePaddle == 2.6.0
 - onnxruntime >= 1.10.0
-- numpy < 2.0.0
 
 # 3 安装 Paddle2ONNX

diff --git a/paddle2onnx/mapper/nn/pool2d.cc b/paddle2onnx/mapper/nn/pool2d.cc
index 6412f9f26..0672490b7 100755
--- a/paddle2onnx/mapper/nn/pool2d.cc
+++ b/paddle2onnx/mapper/nn/pool2d.cc
@@ -117,7 +117,7 @@ void Pool2dMapper::AdaptivePool(const std::vector<TensorInfo>& input_info,
   }
   std::shared_ptr<ONNX_NAMESPACE::NodeProto> node(nullptr);
-  if (kNeedCastTypes.find(input_info[0].dtype) == kNeedCastTypes.end())
+  if (kNoNeedCastTypesOpSet7.find(input_info[0].dtype) != kNoNeedCastTypesOpSet7.end())
   {
     node = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
   }
@@ -174,7 +174,7 @@ void Pool2dMapper::NoAdaptivePool(const std::vector<TensorInfo>& input_info,
   int64_t max_ksize = *std::max_element(std::begin(k_size_), std::end(k_size_));
   int64_t max_pads = *std::max_element(std::begin(pads_), std::end(pads_));
   std::string input_x = input_info[0].name;
-  if (kNeedCastTypes.find(input_info[0].dtype) != kNeedCastTypes.end())
+  if (kNoNeedCastTypesOpSet7.find(input_info[0].dtype) == kNoNeedCastTypesOpSet7.end())
   {
     input_x = helper_->AutoCast(input_info[0].name, input_info[0].dtype, P2ODataType::FP32);
@@ -212,7 +212,7 @@
     onnx_pool_type = iter->second[0];
   }
   std::shared_ptr<ONNX_NAMESPACE::NodeProto> node(nullptr);
-  if (kNeedCastTypes.find(input_info[0].dtype) == kNeedCastTypes.end())
+  if (kNoNeedCastTypesOpSet7.find(input_info[0].dtype) != kNoNeedCastTypesOpSet7.end())
   {
     node = helper_->MakeNode(onnx_pool_type, {input_x}, {output_info[0].name});
   }
@@ -337,7 +337,7 @@ void Pool2dMapper::Opset7() {
     auto iter = op_mapper_.find(pooling_type_);
     onnx_pool_type = iter->second[1];
   }
-  if (kNeedCastTypes.find(input_info[0].dtype) == kNeedCastTypes.end())
+  if (kNoNeedCastTypesOpSet7.find(input_info[0].dtype) != kNoNeedCastTypesOpSet7.end())
   {
     auto output = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
   }

diff --git a/paddle2onnx/mapper/nn/pool2d.h b/paddle2onnx/mapper/nn/pool2d.h
index fb292787b..9fd9df489 100644
--- a/paddle2onnx/mapper/nn/pool2d.h
+++ b/paddle2onnx/mapper/nn/pool2d.h
@@ -63,7 +63,7 @@ class Pool2dMapper : public Mapper {
                     const std::vector<TensorInfo>& output_info);
   void NoAdaptivePool(const std::vector<TensorInfo>& input_info,
                       const std::vector<TensorInfo>& output_info);
-  const std::unordered_set<int32_t> kNeedCastTypes{P2ODataType::FP64};
+  const std::unordered_set<int32_t> kNoNeedCastTypesOpSet7{P2ODataType::FP16, P2ODataType::FP32};
   bool ceil_mode_;
   bool global_pooling_;
   bool adaptive_;

diff --git a/paddle2onnx/mapper/nn/pool3d.cc b/paddle2onnx/mapper/nn/pool3d.cc
index 5559dbd06..2da09abd3 100644
--- a/paddle2onnx/mapper/nn/pool3d.cc
+++ b/paddle2onnx/mapper/nn/pool3d.cc
@@ -58,7 +58,7 @@ void Pool3dMapper::AdaptivePool(const std::vector<TensorInfo>& input_info,
   }
   std::shared_ptr<ONNX_NAMESPACE::NodeProto> node;
-  if (kNeedCastTypes.find(input_info[0].dtype) == kNeedCastTypes.end())
+  if (kNoNeedCastTypesOpSet7.find(input_info[0].dtype) != kNoNeedCastTypesOpSet7.end())
   {
     node = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
   }
@@ -119,7 +119,7 @@ void Pool3dMapper::NoAdaptivePool(const std::vector<TensorInfo>& input_info,
   int64_t max_ksize = *std::max_element(std::begin(k_size_), std::end(k_size_));
   int64_t max_pads = *std::max_element(std::begin(pads_), std::end(pads_));
   auto input_x = input_info[0].name;
-  if (kNeedCastTypes.find(input_info[0].dtype) != kNeedCastTypes.end())
+  if (kNoNeedCastTypesOpSet7.find(input_info[0].dtype) == kNoNeedCastTypesOpSet7.end())
   {
     input_x =
         helper_->AutoCast(input_info[0].name, input_info[0].dtype, P2ODataType::FP32);
@@ -158,7 +158,7 @@
     onnx_pool_type = iter->second[0];
   }
   std::shared_ptr<ONNX_NAMESPACE::NodeProto> node(nullptr);
-  if (kNeedCastTypes.find(input_info[0].dtype) == kNeedCastTypes.end())
+  if (kNoNeedCastTypesOpSet7.find(input_info[0].dtype) != kNoNeedCastTypesOpSet7.end())
   {
     node = helper_->MakeNode(onnx_pool_type, {input_x}, {output_info[0].name});
   }
@@ -270,7 +270,7 @@ void Pool3dMapper::Opset7() {
     onnx_pool_type = iter->second[1];
   }
 
-  if (kNeedCastTypes.find(input_info[0].dtype) == kNeedCastTypes.end())
+  if (kNoNeedCastTypesOpSet7.find(input_info[0].dtype) != kNoNeedCastTypesOpSet7.end())
   {
     auto output = helper_->MakeNode(onnx_pool_type, {input_info[0].name}, {output_info[0].name});
   }

diff --git a/paddle2onnx/mapper/nn/pool3d.h b/paddle2onnx/mapper/nn/pool3d.h
index 250779c7c..5aeb3adf0 100644
--- a/paddle2onnx/mapper/nn/pool3d.h
+++ b/paddle2onnx/mapper/nn/pool3d.h
@@ -50,7 +50,7 @@ class Pool3dMapper : public Mapper {
                     const std::vector<TensorInfo>& output_info);
   void NoAdaptivePool(const std::vector<TensorInfo>& input_info,
                       const std::vector<TensorInfo>& output_info);
-  const std::unordered_set<int32_t> kNeedCastTypes{P2ODataType::FP64};
+  const std::unordered_set<int32_t> kNoNeedCastTypesOpSet7{P2ODataType::FP16, P2ODataType::FP32};
   bool ceil_mode_;
   bool global_pooling_;
   bool adaptive_;

diff --git a/paddle2onnx/mapper/tensor/matmul.cc b/paddle2onnx/mapper/tensor/matmul.cc
index bc6ea86de..48b66f62b 100644
--- a/paddle2onnx/mapper/tensor/matmul.cc
+++ b/paddle2onnx/mapper/tensor/matmul.cc
@@ -20,7 +20,7 @@ REGISTER_MAPPER(matmul, MatmulMapper)
 
 std::string MatmulMapper::GetTrans(std::vector<TensorInfo>& input_info) {
   std::string castd_name = input_info[0].name;
-  if (kNeedCastTypes.find(input_info[0].dtype) != kNeedCastTypes.end()) {
+  if (kNoNeedCastTypesOpSet7.find(input_info[0].dtype) == kNoNeedCastTypesOpSet7.end()) {
     castd_name = helper_->AutoCast(input_info[0].name, input_info[0].dtype, P2ODataType::FP32);
   }
@@ -44,7 +44,7 @@ void MatmulMapper::Opset7() {
     input_y = GetTrans(input_y_info);
   }
 
-  if (kNeedCastTypes.find(input_x_info[0].dtype) == kNeedCastTypes.end())
+  if (kNoNeedCastTypesOpSet7.find(input_x_info[0].dtype) != kNoNeedCastTypesOpSet7.end())
   {
     if (fabs(alpha_ - 1.0) < 1e-6)
     {

diff --git a/paddle2onnx/mapper/tensor/matmul.h b/paddle2onnx/mapper/tensor/matmul.h
index 2f1d1983c..b29226e72 100644
--- a/paddle2onnx/mapper/tensor/matmul.h
+++ b/paddle2onnx/mapper/tensor/matmul.h
@@ -34,7 +34,7 @@ class MatmulMapper : public Mapper {
 
  private:
   std::string GetTrans(std::vector<TensorInfo>& input_info);
-  const std::unordered_set<int32_t> kNeedCastTypes{P2ODataType::INT8, P2ODataType::INT16, P2ODataType::FP64};
+  const std::unordered_set<int32_t> kNoNeedCastTypesOpSet7{P2ODataType::FP16, P2ODataType::FP32, P2ODataType::INT32, P2ODataType::INT64};
   bool transpose_X_ = false;
   bool transpose_Y_ = false;
   float alpha_ = 1.0;

diff --git a/paddle2onnx/mapper/tensor/matmul_v2.cc b/paddle2onnx/mapper/tensor/matmul_v2.cc
index 15922878d..db3af7f58 100644
--- a/paddle2onnx/mapper/tensor/matmul_v2.cc
+++ b/paddle2onnx/mapper/tensor/matmul_v2.cc
@@ -23,7 +23,7 @@ REGISTER_MAPPER(matmul_v2, MatmulV2Mapper)
 
 std::string MatmulV2Mapper::GetTrans(std::vector<TensorInfo>& input_info) {
   std::string castd_name = input_info[0].name;
-  if (kNeedCastTypes.find(input_info[0].dtype) != kNeedCastTypes.end())
+  if (kNoNeedCastTypesOpSet7.find(input_info[0].dtype) == kNoNeedCastTypesOpSet7.end())
   {
     castd_name = helper_->AutoCast(
         input_info[0].name, input_info[0].dtype, P2ODataType::FP32);
@@ -48,7 +48,7 @@ void MatmulV2Mapper::Opset7() {
   if (trans_y_) {
     input_y = GetTrans(input_y_info);
   }
-  if (kNeedCastTypes.find(input_y_info[0].dtype) == kNeedCastTypes.end())
+  if (kNoNeedCastTypesOpSet7.find(input_y_info[0].dtype) != kNoNeedCastTypesOpSet7.end())
   {
     auto node = helper_->MakeNode("MatMul", {input_x, input_y}, {output_info[0].name});
   }

diff --git a/paddle2onnx/mapper/tensor/matmul_v2.h b/paddle2onnx/mapper/tensor/matmul_v2.h
index 3e76fd3e5..bc342e6d1 100644
--- a/paddle2onnx/mapper/tensor/matmul_v2.h
+++ b/paddle2onnx/mapper/tensor/matmul_v2.h
@@ -33,7 +33,7 @@ class MatmulV2Mapper : public Mapper {
 
  private:
   std::string GetTrans(std::vector<TensorInfo>& input_info);
-  const std::unordered_set<int32_t> kNeedCastTypes{P2ODataType::INT8, P2ODataType::INT16, P2ODataType::FP64};
+  const std::unordered_set<int32_t> kNoNeedCastTypesOpSet7{P2ODataType::FP16, P2ODataType::FP32, P2ODataType::INT32, P2ODataType::INT64};
   bool trans_x_ = false;
   bool trans_y_ = false;
 };

diff --git a/pyproject.toml b/pyproject.toml
index 3d3b158d8..68876acb3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,6 @@ license = {text = "Apache License v2.0"}
 requires-python = ">=3.8"
 dependencies = [
     "onnxruntime>=1.10.0",
-    "numpy<2.0.0", # numpy 2.0.0 cannot support p2o at now
 ]
 
 [project.scripts]

diff --git a/tests/run.sh b/tests/run.sh
index 0b2c7f57c..8adec38a9 100755
--- a/tests/run.sh
+++ b/tests/run.sh
@@ -70,6 +70,7 @@ $PY_CMD -m pip install pytest
 $PY_CMD -m pip install onnx onnxruntime tqdm filelock
 $PY_CMD -m pip install paddlepaddle==2.6.0
 $PY_CMD -m pip install six hypothesis
+$PY_CMD -m pip install numpy==1.26.4
 
 export ENABLE_DEV=ON
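Note: the final patch settles on explicit per-opset allow-lists (`kNoNeedCastTypesOpSet7`), drops the repository-wide numpy<2 requirement from README.md and pyproject.toml, and instead pins numpy==1.26.4 inside the test runner only. Since the series rests on treating float16 tensors as opaque 16-bit payloads, a self-contained decoder can help when eyeballing exported FP16 weights by hand; this only illustrates the IEEE 754 binary16 layout (1 sign, 5 exponent, 10 mantissa bits) and is not code from the patch:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Decode an IEEE 754 binary16 bit pattern into a float.
float HalfToFloat(uint16_t h) {
  uint32_t sign = (h >> 15) & 0x1;
  uint32_t exp = (h >> 10) & 0x1F;
  uint32_t man = h & 0x3FF;
  uint32_t bits;
  if (exp == 0x1F) {      // inf / NaN: widen the payload
    bits = (sign << 31) | (0xFFu << 23) | (man << 13);
  } else if (exp != 0) {  // normal: re-bias exponent from 15 to 127
    bits = (sign << 31) | ((exp - 15 + 127) << 23) | (man << 13);
  } else if (man == 0) {  // signed zero
    bits = sign << 31;
  } else {                // subnormal: normalize into float range
    uint32_t e = 127 - 15 + 1;
    while ((man & 0x400) == 0) { man <<= 1; --e; }
    man &= 0x3FF;
    bits = (sign << 31) | (e << 23) | (man << 13);
  }
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

int main() {
  std::printf("%f\n", HalfToFloat(0x3C00));  // 1.000000
  std::printf("%f\n", HalfToFloat(0xC000));  // -2.000000
  std::printf("%g\n", HalfToFloat(0x0001));  // 5.96046e-08, smallest subnormal
}
```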