From fe00002df83848acaf611d8f24070afdb26d92b1 Mon Sep 17 00:00:00 2001 From: Feiyue Chen Date: Fri, 23 Feb 2024 07:54:47 +0000 Subject: [PATCH 1/2] Added Transpose Op --- .../vsinpu/builders/impl/tensor_op_builder.h | 39 +++++++++++++++++++ .../vsinpu/builders/op_builder_factory.h | 3 +- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/tensor_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/tensor_op_builder.h index d4cd8dc3a050e..00fb1c794c915 100644 --- a/onnxruntime/core/providers/vsinpu/builders/impl/tensor_op_builder.h +++ b/onnxruntime/core/providers/vsinpu/builders/impl/tensor_op_builder.h @@ -84,6 +84,45 @@ class ReshapeOpBuilder : public BaseOpBuilder { return true; } }; + +class TransposeOpBuilder : public BaseOpBuilder { + bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer, + const Node* node) const override { + auto input_defs = node->InputDefs(); + auto shape_dim = vsi::npu::util::GetTensorShape(*input_defs[0]).NumDimensions(); + NodeAttrHelper helper(*node); + auto perm = helper.Get("perm", std::vector(shape_dim, 1)); + if (perm.size() != shape_dim) { + LOGS_DEFAULT(VERBOSE) << "Size mismatch between perm vector and input shape."; + return false; + } + if (*input_defs[0]->Type() == "tensor(int64)") { + LOGS_DEFAULT(VERBOSE) << "Int64 input cannot support except as parameter."; + return false; + } + return true; + } + bool HandleBuildOp(vsi::npu::GraphEP* graph_ep, + std::vector>& inputs, + std::vector>& outputs, + const Node* node) override { + LOGS_DEFAULT(VERBOSE) << "Creating Transpose Op."; + std::vector def_val(inputs[0]->GetShape().size()); + for (int64_t i = 0; i < def_val.size(); i++) def_val[i] = def_val.size() - i - 1; + + NodeAttrHelper helper(*node); + def_val = helper.Get("perm", def_val); + std::vector timvx_perm; + for (uint32_t i = 0; i < def_val.size(); i++) { + timvx_perm.push_back(def_val.size() - 1 - def_val[def_val.size() - i - 1]); + } + auto op = graph_ep->GetGraph()->CreateOperation(timvx_perm); + (*op).BindInputs(inputs).BindOutputs(outputs); + graph_ep->GetOps().push_back(std::move(op)); + return true; + } +}; + } // namespace npu } // namespace vsi diff --git a/onnxruntime/core/providers/vsinpu/builders/op_builder_factory.h b/onnxruntime/core/providers/vsinpu/builders/op_builder_factory.h index 09b3977c4786f..6cfd9703f8a06 100644 --- a/onnxruntime/core/providers/vsinpu/builders/op_builder_factory.h +++ b/onnxruntime/core/providers/vsinpu/builders/op_builder_factory.h @@ -74,7 +74,8 @@ static const std::map reg = { REGISTER_OP_BUILDER("MaxPool", MaxPoolOpBuilder), REGISTER_OP_BUILDER("Reshape", ReshapeOpBuilder), REGISTER_OP_BUILDER("Concat", ConcatOpBuilder), - REGISTER_OP_BUILDER("Softmax", SoftmaxOpBuilder) + REGISTER_OP_BUILDER("Softmax", SoftmaxOpBuilder), + REGISTER_OP_BUILDER("Transpose", TransposeOpBuilder) #undef REGISTER_OP_BUILDER }; From f7e37553674b7291f55151c2ce2a81fcd23f94ac Mon Sep 17 00:00:00 2001 From: Feiyue Chen Date: Tue, 27 Feb 2024 06:59:18 +0000 Subject: [PATCH 2/2] Added tensorop/ quantizeop support Caution:Clip op will go to sw if input is i8/u8 Quantize/Dequantize have error of +/-1 of some element Introduce ReverseAxis api --- .../vsinpu/builders/impl/base_op_builder.cc | 6 +- .../vsinpu/builders/impl/clip_op_builder.cc | 112 +++++++++++ .../vsinpu/builders/impl/clip_op_builder.h | 59 ++++++ .../vsinpu/builders/impl/concat_op_builder.h | 3 +- .../vsinpu/builders/impl/flatten_op_builder.h | 6 +- .../vsinpu/builders/impl/norm_op_builder.h 
| 75 ++++++++ .../builders/impl/qlinearmatmul_op_builder.cc | 174 ++++++++++++++++++ .../builders/impl/qlinearmatmul_op_builder.h | 76 ++++++++ .../vsinpu/builders/impl/reduce_op_builder.h | 80 ++++++++ .../vsinpu/builders/impl/softmax_op_builder.h | 3 +- .../vsinpu/builders/op_builder_factory.h | 14 +- .../core/providers/vsinpu/vsinpu_ep_graph.cc | 2 +- .../core/providers/vsinpu/vsinpu_util.cc | 26 ++- .../core/providers/vsinpu/vsinpu_util.h | 4 + 14 files changed, 624 insertions(+), 16 deletions(-) create mode 100644 onnxruntime/core/providers/vsinpu/builders/impl/clip_op_builder.cc create mode 100644 onnxruntime/core/providers/vsinpu/builders/impl/clip_op_builder.h create mode 100644 onnxruntime/core/providers/vsinpu/builders/impl/norm_op_builder.h create mode 100644 onnxruntime/core/providers/vsinpu/builders/impl/qlinearmatmul_op_builder.cc create mode 100644 onnxruntime/core/providers/vsinpu/builders/impl/qlinearmatmul_op_builder.h create mode 100644 onnxruntime/core/providers/vsinpu/builders/impl/reduce_op_builder.h diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/vsinpu/builders/impl/base_op_builder.cc index 4abf838a9fbb6..833fda47e28ea 100644 --- a/onnxruntime/core/providers/vsinpu/builders/impl/base_op_builder.cc +++ b/onnxruntime/core/providers/vsinpu/builders/impl/base_op_builder.cc @@ -59,12 +59,8 @@ bool BaseOpBuilder::IsSupported(const onnxruntime::GraphViewer& graph_viewer, return false; } - if (node->Domain() != "") { - LOGS_DEFAULT(VERBOSE) << "Only support node with default domain!"; - return false; - } - if (!util::CheckNoZeroDim(node)) { + LOGS_DEFAULT(VERBOSE) << "Dynamic shape(shape has zero dim) is not supported!"; return false; } diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/clip_op_builder.cc b/onnxruntime/core/providers/vsinpu/builders/impl/clip_op_builder.cc new file mode 100644 index 0000000000000..61bba943b9ddf --- /dev/null +++ b/onnxruntime/core/providers/vsinpu/builders/impl/clip_op_builder.cc @@ -0,0 +1,112 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + *****************************************************************************/ +#include "core/providers/vsinpu/builders/impl/clip_op_builder.h" + +namespace onnxruntime { +namespace vsi { +namespace npu { + +namespace clip_internal { +template +struct LowMax { + constexpr static T low() { + return std::numeric_limits::lowest(); + } + constexpr static T max() { + return std::numeric_limits::max(); + } +}; +} // namespace clip_internal + +template +struct ClipOpBuilder::ClipImpl { + ClipImpl(vsi::npu::GraphEP* graph_ep, std::vector>& inputs, + std::vector>& outputs) { + T min_default = clip_internal::LowMax::low(); + T max_default = clip_internal::LowMax::max(); + + T* min_data = &min_default; + T* max_data = &max_default; + std::shared_ptr min_tensor = nullptr; + std::shared_ptr max_tensor = nullptr; + if (inputs.size() > 1) { + min_tensor = inputs[1]; + if (inputs.size() > 2) { + max_tensor = inputs[2]; + } + } + if (min_tensor) { + min_tensor->CopyDataFromTensor(min_data); + } + if (max_tensor) { + max_tensor->CopyDataFromTensor(max_data); + } + auto op = graph_ep->GetGraph()->CreateOperation(static_cast(*min_data), static_cast(*max_data)); + (*op).BindInputs(inputs).BindOutputs(outputs); + graph_ep->GetOps().push_back(std::move(op)); + } +}; + +bool ClipOpBuilder::HandleBuildOp(vsi::npu::GraphEP* graph_ep, + std::vector>& inputs, + std::vector>& outputs, + const Node* node) { + LOGS_DEFAULT(INFO) << "Creating Clip Op."; + if (node->SinceVersion() <= 6) { + NodeAttrHelper helper(*node); + auto min = helper.Get("min", -3.402e+38f); + auto max = helper.Get("max", 3.402e+38f); + auto op = graph_ep->GetGraph()->CreateOperation(min, max); + (*op).BindInputs(inputs).BindOutputs(outputs); + graph_ep->GetOps().push_back(std::move(op)); + } else { + switch (inputs[0]->GetDataType()) { + case tim::vx::DataType::INT8: + ClipImpl(graph_ep, inputs, outputs); + break; + case tim::vx::DataType::UINT8: + ClipImpl(graph_ep, inputs, outputs); + break; + case tim::vx::DataType::INT16: + ClipImpl(graph_ep, inputs, outputs); + break; + case tim::vx::DataType::INT32: + ClipImpl(graph_ep, inputs, outputs); + break; + case tim::vx::DataType::FLOAT16: + ClipImpl(graph_ep, inputs, outputs); + break; + case tim::vx::DataType::FLOAT32: + default: + ClipImpl(graph_ep, inputs, outputs); + break; + } + } + return true; +} + +} // namespace npu + +} // namespace vsi +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/clip_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/clip_op_builder.h new file mode 100644 index 0000000000000..220b3dffb5ae9 --- /dev/null +++ b/onnxruntime/core/providers/vsinpu/builders/impl/clip_op_builder.h @@ -0,0 +1,59 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ +#include "core/providers/vsinpu/builders/impl/base_op_builder.h" +#include "core/providers/shared/utils/utils.h" + +namespace onnxruntime { +namespace vsi { +namespace npu { +// template +class ClipOpBuilder final : public BaseOpBuilder { + bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer, + const Node* node) const override { + if (*node->InputDefs()[0]->Type() == "tensor(int64)") { + LOGS_DEFAULT(ERROR) << "Int64 datatype is only used to describe a param in TIM-VX."; + return false; + } + if (node->SinceVersion() > 6) { + if (node->InputDefs().size() > 1 && !graph_viewer.IsInitializedTensor(node->InputDefs()[1]->Name())) { + LOGS_DEFAULT(ERROR) << "Min/Max value must be const input or attribute."; + return false; + } + } + return true; + } + + bool HandleBuildOp(vsi::npu::GraphEP* graph_ep, + std::vector>& inputs, + std::vector>& outputs, + const Node* node) override; + + private: + template + struct ClipImpl; +}; +} // namespace npu + +} // namespace vsi +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/concat_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/concat_op_builder.h index 2a5297ed1d092..b0d722f12e194 100644 --- a/onnxruntime/core/providers/vsinpu/builders/impl/concat_op_builder.h +++ b/onnxruntime/core/providers/vsinpu/builders/impl/concat_op_builder.h @@ -49,8 +49,7 @@ class ConcatOpBuilder : public BaseOpBuilder { LOGS_DEFAULT(VERBOSE) << "Creating Concat Op."; NodeAttrHelper helper(*node); auto axis = helper.Get("axis", 0); - axis = HandleNegativeAxis(axis, inputs[0]->GetShape().size()); - axis = inputs[0]->GetShape().size() - axis - 1; + axis = util::ReverseAxis(axis, inputs[0]->GetShape().size()); auto op = graph_ep->GetGraph()->CreateOperation(static_cast(axis), inputs.size()); (*op).BindInputs(inputs).BindOutputs(outputs); graph_ep->GetOps().push_back(std::move(op)); diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/flatten_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/flatten_op_builder.h index a6b879fc91cb6..ab11880cb42b6 100644 --- a/onnxruntime/core/providers/vsinpu/builders/impl/flatten_op_builder.h +++ b/onnxruntime/core/providers/vsinpu/builders/impl/flatten_op_builder.h @@ -40,10 +40,8 @@ class FlattenOpBuilder : public BaseOpBuilder { else { auto input_shape = inputs[0]->GetShape(); NodeAttrHelper helper(*node); - int64_t axis = helper.Get("axis", 0); - if (axis < 0) { - axis = HandleNegativeAxis(axis, inputs[0]->GetShape().size()); // handle negative and enforce axis is valid - } + int64_t axis = helper.Get("axis", 1); + axis = util::ReverseAxis(static_cast(axis), input_shape.size()); uint32_t first_dim = 1; for (int64_t i = 0; i < axis; i++) { first_dim *= inputs[0]->GetShape()[i]; diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/norm_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/norm_op_builder.h new file mode 100644 index 0000000000000..06c20c2146f9f --- /dev/null 
+++ b/onnxruntime/core/providers/vsinpu/builders/impl/norm_op_builder.h @@ -0,0 +1,75 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ +#include "core/providers/vsinpu/builders/impl/base_op_builder.h" +#include "core/providers/shared/utils/utils.h" + +namespace onnxruntime { +namespace vsi { +namespace npu { +enum { + input_tensor = 0, + scale_tensor = 1, + Bias_tensor = 2, + mean_tensor = 3, + var_tensor = 4 +}; +class BatchNormOpBuilder : public BaseOpBuilder { + bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer, + const Node* node) const override { + auto input_defs = node->InputDefs(); + NodeAttrHelper helper(*node); + auto training_mode = helper.Get("training_mode", 0); + if (training_mode) { + LOGS_DEFAULT(WARNING) << "Training is not supported in batch_norm op."; + return false; + } + if (helper.HasAttr("spatial") || node->SinceVersion() < 9) { + LOGS_DEFAULT(ERROR) << "VSINPU does not support 'spatial' parameter."; + return false; + } + if (!graph_viewer.IsInitializedTensor(input_defs[scale_tensor]->Name())) { + LOGS_DEFAULT(ERROR) << "Not support mean/var/gamma/beta set as dynamic input yet."; + return false; + } + + return true; + } + bool HandleBuildOp(vsi::npu::GraphEP* graph_ep, + std::vector>& inputs, + std::vector>& outputs, + const Node* node) override { + LOGS_DEFAULT(INFO) << "Creating BatchNorm Op."; + NodeAttrHelper helper(*node); + auto epsilon = helper.Get("epsilon", 1e-5f); + auto op = graph_ep->GetGraph()->CreateOperation(epsilon); + (*op).BindInput(inputs[input_tensor]).BindInput(inputs[mean_tensor]).BindInput(inputs[var_tensor]).BindInput(inputs[scale_tensor]).BindInput(inputs[Bias_tensor]); + (*op).BindOutputs(outputs); + graph_ep->GetOps().push_back(std::move(op)); + return true; + } +}; +} // namespace npu + +} // namespace vsi +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/qlinearmatmul_op_builder.cc b/onnxruntime/core/providers/vsinpu/builders/impl/qlinearmatmul_op_builder.cc new file mode 100644 index 0000000000000..f50b7b990dc67 --- /dev/null +++ b/onnxruntime/core/providers/vsinpu/builders/impl/qlinearmatmul_op_builder.cc @@ -0,0 +1,174 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * 
+ * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ +#include "core/providers/vsinpu/builders/impl/qlinearmatmul_op_builder.h" + +namespace onnxruntime { +namespace vsi { +namespace npu { + +template +struct QLinearMatMulOpBuilder::QMatMulImpl { + QMatMulImpl(vsi::npu::GraphEP* graph_ep, std::vector>& inputs, + std::vector>& outputs) { + T1 A_zp; + inputs[A_zero_point]->CopyDataFromTensor(&A_zp); + T2 B_zp; + inputs[B_zero_point]->CopyDataFromTensor(&B_zp); + T3 out_zp; + inputs[out_zero_point]->CopyDataFromTensor(&out_zp); + tim::vx::Quantization AQuant(tim::vx::QuantType::ASYMMETRIC, static_cast(1.0f), static_cast(A_zp)); + tim::vx::Quantization BQuant(tim::vx::QuantType::ASYMMETRIC, static_cast(1.0f), static_cast(B_zp)); + tim::vx::Quantization OutQuant(tim::vx::QuantType::ASYMMETRIC, static_cast(1.0f), static_cast(out_zp)); + + switch (inputs[A_scale]->GetDataType()) { + case tim::vx::DataType::FLOAT32: { + float a_scale, b_scale, o_scale; + inputs[A_scale]->CopyDataFromTensor(&a_scale); + inputs[B_scale]->CopyDataFromTensor(&b_scale); + inputs[out_scale]->CopyDataFromTensor(&o_scale); + AQuant.SetScales({a_scale}); + BQuant.SetScales({b_scale}); + OutQuant.SetScales({o_scale}); + } break; + case tim::vx::DataType::FLOAT16: { + Ort::Float16_t a_scale, b_scale, o_scale; + inputs[A_scale]->CopyDataFromTensor(&a_scale); + inputs[B_scale]->CopyDataFromTensor(&b_scale); + inputs[out_scale]->CopyDataFromTensor(&o_scale); + AQuant.SetScales({static_cast(a_scale)}); + BQuant.SetScales({static_cast(b_scale)}); + OutQuant.SetScales({static_cast(o_scale)}); + } break; + } + + tim::vx::TensorSpec ASpec(inputs[matrixA]->GetSpec()); + tim::vx::TensorSpec BSpec(inputs[matrixB]->GetSpec()); + tim::vx::TensorSpec OutSpec(outputs[0]->GetSpec()); + ASpec.SetQuantization(AQuant); + BSpec.SetQuantization(BQuant); + OutSpec.SetQuantization(OutQuant); + auto real_A = graph_ep->GetGraph()->CreateTensor(ASpec); + auto real_B = graph_ep->GetGraph()->CreateTensor(BSpec); + auto real_out = graph_ep->GetGraph()->CreateTensor(OutSpec); + if (inputs[matrixB]->GetSpec().GetTensorAttribute() == tim::vx::TensorAttribute::CONSTANT) { + std::vector B_data(inputs[matrixB]->GetSpec().GetElementNum()); + inputs[matrixB]->CopyDataFromTensor(B_data.data()); + real_B->CopyDataToTensor(B_data.data()); + } + for (auto& IO : graph_ep->GetGraphInputs()) { + if (IO->tensor.get() == inputs[matrixA].get()) { + IO->tensor = real_A; + 
} else if (IO->tensor.get() == inputs[matrixB].get()) { + IO->tensor = real_B; + } + } + + for (auto& IO : graph_ep->GetGraphOutputs()) { + if (IO->tensor.get() == outputs[0].get()) { + IO->tensor = real_out; + break; + } + } + + inputs[matrixA] = real_A; + inputs[matrixB] = real_B; + outputs[0] = real_out; + + auto op = graph_ep->GetGraph()->CreateOperation(); + + (*op).BindInput(inputs[matrixA]).BindInput(inputs[matrixB]); + (*op).BindOutput(real_out); + graph_ep->GetOps().push_back(std::move(op)); + } +}; + +bool QLinearMatMulOpBuilder::HandleBuildOp(vsi::npu::GraphEP* graph_ep, + std::vector>& inputs, + std::vector>& outputs, + const Node* node) { + LOGS_DEFAULT(INFO) << "Creating QLinearMatmul Op."; + switch (inputs[A_zero_point]->GetDataType()) { + case tim::vx::DataType::INT8: { + switch (inputs[B_zero_point]->GetDataType()) { + case tim::vx::DataType::INT8: { + switch (inputs[out_zero_point]->GetDataType()) { + case tim::vx::DataType::INT8: + QMatMulImpl(graph_ep, inputs, outputs); + break; + case tim::vx::DataType::UINT8: + QMatMulImpl(graph_ep, inputs, outputs); + break; + } + break; + } + case tim::vx::DataType::UINT8: { + switch (inputs[out_zero_point]->GetDataType()) { + case tim::vx::DataType::INT8: + QMatMulImpl(graph_ep, inputs, outputs); + break; + case tim::vx::DataType::UINT8: + QMatMulImpl(graph_ep, inputs, outputs); + break; + } + break; + } + } + break; + } + case tim::vx::DataType::UINT8:{ + switch (inputs[B_zero_point]->GetDataType()) { + case tim::vx::DataType::INT8: { + switch (inputs[out_zero_point]->GetDataType()) { + case tim::vx::DataType::INT8: + QMatMulImpl(graph_ep, inputs, outputs); + break; + case tim::vx::DataType::UINT8: + QMatMulImpl(graph_ep, inputs, outputs); + break; + } + break; + } + case tim::vx::DataType::UINT8: { + switch (inputs[out_zero_point]->GetDataType()) { + case tim::vx::DataType::INT8: + QMatMulImpl(graph_ep, inputs, outputs); + break; + case tim::vx::DataType::UINT8: + QMatMulImpl(graph_ep, inputs, outputs); + break; + } + break; + } + } + break; + } + } + + return true; +} + +} // namespace npu + +} // namespace vsi +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/qlinearmatmul_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/qlinearmatmul_op_builder.h new file mode 100644 index 0000000000000..0eb935bf600ed --- /dev/null +++ b/onnxruntime/core/providers/vsinpu/builders/impl/qlinearmatmul_op_builder.h @@ -0,0 +1,76 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + *****************************************************************************/ +#include "core/providers/vsinpu/builders/impl/base_op_builder.h" + +namespace onnxruntime { +namespace vsi { +namespace npu { +enum { + matrixA = 0, + A_scale = 1, + A_zero_point = 2, + matrixB = 3, + B_scale = 4, + B_zero_point = 5, + out_scale = 6, + out_zero_point = 7 +}; + +class QLinearMatMulOpBuilder : public BaseOpBuilder { + bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer, + const Node* node) const override { + auto input_defs = node->InputDefs(); + auto A_def = input_defs[matrixA]; + auto B_def = input_defs[matrixB]; + for (auto def : input_defs) { + if (def->Name() == A_def->Name() || def->Name() == B_def->Name()) + continue; + else { + if (!graph_viewer.IsInitializedTensor(def->Name())) { + LOGS_DEFAULT(WARNING) << "Scale and zero point must be known before setting graph."; + return false; + } + } + } + + if (input_defs[A_scale]->Shape()->dim_size() != 1 || input_defs[B_scale]->Shape()->dim_size() != 1 || input_defs[out_scale]->Shape()->dim_size() != 1) { + LOGS_DEFAULT(ERROR) << "Per channel quantized output is not supported in QuantizeLinearOp."; + return false; + } + + return true; + } + bool HandleBuildOp(vsi::npu::GraphEP* graph_ep, + std::vector>& inputs, + std::vector>& outputs, + const Node* node) override; + + private: + template + struct QMatMulImpl; +}; +} // namespace npu + +} // namespace vsi +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/reduce_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/reduce_op_builder.h new file mode 100644 index 0000000000000..86c1204f0286e --- /dev/null +++ b/onnxruntime/core/providers/vsinpu/builders/impl/reduce_op_builder.h @@ -0,0 +1,80 @@ +/**************************************************************************** + * + * Copyright (c) 2024 Vivante Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + *****************************************************************************/ +#include "core/providers/vsinpu/builders/impl/base_op_builder.h" +#include "core/providers/shared/utils/utils.h" + +namespace onnxruntime { +namespace vsi { +namespace npu { +class ReduceMeanOpBuilder : public BaseOpBuilder { + bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer, + const Node* node) const override { + auto input_defs = node->InputDefs(); + if (*input_defs[0]->Type() == "tensor(int32)") { + LOGS_DEFAULT(WARNING) << "Not support int32 reduce mean yet."; + return false; + } + return true; + } + bool HandleBuildOp(vsi::npu::GraphEP* graph_ep, + std::vector>& inputs, + std::vector>& outputs, + const Node* node) override { + LOGS_DEFAULT(INFO) << "Creating ReduceMean Op."; + + NodeAttrHelper helper(*node); + std::vector def_axes; + auto input_shape_size = inputs[0]->GetShape().size(); + + if (node->SinceVersion() < 18 && helper.HasAttr("axes")) { + def_axes = helper.Get("axes", def_axes); + } else if (inputs.size() > 1) { + def_axes.resize(inputs[1]->GetSpec().GetElementNum()); + inputs[1]->CopyDataFromTensor(def_axes.data()); + } else { + for (int64_t i = 0; i < input_shape_size; ++i) { + def_axes.push_back(i); + } + } + + std::vector axes(def_axes.begin(), def_axes.end()); + axes = util::ReverseAxis(axes, input_shape_size); + + if (helper.HasAttr("noop_with_empty_axes") && inputs.size() == 1 && helper.Get("noop_with_empty_axes", 0) == 1) { + outputs[0] = inputs[0]; + return true; + } + + bool keepdims = helper.Get("keepdims", 1) == 1; + auto op = graph_ep->GetGraph()->CreateOperation(axes, keepdims); + op->BindInput(inputs[0]).BindOutputs(outputs); + graph_ep->GetOps().push_back(std::move(op)); + + return true; +} +}; +} // namespace npu + +} // namespace vsi +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/vsinpu/builders/impl/softmax_op_builder.h b/onnxruntime/core/providers/vsinpu/builders/impl/softmax_op_builder.h index cbe49dbe547af..2c3817e840f12 100644 --- a/onnxruntime/core/providers/vsinpu/builders/impl/softmax_op_builder.h +++ b/onnxruntime/core/providers/vsinpu/builders/impl/softmax_op_builder.h @@ -74,8 +74,7 @@ class SoftmaxOpBuilder : public BaseOpBuilder { graph_ep->GetOps().push_back(std::move(softmax_op)); graph_ep->GetOps().push_back(std::move(reshaped_output_op)); } else { - axis = HandleNegativeAxis(axis, inputs[0]->GetShape().size()); - axis = inputs[0]->GetShape().size() - axis - 1; + axis = util::ReverseAxis(axis, inputs[0]->GetShape().size()); auto op = graph_ep->GetGraph()->CreateOperation(1, static_cast(axis)); (*op).BindInputs(inputs).BindOutputs(outputs); graph_ep->GetOps().push_back(std::move(op)); diff --git a/onnxruntime/core/providers/vsinpu/builders/op_builder_factory.h b/onnxruntime/core/providers/vsinpu/builders/op_builder_factory.h index 6cfd9703f8a06..cafce323e7f34 100644 --- a/onnxruntime/core/providers/vsinpu/builders/op_builder_factory.h +++ b/onnxruntime/core/providers/vsinpu/builders/op_builder_factory.h @@ -34,6 +34,12 @@ #include "impl/tensor_op_builder.h" #include "impl/concat_op_builder.h" #include "impl/softmax_op_builder.h" +#include "impl/norm_op_builder.h" +#include "impl/clip_op_builder.h" +#include "impl/reduce_op_builder.h" +#include "impl/quantize_op_builder.h" +#include "impl/dequantize_op_builder.h" +#include "impl/qlinearmatmul_op_builder.h" namespace onnxruntime { namespace vsi { namespace npu { @@ -75,7 +81,13 @@ static const std::map reg = { REGISTER_OP_BUILDER("Reshape", ReshapeOpBuilder), 
REGISTER_OP_BUILDER("Concat", ConcatOpBuilder), REGISTER_OP_BUILDER("Softmax", SoftmaxOpBuilder), - REGISTER_OP_BUILDER("Transpose", TransposeOpBuilder) + REGISTER_OP_BUILDER("Transpose", TransposeOpBuilder), + REGISTER_OP_BUILDER("BatchNormalization", BatchNormOpBuilder), + REGISTER_OP_BUILDER("Clip", ClipOpBuilder), + REGISTER_OP_BUILDER("ReduceMean", ReduceMeanOpBuilder), + REGISTER_OP_BUILDER("QuantizeLinear", QuantizeLinearOpBuilder), + REGISTER_OP_BUILDER("DequantizeLinear", DequantizeLinearOpBuilder), + REGISTER_OP_BUILDER("QLinearMatMul", QLinearMatMulOpBuilder), #undef REGISTER_OP_BUILDER }; diff --git a/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.cc b/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.cc index 88ae316acf4b4..b0ab5f3678602 100644 --- a/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.cc +++ b/onnxruntime/core/providers/vsinpu/vsinpu_ep_graph.cc @@ -86,7 +86,7 @@ std::shared_ptr GraphEP::MapTIMVXTensor( for (auto output : graph_outputs_) { if (output->name == arg->Name()) { output->tensor = tensor; - output->shape = vsi::npu::util::GetTensorShape(*arg); + output->shape = utils::GetTensorShapeFromTensorShapeProto(*arg->Shape()); break; } } diff --git a/onnxruntime/core/providers/vsinpu/vsinpu_util.cc b/onnxruntime/core/providers/vsinpu/vsinpu_util.cc index 82ec624ac185e..1d53fc326448b 100644 --- a/onnxruntime/core/providers/vsinpu/vsinpu_util.cc +++ b/onnxruntime/core/providers/vsinpu/vsinpu_util.cc @@ -47,6 +47,8 @@ tim::vx::DataType OnnxDtypeToTIMVXDtype(const int32_t dtype) { return tim::vx::DataType::INT32; case onnx::TensorProto_DataType_INT16: return tim::vx::DataType::INT16; + case onnx::TensorProto_DataType_UINT16: + return tim::vx::DataType::UINT16; case onnx::TensorProto_DataType_BOOL: return tim::vx::DataType::INT8; default: @@ -64,6 +66,7 @@ tim::vx::DataType OnnxDtypeToTIMVXDtype(const ONNX_NAMESPACE::DataType type) { {"tensor(uint8)", tim::vx::DataType::UINT8}, {"tensor(int32)", tim::vx::DataType::INT32}, {"tensor(int16)", tim::vx::DataType::INT16}, + {"tensor(uint16)", tim::vx::DataType::UINT16}, {"tensor(int64)", tim::vx::DataType::INT64}, {"tensor(bool)", tim::vx::DataType::INT8}, }; @@ -277,12 +280,33 @@ bool CheckNoZeroDim(const Node* node) { }); if (!no_zero_dim) { - LOGS_DEFAULT(ERROR) <<"Tensor with dimension 0 is not supported."; + LOGS_DEFAULT(ERROR) << "Tensor with dimension 0 is not supported."; return false; } return true; } +int32_t ReverseAxis(int32_t origin_axis, int32_t length) { + int32_t axis = 0; + if (origin_axis < 0) { + origin_axis += length; + } + axis = length - origin_axis - 1; + return axis; +} + +std::vector ReverseAxis(std::vector origin_axes, int32_t length) { + std::vector axes; + for (int32_t& axis : origin_axes) { + if (axis < 0) { + axis += length; + } + axes.push_back(length - axis - 1); + } + std::sort(axes.begin(), axes.end()); + return axes; +} + } // namespace util } // namespace npu } // namespace vsi diff --git a/onnxruntime/core/providers/vsinpu/vsinpu_util.h b/onnxruntime/core/providers/vsinpu/vsinpu_util.h index 811624c9fb2cc..cdaff95ad2fcc 100644 --- a/onnxruntime/core/providers/vsinpu/vsinpu_util.h +++ b/onnxruntime/core/providers/vsinpu/vsinpu_util.h @@ -69,6 +69,10 @@ bool CheckAllExcludeType(const Node* node, std::string& reason); bool CheckNoZeroDim(const Node* node); +int32_t ReverseAxis(int32_t origin_axis, int32_t length); + +std::vector ReverseAxis(std::vector origin_axes, int32_t length); + } // namespace util } // namespace npu } // namespace vsi