Fixed quantized add op for rknn (#1384)
Zheng-Bicheng authored Sep 18, 2024
1 parent 0a5915e commit 253b9e7
Showing 15 changed files with 411 additions and 334 deletions.
2 changes: 1 addition & 1 deletion VERSION_NUMBER
@@ -1 +1 @@
1.2.8
1.2.9
1 change: 0 additions & 1 deletion paddle2onnx/mapper/activation/hard_swish.cc
@@ -18,7 +18,6 @@ namespace paddle2onnx {
REGISTER_MAPPER(hard_swish, HardSwishMapper)

int32_t HardSwishMapper::GetMinOpsetVersion(bool verbose) {
Logger(verbose, 14) << RequireOpset(14) << std::endl;
return 14;
}

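For context, ONNX only standardized a native HardSwish operator in opset 14, which is why the mapper returns 14 unconditionally. A minimal standalone sketch of the activation itself (illustrative; not code from this diff):

#include <algorithm>

// HardSwish as defined by ONNX opset 14: x * min(max(x + 3, 0), 6) / 6.
inline float HardSwish(float x) {
  return x * std::min(std::max(x + 3.0f, 0.0f), 6.0f) / 6.0f;
}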
7 changes: 6 additions & 1 deletion paddle2onnx/mapper/exporter.cc
@@ -81,14 +81,19 @@ namespace paddle2onnx
}
}

if (unsupported_ops.size() == 0)
{
return true;
}

auto logger = P2OLogger();
logger << "Oops, there are some operators not supported yet, including ";
for (auto &item : unsupported_ops)
{
logger << item << ",";
}
logger << std::endl;
return (unsupported_ops.size() == 0);
return false;
}

int32_t ModelExporter::GetMinOpsetVersion(const PaddleParser &parser)
7 changes: 2 additions & 5 deletions paddle2onnx/mapper/nn/dropout.cc
@@ -55,11 +55,8 @@ void DropoutMapper::Opset7() {
} else {
GetAttr("dropout_prob", &dropout_prob_);
}
std::string scale_node = helper_->Constant(
{}, GetOnnxDtype(input_info[0].dtype), 1 - dropout_prob_);
helper_->MakeNode("Mul", {input_info[0].name, scale_node},
{output_info[0].name});
std::string scale_node = helper_->Constant({1}, GetOnnxDtype(input_info[0].dtype), 1 - dropout_prob_);
helper_->MakeNode("Mul", {input_info[0].name, scale_node}, {output_info[0].name});
}
}

} // namespace paddle2onnx
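At inference time, downscale-mode dropout reduces to a single elementwise multiply by 1 - dropout_prob, which is exactly what the Mul against a constant scale implements; the change above only moves the constant from a scalar shape {} to a one-element tensor {1}. A standalone sketch of the identity (DropoutInfer is a hypothetical helper, not part of the codebase):

#include <vector>

// Inference-time dropout in downscale mode: no masking, just scale
// every activation by (1 - dropout_prob).
std::vector<float> DropoutInfer(const std::vector<float>& x, float dropout_prob) {
  std::vector<float> y;
  y.reserve(x.size());
  for (float v : x) y.push_back(v * (1.0f - dropout_prob));
  return y;
}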
45 changes: 1 addition & 44 deletions paddle2onnx/mapper/quantize/dequantize_linear.cc
@@ -18,50 +18,7 @@ namespace paddle2onnx {
REGISTER_MAPPER(dequantize_linear, DequantizeLinearMapper)

int32_t DequantizeLinearMapper::GetMinOpsetVersion(bool verbose) {
if (!IsConstantInput("Scale")) {
Error() << "Input `Scale` requires to be a constant tensor." << std::endl;
return -1;
}
std::vector<float> scales;
if (!TryGetInputValue("Scale", &scales)) {
Error() << "Failed to read tensor value of `Scale`." << std::endl;
return -1;
}
if (bit_length_ != 8) {
Error() << "Only support bit_length = 8." << std::endl;
return -1;
}
if (scales.size() > 1) {
auto x_info = GetInput("X");
if (x_info[0].shape[quant_axis_] != scales.size()) {
Error() << "Scale size must equal to the size of input quantize axis."
<< std::endl;
return -1;
}
Logger(verbose, 13) << "While size of scales greater than 1, "
<< RequireOpset(13) << std::endl;
return 13;
}
auto x_info = GetInput("X");
auto x_shape = x_info[0].shape;
if (x_shape.size() == 2) {
if (quant_axis_ != 1) {
Error() << "When the rank of input is 2, the attribute quant_axis "
"requires to be 1."
<< std::endl;
return -1;
}
} else if (x_shape.size() == 4) {
if (!(quant_axis_ == 1 || quant_axis_ == 0)) {
Error() << "When the rank of input is 4, the attribute quant_axis "
"requires to be 0 or 1."
<< std::endl;
return -1;
}
}

Logger(verbose, 10) << RequireOpset(10) << std::endl;
return 10;
return 13;
}

void DequantizeLinearMapper::ConvertInt8ToFp32(
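The mapper now reports opset 13 unconditionally: per-axis support for DequantizeLinear (the axis attribute) only exists from opset 13 onward, so the earlier case analysis that allowed opset 10 is gone. The per-channel math itself, as a hedged standalone sketch (DequantizePerChannel is hypothetical):

#include <cstdint>
#include <vector>

// Per-channel dequantization of a [C, K] int8 tensor along axis 0:
// fp32[c][k] = (q[c][k] - zero_point[c]) * scale[c]
std::vector<float> DequantizePerChannel(const std::vector<int8_t>& q,
                                        const std::vector<float>& scale,
                                        const std::vector<int64_t>& zero_point,
                                        int64_t channels, int64_t inner) {
  std::vector<float> out(q.size());
  for (int64_t c = 0; c < channels; ++c) {
    for (int64_t k = 0; k < inner; ++k) {
      const int64_t idx = c * inner + k;
      out[idx] = (static_cast<float>(q[idx]) -
                  static_cast<float>(zero_point[c])) * scale[c];
    }
  }
  return out;
}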
31 changes: 1 addition & 30 deletions paddle2onnx/mapper/quantize/quantize_linear.cc
@@ -18,36 +18,7 @@ namespace paddle2onnx {
REGISTER_MAPPER(quantize_linear, QuantizeLinearMapper)

int32_t QuantizeLinearMapper::GetMinOpsetVersion(bool verbose) {
if (!IsConstantInput("Scale")) {
Error() << "Input `Scale` requires to be a constant tensor." << std::endl;
return -1;
}
std::vector<float> scales;
if (!TryGetInputValue("Scale", &scales)) {
Error() << "Failed to read tensor value of `Scale`." << std::endl;
return -1;
}
if (bit_length_ != 8) {
Error() << "Only support bit_length = 8." << std::endl;
return -1;
}
if (round_type_ != 0) {
Error() << "The round_type attr of quantize_linear must be 0." << std::endl;
return -1;
}
if (scales.size() > 1) {
auto x_info = GetInput("X");
if (x_info[0].shape[quant_axis_] != scales.size()) {
Error() << "Scale size must equal to the size of input quantize axis."
<< std::endl;
return -1;
}
Logger(verbose, 13) << "While size of scales greater than 1, "
<< RequireOpset(13) << std::endl;
return 13;
}
Logger(verbose, 10) << RequireOpset(10) << std::endl;
return 10;
return 13;
}

void QuantizeLinearMapper::Opset10() {
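QuantizeLinear follows the same reasoning: per-axis quantization requires opset 13, hence the unconditional return. The forward direction of the same math (hedged sketch; QuantizePerChannel is hypothetical):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Per-channel quantization of a [C, K] fp32 tensor along axis 0:
// q[c][k] = clamp(round(x[c][k] / scale[c]) + zero_point[c], -128, 127)
std::vector<int8_t> QuantizePerChannel(const std::vector<float>& x,
                                       const std::vector<float>& scale,
                                       const std::vector<int64_t>& zero_point,
                                       int64_t channels, int64_t inner) {
  std::vector<int8_t> q(x.size());
  for (int64_t c = 0; c < channels; ++c) {
    for (int64_t k = 0; k < inner; ++k) {
      const int64_t idx = c * inner + k;
      const float r = std::round(x[idx] / scale[c]) +
                      static_cast<float>(zero_point[c]);
      q[idx] = static_cast<int8_t>(std::min(127.0f, std::max(-128.0f, r)));
    }
  }
  return q;
}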
120 changes: 74 additions & 46 deletions paddle2onnx/mapper/quantize_helper.cc
@@ -237,6 +237,7 @@ void QuantizeModelProcessor::AddQDQForRKNN() {
"Reshape",
"Resize",
"Round",
"Shape",
"Sigmoid",
"Sin",
"Sinh",
@@ -270,35 +271,28 @@ void QuantizeModelProcessor::AddQDQForRKNN() {
continue;
}

std::vector<float> matmul_weight;
if (!GetTensorByName(name, &matmul_weight)) {
std::vector<float> weight;
if (!GetTensorByName(name, &weight)) {
P2OLogger() << "Failed to GetTensorByName: " << node->op_type() << ";" << name << std::endl;
continue;
}

std::vector<int64_t> matmul_weight_shape;
if (!GetTensorShape(name, &matmul_weight_shape)) {
std::vector<int64_t> weight_shape;
if (!GetTensorShape(name, &weight_shape)) {
P2OLogger() << "Failed to GetTensorShape: " << node->op_type() << ";" << name << std::endl;
continue;
}

int64_t quantize_axis = 1;
std::vector<float> scale;
std::vector<int64_t> zeros;
if(matmul_weight_shape.size() == 1) {
quantize_axis = 0;
}
GetChannelWiseQuantizeInfo(matmul_weight, matmul_weight_shape, quantize_axis, &scale, &zeros);
std::string scale_node, zero_node;

if (scale.size() == 1) {
scale_node = helper_->Constant({}, ONNX_NAMESPACE::TensorProto::FLOAT, scale[0]);
zero_node = helper_->Constant({}, ONNX_NAMESPACE::TensorProto::INT8, zeros[0]);
} else {
scale_node = helper_->Constant(ONNX_NAMESPACE::TensorProto::FLOAT, scale);
zero_node = helper_->Constant(ONNX_NAMESPACE::TensorProto::INT8, zeros);
}
QuantizeInfo matmul_weight_quantize_info(scale, zeros, scale_node, zero_node, quantize_axis);
GetTensorWiseQuantizeInfo(weight, &scale, &zeros);

std::string weight_scale_node, weight_zero_node;
weight_scale_node = helper_->Constant({}, ONNX_NAMESPACE::TensorProto::FLOAT, scale[0]);
weight_zero_node = helper_->Constant({}, ONNX_NAMESPACE::TensorProto::INT8, zeros[0]);

QuantizeInfo matmul_weight_quantize_info(scale, zeros, weight_scale_node, weight_zero_node, quantize_axis);
helper_->quantize_info[name] = matmul_weight_quantize_info;
}
} else if (node->op_type() == "BatchNormalization") {
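The rewritten branch above switches these weights from channel-wise to tensor-wise quantization: one symmetric scale for the whole tensor, zero point fixed at 0. A minimal sketch of that computation, assuming the max|w| / 127 convention used elsewhere in this file (TensorWiseQuantizeInfo here is illustrative, not the project's implementation):

#include <algorithm>
#include <cmath>
#include <vector>

// Symmetric tensor-wise int8 parameters: a single scale for the whole
// tensor, computed from its largest absolute value; zero point is 0.
void TensorWiseQuantizeInfo(const std::vector<float>& tensor,
                            std::vector<float>* scale,
                            std::vector<int64_t>* zero) {
  float max_val = 0.0f;
  for (float v : tensor) max_val = std::max(max_val, std::fabs(v));
  scale->push_back(max_val / 127.0f);
  zero->push_back(0);
}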
@@ -1024,14 +1018,34 @@ bool QuantizeModelProcessor::GetTensorShape(const std::string& name,
}
for (auto i = 0; i < node.attribute_size(); i++) {
auto attr = node.attribute(i);
if (attr.name() == "value") {
auto tensor = attr.mutable_t();
for (int64_t i = 0; i < tensor->dims_size(); i++) {
shape->push_back(tensor->dims(i));
}
if (attr.name() != "value") {
continue;
}
auto tensor = attr.mutable_t();
for (int64_t i = 0; i < tensor->dims_size(); i++) {
shape->push_back(tensor->dims(i));
}
}
}

for (auto& item : *nodes_)
{
auto node = *(item.get());
if (node.output(0) != name) {
continue;
}
for (auto i = 0; i < node.attribute_size(); i++) {
auto attr = node.attribute(i);
if (attr.name() != "value") {
continue;
}
auto tensor = attr.mutable_t();
for (int64_t i = 0; i < tensor->dims_size(); i++) {
shape->push_back(tensor->dims(i));
}
}
}

return !shape->empty();
}

@@ -1051,21 +1065,18 @@ void QuantizeModelProcessor::GetTensorWiseQuantizeInfo(
zero->push_back(0);
}

void QuantizeModelProcessor::GetChannelWiseQuantizeInfo(const std::vector<float>& tensor,
const std::vector<int64_t>& shapes,
int64_t quant_axis,
std::vector<float>* scale,
std::vector<int64_t>* zero) {
int64_t channel_count = 1;
if (shapes.size() != 1) {
quant_axis = 1;
}
if (quant_axis == 0) {
for (int64_t i = 0; i < channel_count; i++) {
void QuantizeModelProcessor::GetChannelWiseQuantizeInfo(
const std::vector<float>& tensor, const std::vector<int64_t>& shape,
const int64_t& quant_axis, std::vector<float>* scale,
std::vector<int64_t>* zero) {
int64_t channel_count = shape[quant_axis];

for (int64_t i = 0; i < channel_count; i++) {
if (quant_axis == 0) {
float max_val = -1;
int64_t inner_offset = 1;
for (auto& shape : shapes) {
inner_offset *= shape;
for (auto& j : shape) {
inner_offset *= j;
}
inner_offset /= channel_count;
int64_t index = i * inner_offset;
Expand All @@ -1074,19 +1085,36 @@ void QuantizeModelProcessor::GetChannelWiseQuantizeInfo(const std::vector<float>
max_val = fabs(tensor[index + j]);
}
}
Assert(max_val >= 0, "[GetChannelWiseQuantizeInfo] Require the scale >= 0, but now it's " + std::to_string(max_val) + ".");
Assert(
max_val >= 0,
"[GetChannelWiseQuantizeInfo] Require the scale >= 0, but now it's " +
std::to_string(max_val) + ".");
scale->push_back(max_val / 127);
zero->push_back(0);
} else if (quant_axis == 1) {
float max_val = -1;
int64_t inner_offset = shape.size() == 4 ? shape[2] * shape[3] : 1;
for (int64_t outter = 0; outter < shape[0]; outter++) {
int64_t index = outter * channel_count * inner_offset;
for (int64_t inner = 0; inner < inner_offset; inner++) {
int64_t final_index = index + i * inner_offset + inner;
if (fabs(tensor[final_index]) > max_val) {
max_val = fabs(tensor[final_index]);
}
}
}
Assert(
max_val >= 0,
"[GetChannelWiseQuantizeInfo] Require the scale >= 0, but now it's " +
std::to_string(max_val) + ".");
scale->push_back(max_val / 127);
zero->push_back(0);
} else {
Assert(false,
"QuantizeModelProcessor::GetChannelWiseQuantizeInfo only supports "
"quant_axis equals to 0 or 1, but now it's " +
std::to_string(quant_axis) + ".");
}
} else if (quant_axis == 1) {
auto max_val = *std::max_element(tensor.begin(), tensor.end());
Assert(max_val >= 0, "[GetChannelWiseQuantizeInfo] Require the scale >= 0, but now it's " + std::to_string(max_val) + ".");
scale->push_back(max_val / 127);
zero->push_back(0);
} else {
Assert(false,
"QuantizeModelProcessor::GetChannelWiseQuantizeInfo only supports quant_axis equals to 0, 1, -1, "
"but now it's " + std::to_string(quant_axis) + ".");
}
}

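For reference, the per-channel logic above condenses to the following standalone sketch (an illustrative restatement, not the file's code; it assumes quant_axis is 0 or 1 and that 4-D weights use an NCHW-like layout):

#include <algorithm>
#include <cmath>
#include <vector>

// Per-channel symmetric int8 scales: one scale per slice along quant_axis,
// each computed as max|w| / 127 over that slice, with zero points of 0.
void ChannelWiseQuantizeInfo(const std::vector<float>& tensor,
                             const std::vector<int64_t>& shape,
                             int64_t quant_axis,
                             std::vector<float>* scale,
                             std::vector<int64_t>* zero) {
  const int64_t channels = shape[quant_axis];
  int64_t total = 1;
  for (int64_t d : shape) total *= d;
  for (int64_t c = 0; c < channels; ++c) {
    float max_val = 0.0f;
    if (quant_axis == 0) {
      const int64_t inner = total / channels;  // contiguous slice per channel
      for (int64_t j = 0; j < inner; ++j)
        max_val = std::max(max_val, std::fabs(tensor[c * inner + j]));
    } else {  // quant_axis == 1: strided slices, one per outer index
      const int64_t inner = shape.size() == 4 ? shape[2] * shape[3] : 1;
      for (int64_t n = 0; n < shape[0]; ++n)
        for (int64_t j = 0; j < inner; ++j)
          max_val = std::max(max_val,
                             std::fabs(tensor[(n * channels + c) * inner + j]));
    }
    scale->push_back(max_val / 127.0f);
    zero->push_back(0);
  }
}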
10 changes: 5 additions & 5 deletions paddle2onnx/mapper/quantize_helper.h
@@ -116,11 +116,11 @@ struct QuantizeModelProcessor {
std::vector<int64_t>* zero);

// Perform channel wise quantization, returning scale and zero
void GetChannelWiseQuantizeInfo(const std::vector<float>& tensor,
const std::vector<int64_t>& shapes,
int64_t quant_axis,
std::vector<float>* scale,
std::vector<int64_t>* zero);
void GetChannelWiseQuantizeInfo(const std::vector<float>& tensor,
const std::vector<int64_t>& shape,
const int64_t& quant_axis,
std::vector<float>* scale,
std::vector<int64_t>* zero);

// Generate name2node_dict to save input name and its related nodes
void UpdateInputNameToNodes();
2 changes: 0 additions & 2 deletions paddle2onnx/mapper/tensor/elementwise.cc
@@ -15,10 +15,8 @@

namespace paddle2onnx {

REGISTER_MAPPER(elementwise_add, ElementwiseMapper)
REGISTER_MAPPER(elementwise_sub, ElementwiseMapper)
REGISTER_MAPPER(elementwise_div, ElementwiseMapper)
REGISTER_MAPPER(elementwise_mul, ElementwiseMapper)
REGISTER_MAPPER(elementwise_min, ElementwiseMapper)
REGISTER_MAPPER(elementwise_max, ElementwiseMapper)
REGISTER_MAPPER(elementwise_pow, ElementwiseMapper)
2 changes: 0 additions & 2 deletions paddle2onnx/mapper/tensor/elementwise.h
@@ -27,10 +27,8 @@ class ElementwiseMapper : public Mapper {
: Mapper(p, helper, block_id, op_id) {
GetAttr("axis", &axis_);

op_mapper_["elementwise_add"] = "Add";
op_mapper_["elementwise_sub"] = "Sub";
op_mapper_["elementwise_div"] = "Div";
op_mapper_["elementwise_mul"] = "Mul";
op_mapper_["elementwise_min"] = "Min";
op_mapper_["elementwise_max"] = "Max";
op_mapper_["elementwise_pow"] = "Pow";
(Diffs for the remaining changed files are not rendered here.)