Fixed quantized add op for rknn (#1384)
Zheng-Bicheng authored Sep 18, 2024
1 parent 0a5915e commit 253b9e7
Showing 15 changed files with 411 additions and 334 deletions.
2 changes: 1 addition & 1 deletion VERSION_NUMBER
@@ -1 +1 @@
1.2.8
1.2.9
1 change: 0 additions & 1 deletion paddle2onnx/mapper/activation/hard_swish.cc
@@ -18,7 +18,6 @@ namespace paddle2onnx {
REGISTER_MAPPER(hard_swish, HardSwishMapper)

int32_t HardSwishMapper::GetMinOpsetVersion(bool verbose) {
Logger(verbose, 14) << RequireOpset(14) << std::endl;
return 14;
}

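For context, ONNX only standardized a native HardSwish operator in opset 14, which is why the mapper returns 14 unconditionally. A minimal standalone sketch of the activation itself (illustrative; not code from this diff):

#include <algorithm>

// HardSwish as defined by ONNX opset 14: x * min(max(x + 3, 0), 6) / 6.
inline float HardSwish(float x) {
  return x * std::min(std::max(x + 3.0f, 0.0f), 6.0f) / 6.0f;
}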
7 changes: 6 additions & 1 deletion paddle2onnx/mapper/exporter.cc
@@ -81,14 +81,19 @@ namespace paddle2onnx
}
}

if (unsupported_ops.size() == 0)
{
return true;
}

auto logger = P2OLogger();
logger << "Oops, there are some operators not supported yet, including ";
for (auto &item : unsupported_ops)
{
logger << item << ",";
}
logger << std::endl;
return (unsupported_ops.size() == 0);
return false;
}

int32_t ModelExporter::GetMinOpsetVersion(const PaddleParser &parser)
7 changes: 2 additions & 5 deletions paddle2onnx/mapper/nn/dropout.cc
@@ -55,11 +55,8 @@ void DropoutMapper::Opset7() {
} else {
GetAttr("dropout_prob", &dropout_prob_);
}
std::string scale_node = helper_->Constant(
{}, GetOnnxDtype(input_info[0].dtype), 1 - dropout_prob_);
helper_->MakeNode("Mul", {input_info[0].name, scale_node},
{output_info[0].name});
std::string scale_node = helper_->Constant({1}, GetOnnxDtype(input_info[0].dtype), 1 - dropout_prob_);
helper_->MakeNode("Mul", {input_info[0].name, scale_node}, {output_info[0].name});
}
}

} // namespace paddle2onnx
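At inference time, downscale-mode dropout reduces to a single elementwise multiply by 1 - dropout_prob, which is exactly what the Mul against a constant scale implements; the change above only moves the constant from a scalar shape {} to a one-element tensor {1}. A standalone sketch of the identity (DropoutInfer is a hypothetical helper, not part of the codebase):

#include <vector>

// Inference-time dropout in downscale mode: no masking, just scale
// every activation by (1 - dropout_prob).
std::vector<float> DropoutInfer(const std::vector<float>& x, float dropout_prob) {
  std::vector<float> y;
  y.reserve(x.size());
  for (float v : x) y.push_back(v * (1.0f - dropout_prob));
  return y;
}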
45 changes: 1 addition & 44 deletions paddle2onnx/mapper/quantize/dequantize_linear.cc
@@ -18,50 +18,7 @@ namespace paddle2onnx {
REGISTER_MAPPER(dequantize_linear, DequantizeLinearMapper)

int32_t DequantizeLinearMapper::GetMinOpsetVersion(bool verbose) {
if (!IsConstantInput("Scale")) {
Error() << "Input `Scale` requires to be a constant tensor." << std::endl;
return -1;
}
std::vector<float> scales;
if (!TryGetInputValue("Scale", &scales)) {
Error() << "Failed to read tensor value of `Scale`." << std::endl;
return -1;
}
if (bit_length_ != 8) {
Error() << "Only support bit_length = 8." << std::endl;
return -1;
}
if (scales.size() > 1) {
auto x_info = GetInput("X");
if (x_info[0].shape[quant_axis_] != scales.size()) {
Error() << "Scale size must equal to the size of input quantize axis."
<< std::endl;
return -1;
}
Logger(verbose, 13) << "While size of scales greater than 1, "
<< RequireOpset(13) << std::endl;
return 13;
}
auto x_info = GetInput("X");
auto x_shape = x_info[0].shape;
if (x_shape.size() == 2) {
if (quant_axis_ != 1) {
Error() << "When the rank of input is 2, the attribute quant_axis "
"requires to be 1."
<< std::endl;
return -1;
}
} else if (x_shape.size() == 4) {
if (!(quant_axis_ == 1 || quant_axis_ == 0)) {
Error() << "When the rank of input is 4, the attribute quant_axis "
"requires to be 0 or 1."
<< std::endl;
return -1;
}
}

Logger(verbose, 10) << RequireOpset(10) << std::endl;
return 10;
return 13;
}

void DequantizeLinearMapper::ConvertInt8ToFp32(
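The mapper now reports opset 13 unconditionally: per-axis support for DequantizeLinear (the axis attribute) only exists from opset 13 onward, so the earlier case analysis that allowed opset 10 is gone. The per-channel math itself, as a hedged standalone sketch (DequantizePerChannel is hypothetical):

#include <cstdint>
#include <vector>

// Per-channel dequantization of a [C, K] int8 tensor along axis 0:
// fp32[c][k] = (q[c][k] - zero_point[c]) * scale[c]
std::vector<float> DequantizePerChannel(const std::vector<int8_t>& q,
                                        const std::vector<float>& scale,
                                        const std::vector<int64_t>& zero_point,
                                        int64_t channels, int64_t inner) {
  std::vector<float> out(q.size());
  for (int64_t c = 0; c < channels; ++c) {
    for (int64_t k = 0; k < inner; ++k) {
      const int64_t idx = c * inner + k;
      out[idx] = (static_cast<float>(q[idx]) -
                  static_cast<float>(zero_point[c])) * scale[c];
    }
  }
  return out;
}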
31 changes: 1 addition & 30 deletions paddle2onnx/mapper/quantize/quantize_linear.cc
@@ -18,36 +18,7 @@ namespace paddle2onnx {
REGISTER_MAPPER(quantize_linear, QuantizeLinearMapper)

int32_t QuantizeLinearMapper::GetMinOpsetVersion(bool verbose) {
if (!IsConstantInput("Scale")) {
Error() << "Input `Scale` requires to be a constant tensor." << std::endl;
return -1;
}
std::vector<float> scales;
if (!TryGetInputValue("Scale", &scales)) {
Error() << "Failed to read tensor value of `Scale`." << std::endl;
return -1;
}
if (bit_length_ != 8) {
Error() << "Only support bit_length = 8." << std::endl;
return -1;
}
if (round_type_ != 0) {
Error() << "The round_type attr of quantize_linear must be 0." << std::endl;
return -1;
}
if (scales.size() > 1) {
auto x_info = GetInput("X");
if (x_info[0].shape[quant_axis_] != scales.size()) {
Error() << "Scale size must equal to the size of input quantize axis."
<< std::endl;
return -1;
}
Logger(verbose, 13) << "While size of scales greater than 1, "
<< RequireOpset(13) << std::endl;
return 13;
}
Logger(verbose, 10) << RequireOpset(10) << std::endl;
return 10;
return 13;
}

void QuantizeLinearMapper::Opset10() {
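QuantizeLinear follows the same reasoning: per-axis quantization requires opset 13, hence the unconditional return. The forward direction of the same math (hedged sketch; QuantizePerChannel is hypothetical):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Per-channel quantization of a [C, K] fp32 tensor along axis 0:
// q[c][k] = clamp(round(x[c][k] / scale[c]) + zero_point[c], -128, 127)
std::vector<int8_t> QuantizePerChannel(const std::vector<float>& x,
                                       const std::vector<float>& scale,
                                       const std::vector<int64_t>& zero_point,
                                       int64_t channels, int64_t inner) {
  std::vector<int8_t> q(x.size());
  for (int64_t c = 0; c < channels; ++c) {
    for (int64_t k = 0; k < inner; ++k) {
      const int64_t idx = c * inner + k;
      const float r = std::round(x[idx] / scale[c]) +
                      static_cast<float>(zero_point[c]);
      q[idx] = static_cast<int8_t>(std::min(127.0f, std::max(-128.0f, r)));
    }
  }
  return q;
}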
120 changes: 74 additions & 46 deletions paddle2onnx/mapper/quantize_helper.cc
@@ -237,6 +237,7 @@ void QuantizeModelProcessor::AddQDQForRKNN() {
"Reshape",
"Resize",
"Round",
"Shape",
"Sigmoid",
"Sin",
"Sinh",
@@ -270,35 +271,28 @@ void QuantizeModelProcessor::AddQDQForRKNN() {
continue;
}

std::vector<float> matmul_weight;
if (!GetTensorByName(name, &matmul_weight)) {
std::vector<float> weight;
if (!GetTensorByName(name, &weight)) {
P2OLogger() << "Failed to GetTensorByName: " << node->op_type() << ";" << name << std::endl;
continue;
}

std::vector<int64_t> matmul_weight_shape;
if (!GetTensorShape(name, &matmul_weight_shape)) {
std::vector<int64_t> weight_shape;
if (!GetTensorShape(name, &weight_shape)) {
P2OLogger() << "Failed to GetTensorShape: " << node->op_type() << ";" << name << std::endl;
continue;
}

int64_t quantize_axis = 1;
std::vector<float> scale;
std::vector<int64_t> zeros;
if(matmul_weight_shape.size() == 1) {
quantize_axis = 0;
}
GetChannelWiseQuantizeInfo(matmul_weight, matmul_weight_shape, quantize_axis, &scale, &zeros);
std::string scale_node, zero_node;

if (scale.size() == 1) {
scale_node = helper_->Constant({}, ONNX_NAMESPACE::TensorProto::FLOAT, scale[0]);
zero_node = helper_->Constant({}, ONNX_NAMESPACE::TensorProto::INT8, zeros[0]);
} else {
scale_node = helper_->Constant(ONNX_NAMESPACE::TensorProto::FLOAT, scale);
zero_node = helper_->Constant(ONNX_NAMESPACE::TensorProto::INT8, zeros);
}
QuantizeInfo matmul_weight_quantize_info(scale, zeros, scale_node, zero_node, quantize_axis);
GetTensorWiseQuantizeInfo(weight, &scale, &zeros);

std::string weight_scale_node, weight_zero_node;
weight_scale_node = helper_->Constant({}, ONNX_NAMESPACE::TensorProto::FLOAT, scale[0]);
weight_zero_node = helper_->Constant({}, ONNX_NAMESPACE::TensorProto::INT8, zeros[0]);

QuantizeInfo matmul_weight_quantize_info(scale, zeros, weight_scale_node, weight_zero_node, quantize_axis);
helper_->quantize_info[name] = matmul_weight_quantize_info;
}
} else if (node->op_type() == "BatchNormalization") {
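The rewritten branch above switches these weights from channel-wise to tensor-wise quantization: one symmetric scale for the whole tensor, zero point fixed at 0. A minimal sketch of that computation, assuming the max|w| / 127 convention used elsewhere in this file (TensorWiseQuantizeInfo here is illustrative, not the project's implementation):

#include <algorithm>
#include <cmath>
#include <vector>

// Symmetric tensor-wise int8 parameters: a single scale for the whole
// tensor, computed from its largest absolute value; zero point is 0.
void TensorWiseQuantizeInfo(const std::vector<float>& tensor,
                            std::vector<float>* scale,
                            std::vector<int64_t>* zero) {
  float max_val = 0.0f;
  for (float v : tensor) max_val = std::max(max_val, std::fabs(v));
  scale->push_back(max_val / 127.0f);
  zero->push_back(0);
}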
@@ -1024,14 +1018,34 @@ bool QuantizeModelProcessor::GetTensorShape(const std::string& name,
}
for (auto i = 0; i < node.attribute_size(); i++) {
auto attr = node.attribute(i);
if (attr.name() == "value") {
auto tensor = attr.mutable_t();
for (int64_t i = 0; i < tensor->dims_size(); i++) {
shape->push_back(tensor->dims(i));
}
if (attr.name() != "value") {
continue;
}
auto tensor = attr.mutable_t();
for (int64_t i = 0; i < tensor->dims_size(); i++) {
shape->push_back(tensor->dims(i));
}
}
}

for (auto& item : *nodes_)
{
auto node = *(item.get());
if (node.output(0) != name) {
continue;
}
for (auto i = 0; i < node.attribute_size(); i++) {
auto attr = node.attribute(i);
if (attr.name() != "value") {
continue;
}
auto tensor = attr.mutable_t();
for (int64_t i = 0; i < tensor->dims_size(); i++) {
shape->push_back(tensor->dims(i));
}
}
}

return !shape->empty();
}

@@ -1051,21 +1065,18 @@ void QuantizeModelProcessor::GetTensorWiseQuantizeInfo(
zero->push_back(0);
}

void QuantizeModelProcessor::GetChannelWiseQuantizeInfo(const std::vector<float>& tensor,
const std::vector<int64_t>& shapes,
int64_t quant_axis,
std::vector<float>* scale,
std::vector<int64_t>* zero) {
int64_t channel_count = 1;
if (shapes.size() != 1) {
quant_axis = 1;
}
if (quant_axis == 0) {
for (int64_t i = 0; i < channel_count; i++) {
void QuantizeModelProcessor::GetChannelWiseQuantizeInfo(
const std::vector<float>& tensor, const std::vector<int64_t>& shape,
const int64_t& quant_axis, std::vector<float>* scale,
std::vector<int64_t>* zero) {
int64_t channel_count = shape[quant_axis];

for (int64_t i = 0; i < channel_count; i++) {
if (quant_axis == 0) {
float max_val = -1;
int64_t inner_offset = 1;
for (auto& shape : shapes) {
inner_offset *= shape;
for (auto& j : shape) {
inner_offset *= j;
}
inner_offset /= channel_count;
int64_t index = i * inner_offset;
Expand All @@ -1074,19 +1085,36 @@ void QuantizeModelProcessor::GetChannelWiseQuantizeInfo(const std::vector<float>
max_val = fabs(tensor[index + j]);
}
}
Assert(max_val >= 0, "[GetChannelWiseQuantizeInfo] Require the scale >= 0, but now it's " + std::to_string(max_val) + ".");
Assert(
max_val >= 0,
"[GetChannelWiseQuantizeInfo] Require the scale >= 0, but now it's " +
std::to_string(max_val) + ".");
scale->push_back(max_val / 127);
zero->push_back(0);
} else if (quant_axis == 1) {
float max_val = -1;
int64_t inner_offset = shape.size() == 4 ? shape[2] * shape[3] : 1;
for (int64_t outter = 0; outter < shape[0]; outter++) {
int64_t index = outter * channel_count * inner_offset;
for (int64_t inner = 0; inner < inner_offset; inner++) {
int64_t final_index = index + i * inner_offset + inner;
if (fabs(tensor[final_index]) > max_val) {
max_val = fabs(tensor[final_index]);
}
}
}
Assert(
max_val >= 0,
"[GetChannelWiseQuantizeInfo] Require the scale >= 0, but now it's " +
std::to_string(max_val) + ".");
scale->push_back(max_val / 127);
zero->push_back(0);
} else {
Assert(false,
"QuantizeModelProcessor::GetChannelWiseQuantizeInfo only supports "
"quant_axis equals to 0 or 1, but now it's " +
std::to_string(quant_axis) + ".");
}
} else if (quant_axis == 1) {
auto max_val = *std::max_element(tensor.begin(), tensor.end());
Assert(max_val >= 0, "[GetChannelWiseQuantizeInfo] Require the scale >= 0, but now it's " + std::to_string(max_val) + ".");
scale->push_back(max_val / 127);
zero->push_back(0);
} else {
Assert(false,
"QuantizeModelProcessor::GetChannelWiseQuantizeInfo only supports quant_axis equals to 0, 1, -1, "
"but now it's " + std::to_string(quant_axis) + ".");
}
}

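For reference, the per-channel logic above condenses to the following standalone sketch (an illustrative restatement, not the file's code; it assumes quant_axis is 0 or 1 and that 4-D weights use an NCHW-like layout):

#include <algorithm>
#include <cmath>
#include <vector>

// Per-channel symmetric int8 scales: one scale per slice along quant_axis,
// each computed as max|w| / 127 over that slice, with zero points of 0.
void ChannelWiseQuantizeInfo(const std::vector<float>& tensor,
                             const std::vector<int64_t>& shape,
                             int64_t quant_axis,
                             std::vector<float>* scale,
                             std::vector<int64_t>* zero) {
  const int64_t channels = shape[quant_axis];
  int64_t total = 1;
  for (int64_t d : shape) total *= d;
  for (int64_t c = 0; c < channels; ++c) {
    float max_val = 0.0f;
    if (quant_axis == 0) {
      const int64_t inner = total / channels;  // contiguous slice per channel
      for (int64_t j = 0; j < inner; ++j)
        max_val = std::max(max_val, std::fabs(tensor[c * inner + j]));
    } else {  // quant_axis == 1: strided slices, one per outer index
      const int64_t inner = shape.size() == 4 ? shape[2] * shape[3] : 1;
      for (int64_t n = 0; n < shape[0]; ++n)
        for (int64_t j = 0; j < inner; ++j)
          max_val = std::max(max_val,
                             std::fabs(tensor[(n * channels + c) * inner + j]));
    }
    scale->push_back(max_val / 127.0f);
    zero->push_back(0);
  }
}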
10 changes: 5 additions & 5 deletions paddle2onnx/mapper/quantize_helper.h
@@ -116,11 +116,11 @@ struct QuantizeModelProcessor {
std::vector<int64_t>* zero);

// Perform channel wise quantization, returning scale and zero
void GetChannelWiseQuantizeInfo(const std::vector<float>& tensor,
const std::vector<int64_t>& shapes,
int64_t quant_axis,
std::vector<float>* scale,
std::vector<int64_t>* zero);
void GetChannelWiseQuantizeInfo(const std::vector<float>& tensor,
const std::vector<int64_t>& shape,
const int64_t& quant_axis,
std::vector<float>* scale,
std::vector<int64_t>* zero);

// Generate name2node_dict to save input name and its related nodes
void UpdateInputNameToNodes();
2 changes: 0 additions & 2 deletions paddle2onnx/mapper/tensor/elementwise.cc
@@ -15,10 +15,8 @@

namespace paddle2onnx {

REGISTER_MAPPER(elementwise_add, ElementwiseMapper)
REGISTER_MAPPER(elementwise_sub, ElementwiseMapper)
REGISTER_MAPPER(elementwise_div, ElementwiseMapper)
REGISTER_MAPPER(elementwise_mul, ElementwiseMapper)
REGISTER_MAPPER(elementwise_min, ElementwiseMapper)
REGISTER_MAPPER(elementwise_max, ElementwiseMapper)
REGISTER_MAPPER(elementwise_pow, ElementwiseMapper)
2 changes: 0 additions & 2 deletions paddle2onnx/mapper/tensor/elementwise.h
@@ -27,10 +27,8 @@ class ElementwiseMapper : public Mapper {
: Mapper(p, helper, block_id, op_id) {
GetAttr("axis", &axis_);

op_mapper_["elementwise_add"] = "Add";
op_mapper_["elementwise_sub"] = "Sub";
op_mapper_["elementwise_div"] = "Div";
op_mapper_["elementwise_mul"] = "Mul";
op_mapper_["elementwise_min"] = "Min";
op_mapper_["elementwise_max"] = "Max";
op_mapper_["elementwise_pow"] = "Pow";
(Diffs for the remaining changed files are not rendered here.)