diff --git a/src/relay/qnn/op/add.cc b/src/relay/qnn/op/add.cc
index 143293d74c270..5a11cd419fd26 100644
--- a/src/relay/qnn/op/add.cc
+++ b/src/relay/qnn/op/add.cc
@@ -89,7 +89,7 @@ Expr QnnAddCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   float real_lhs_scale_val = lhs_scale_val / twice_max_input_scale * (1 << 20);
   float real_rhs_scale_val = rhs_scale_val / twice_max_input_scale * (1 << 20);
   float real_out_scale_val = twice_max_input_scale / ((1 << 20) * out_scale_val);
-
+
   auto real_lhs_scale = MakeConstantScalar(
       DataType::Float(32), real_lhs_scale_val);
   auto real_rhs_scale = MakeConstantScalar(
@@ -115,14 +115,14 @@ Expr QnnAddCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
                         itmd_out_scale, output_zero_point, DataType::Int(32), rounding);
   } else {
-    // FIXME (anijain2305) - The lowering can be further optimized. Instead of inserting requantize in
-    // the start, we can insert requantize at the end if both input tensors have same qnn params. In
-    // that case, we can first add the tensors, subtract the zero point, and requantize at the end.
-    // This can be done in future.
-
+    // FIXME (anijain2305) - The lowering can be further optimized. Instead of inserting requantize
+    // in the start, we can insert requantize at the end if both input tensors have same qnn params.
+    // In that case, we can first add the tensors, subtract the zero point, and requantize at the
+    // end. This can be done in future.
+
     // Since the input qnn params can be different than output qnn params, we first requantize the
-    // input tensors to the output qnn params. Then we call relay.add on the requantized inputs. This
-    // addition results in extra addition of the output zero point. We futher subtract the zero
+    // input tensors to the output qnn params. Then we call relay.add on the requantized inputs.
+    // This addition results in extra addition of the output zero point. We further subtract the zero
     // point. The whole process can be represented using following equations
     //
     //          scale_c * (Q_c - zp_c) = scale_a * (Q_a - zp_a) + scale_b * (Q_b - zp_b)
@@ -134,7 +134,7 @@ Expr QnnAddCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
     // Comparing the LHS and RHS, it results in
     //          Q_c = Q_a' + Q_b' - zp_c
     // The add op is done in int32 precision.
-
+
     // Requantize LHS if necessary.
     auto requantized_lhs = lhs;
     if (!IsEqualScalar(lhs_scale, output_scale) ||
@@ -144,7 +144,7 @@ Expr QnnAddCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
     } else {
       requantized_lhs = Cast(requantized_lhs, DataType::Int(32));
     }
-
+
     // Requantize RHS if necessary.
     auto requantized_rhs = rhs;
     if (!IsEqualScalar(rhs_scale, output_scale) ||
@@ -156,20 +156,19 @@ Expr QnnAddCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
     }
 
     output = Add(requantized_lhs, requantized_rhs);
-
+
     // Subtract zero point.
     auto zero_scalar = MakeConstantScalar(DataType::Int(32), 0);
     if (!IsEqualScalar(output_zero_point, zero_scalar)) {
       output = Subtract(output, output_zero_point);
     }
   }
-
+
   // Go back to lower precision.
   auto q_min = GetQmin(input_dtype);
   auto q_max = GetQmax(input_dtype);
   output = Clip(output, q_min, q_max);
   return Cast(output, input_dtype);
-
 }
 
 Expr MakeQnnAdd(Expr lhs, Expr rhs, Expr lhs_scale, Expr lhs_zero_point,
@@ -177,7 +176,7 @@ Expr MakeQnnAdd(Expr lhs, Expr rhs, Expr lhs_scale, Expr lhs_zero_point,
                 Expr output_zero_point, std::string rounding) {
   auto attrs = make_object();
   attrs->rounding = std::move(rounding);
-
+
   static const Op& op = Op::Get("qnn.add");
   return CallNode::make(op, {lhs, rhs, lhs_scale, lhs_zero_point,
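The comment block in `QnnAddCanonicalize` above derives the integer-only lowering `Q_c = Q_a' + Q_b' - zp_c`. A minimal standalone sketch of that arithmetic follows — plain scalar C++, not TVM code; the `Requantize` helper and all scales, zero points, and values are illustrative assumptions:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Hypothetical scalar requantize: moves q from (scale_in, zp_in) to (scale_out, zp_out).
int32_t Requantize(int32_t q, float scale_in, int32_t zp_in, float scale_out, int32_t zp_out) {
  float real_value = scale_in * static_cast<float>(q - zp_in);
  return static_cast<int32_t>(std::lround(real_value / scale_out)) + zp_out;
}

int main() {
  // a represents 5.0f and b represents 1.0f under their respective qnn params.
  float scale_a = 0.5f, scale_b = 0.25f, scale_c = 0.5f;
  int32_t zp_a = 1, zp_b = 2, zp_c = 3;
  int32_t q_a = 11, q_b = 6;

  // Requantize both inputs to the output qnn params, as the lowering does.
  int32_t q_a2 = Requantize(q_a, scale_a, zp_a, scale_c, zp_c);  // Q_a' = 13
  int32_t q_b2 = Requantize(q_b, scale_b, zp_b, scale_c, zp_c);  // Q_b' = 5

  // Add in int32, then subtract the doubly-counted output zero point.
  int32_t q_c = q_a2 + q_b2 - zp_c;  // 15

  // Clip back to the narrow output range (uint8 here) before the final cast.
  q_c = std::min<int32_t>(std::max<int32_t>(q_c, 0), 255);

  std::printf("Q_c = %d, real = %f\n", q_c, scale_c * static_cast<float>(q_c - zp_c));
  return 0;
}
```

Dequantizing the result, `scale_c * (Q_c - zp_c) = 0.5 * (15 - 3) = 6.0`, matches `scale_a * (Q_a - zp_a) + scale_b * (Q_b - zp_b) = 5.0 + 1.0`, which is exactly the LHS/RHS comparison the comment refers to.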
diff --git a/topi/include/topi/nn/pooling.h b/topi/include/topi/nn/pooling.h
index c47280aa68b06..3b1e6fd24e9f1 100644
--- a/topi/include/topi/nn/pooling.h
+++ b/topi/include/topi/nn/pooling.h
@@ -578,7 +578,7 @@ inline Tensor adaptive_pool_impl(const Tensor& x,
     Array<PrimExpr> indices;
     Array<tir::IterVar> reduce_axes;
     std::tie(indices, reduce_axes) = get_iter_vars(output, false);
-
+
     PrimExpr divide_factor = tvm::cast(x->dtype, 1);
     for (size_t i = 0; i < n_dim; ++i) {
       divide_factor *= tvm::cast(x->dtype, reduce_axes[i]->dom->extent);
@@ -592,12 +592,12 @@ inline Tensor adaptive_pool_impl(const Tensor& x,
     Array<PrimExpr> indices;
     Array<tir::IterVar> reduce_axes;
     std::tie(indices, reduce_axes) = get_iter_vars(output, false);
-
+
     PrimExpr divide_factor = tvm::cast(x->dtype, 1);
     for (size_t i = 0; i < n_dim; ++i) {
       divide_factor *= tvm::cast(x->dtype, reduce_axes[i]->dom->extent);
     }
-
+
     return div(pool_sum(indices), divide_factor);
   }, "tensor", kElementWise);
 }
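For the two `adaptive_pool_impl` hunks: `divide_factor` is the product of the reduce-axis extents, i.e. the number of input elements summed into each output bin, so `div(pool_sum(indices), divide_factor)` yields the window average. A standalone 1-D sketch of the same computation — plain C++, not TVM; the bin-boundary convention and sizes are assumptions:

```cpp
#include <cstdio>
#include <vector>

int main() {
  // Adaptive 1-D average pooling: 6 inputs -> 2 output bins.
  std::vector<float> x = {1, 2, 3, 4, 5, 6};
  const int out_size = 2;
  const int n = static_cast<int>(x.size());

  for (int o = 0; o < out_size; ++o) {
    // Assumed adaptive-pool bin boundaries: start = floor(o*n/out), end = ceil((o+1)*n/out).
    const int start = (o * n) / out_size;
    const int end = ((o + 1) * n + out_size - 1) / out_size;

    float sum = 0.0f;  // plays the role of pool_sum(indices)
    for (int i = start; i < end; ++i) sum += x[i];

    // The reduce-axis extent for this bin; with several pooled axes this would be
    // a product of extents, mirroring the divide_factor loop in the hunks above.
    const float divide_factor = static_cast<float>(end - start);

    std::printf("out[%d] = %f\n", o, sum / divide_factor);  // div(pool_sum, divide_factor)
  }
  return 0;
}
```

This prints the two window averages 2.0 and 5.0. The TVM code expresses the same division symbolically, which is why `divide_factor` starts at `tvm::cast(x->dtype, 1)` and is multiplied by each `reduce_axes[i]->dom->extent`.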