From 73ca28851cee4db88cbbc80220f0852b0dc5b4fd Mon Sep 17 00:00:00 2001
From: Ashwini Khade
Date: Tue, 6 Aug 2019 18:08:40 -0700
Subject: [PATCH] update quantizelinear to process int8 input

---
 .../providers/cpu/cpu_execution_provider.cc  |  6 ++--
 .../providers/cpu/tensor/quantize_linear.cc  | 29 ++++++++++++++-----
 .../cpu/tensor/quantize_linear_test.cc       | 12 +++++++-
 3 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
index 08b6a31938111..8bdff361f3deb 100644
--- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
+++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
@@ -263,7 +263,8 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain,
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ThresholdedRelu);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t, DequantizeLinear);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t, DequantizeLinear);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, float, QuantizeLinear);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t, QuantizeLinear);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t, QuantizeLinear);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, QLinearMatMul);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, MatMulInteger);
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ConvInteger);
@@ -539,7 +540,8 @@ void RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ThresholdedRelu)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t, DequantizeLinear)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, float, QuantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, uint8_t, QuantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, int8_t, QuantizeLinear)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, QLinearMatMul)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, MatMulInteger)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ConvInteger)>,
diff --git a/onnxruntime/core/providers/cpu/tensor/quantize_linear.cc b/onnxruntime/core/providers/cpu/tensor/quantize_linear.cc
index 5846bc102f565..49006cc3725cd 100644
--- a/onnxruntime/core/providers/cpu/tensor/quantize_linear.cc
+++ b/onnxruntime/core/providers/cpu/tensor/quantize_linear.cc
@@ -63,13 +63,22 @@ Status DequantizeLinear<T>::Compute(OpKernelContext* ctx) const {
 ONNX_CPU_OPERATOR_TYPED_KERNEL(
     QuantizeLinear,
     10,
-    float,
+    uint8_t,
     KernelDefBuilder()
         .TypeConstraint("x", DataTypeImpl::GetTensorType<float>())
-        .TypeConstraint("y_scale", DataTypeImpl::GetTensorType<float>())
         .TypeConstraint("y_zero_point", DataTypeImpl::GetTensorType<uint8_t>())
         .TypeConstraint("y", DataTypeImpl::GetTensorType<uint8_t>()),
-    QuantizeLinear<float>);
+    QuantizeLinear<uint8_t>);
+
+ONNX_CPU_OPERATOR_TYPED_KERNEL(
+    QuantizeLinear,
+    10,
+    int8_t,
+    KernelDefBuilder()
+        .TypeConstraint("x", DataTypeImpl::GetTensorType<float>())
+        .TypeConstraint("y_zero_point", DataTypeImpl::GetTensorType<int8_t>())
+        .TypeConstraint("y", DataTypeImpl::GetTensorType<int8_t>()),
+    QuantizeLinear<int8_t>);
 
 // clamp doesn't exist in the version of <algorithm> that we're using, so
 // make a local one.
@@ -85,9 +94,9 @@ static float RoundHalfToEven(float input) {
   return result;
 }
 
-template <>
+template <typename T>
 // formula is Y = X / Scale + ZeroPoint
-Status QuantizeLinear<float>::Compute(OpKernelContext* ctx) const {
+Status QuantizeLinear<T>::Compute(OpKernelContext* ctx) const {
   auto& x = *ctx->Input<Tensor>(0);
   auto& y_scale = *ctx->Input<Tensor>(1);
   auto& y_zero_point = *ctx->Input<Tensor>(2);
@@ -102,14 +111,18 @@ Status QuantizeLinear<float>::Compute(OpKernelContext* ctx) const {
   ORT_ENFORCE(scale_shape.NumDimensions() == 0 || (scale_shape.NumDimensions() == 1 && scale_shape.GetDims().size() == 1), "x_scale must be a scalar.");
   ORT_ENFORCE(zero_point_shape.NumDimensions() == 0 || (zero_point_shape.NumDimensions() == 1 && zero_point_shape.GetDims().size() == 1), "x_zero_point must be a scalar.");
 
-  const uint8_t zero_point = *(y_zero_point.template Data<uint8_t>());
+  const T zero_point = *(y_zero_point.template Data<T>());
   const float scale = *(y_scale.template Data<float>());
   const auto* input = x.template Data<float>();
-  auto* output = y.template MutableData<uint8_t>();
+  auto* output = y.template MutableData<T>();
   const auto num_of_elements = x_shape.Size();
+  const float qmax = std::numeric_limits<T>::max();
+  const float qmin_default = std::numeric_limits<T>::min();
+  // adjust qmin for int8 inputs. This is required to keep zero point as zero
+  const float qmin = qmin_default == -128 ? -127 : qmin_default;
 
   for (int i = 0; i < num_of_elements; ++i) {
-    output[i] = static_cast<uint8_t>(clamp(RoundHalfToEven(static_cast<float>(input[i]/scale)) + zero_point, 0.0f, float(UINT8_MAX)));
+    output[i] = static_cast<T>(clamp(RoundHalfToEven(static_cast<float>(input[i]/scale)) + zero_point, qmin, qmax));
   }
 
   return Status::OK();
diff --git a/onnxruntime/test/providers/cpu/tensor/quantize_linear_test.cc b/onnxruntime/test/providers/cpu/tensor/quantize_linear_test.cc
index 3eef712e1f6c3..5a33f0c021dd3 100644
--- a/onnxruntime/test/providers/cpu/tensor/quantize_linear_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/quantize_linear_test.cc
@@ -47,7 +47,7 @@ TEST(DequantizeLinearOpTest, DequantizeLinear_2) {
 }
 
 // quantize with scalar zero point and scale
-TEST(QuantizeLinearOpTest, QuantizeLinear_0) {
+TEST(QuantizeLinearOpTest, QuantizeLinear_uint8) {
   OpTester test("QuantizeLinear", 10);
   std::vector<int64_t> dims{6};
   test.AddInput<float>("x", dims, {0, 2, 3, 1000, -254, -1000});
@@ -57,6 +57,16 @@ TEST(QuantizeLinearOpTest, QuantizeLinear_0) {
   test.Run();
 }
 
+// quantize with scalar zero point and scale
+TEST(QuantizeLinearOpTest, QuantizeLinear_int8) {
+  OpTester test("QuantizeLinear", 10);
+  std::vector<int64_t> dims{6};
+  test.AddInput<float>("x", dims, {0, 2, 3, 5, -2, -5});
+  test.AddInput<float>("y_scale", {}, {.039215686f});
+  test.AddInput<int8_t>("y_zero_point", {}, {0});
+  test.AddOutput<int8_t>("y", dims, {0, 51, 76, 127, -51, -127});
+  test.Run();
+}
 // quantize with 2D data
 TEST(QuantizeLinearOpTest, QuantizeLinear_1) {
   OpTester test("QuantizeLinear", 10);
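
Editor's note, appended after the patch proper: a minimal standalone C++ sketch of the rule the templated kernel now implements, Y = saturate(round(X / Scale) + ZeroPoint), including the patch's narrowing of the int8 range to [-127, 127] so a zero point of 0 stays exactly representable. The quantize<T> helper is hypothetical and not part of the patch; it stands in for QuantizeLinear<T>::Compute, and std::nearbyint is used because, under the default rounding mode, it rounds half to even like the kernel's RoundHalfToEven.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>

// Hypothetical helper mirroring the kernel's per-element math.
template <typename T>
T quantize(float x, float scale, T zero_point) {
  const float qmax = static_cast<float>(std::numeric_limits<T>::max());
  const float qmin_default = static_cast<float>(std::numeric_limits<T>::min());
  // Same adjustment as the patch: for int8, clamp to [-127, 127] so the
  // range is symmetric and a zero point of 0 round-trips exactly.
  const float qmin = qmin_default == -128.0f ? -127.0f : qmin_default;
  // Rounds half to even under the default rounding mode (FE_TONEAREST).
  const float q = std::nearbyint(x / scale) + static_cast<float>(zero_point);
  return static_cast<T>(std::min(std::max(q, qmin), qmax));
}

int main() {
  const float scale = 0.039215686f;  // ~1/25.5, as in the int8 test above
  for (float x : {0.0f, 2.0f, 3.0f, 5.0f, -2.0f, -5.0f})
    std::cout << static_cast<int>(quantize<int8_t>(x, scale, int8_t{0})) << "\n";
  // Should print 0, 51, 76, 127, -51, -127 on IEEE-754 platforms, the
  // expected outputs of the QuantizeLinear_int8 test: 5 / scale = 127.5
  // rounds half-to-even to 128 and then saturates to qmax = 127.
}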