diff --git a/tensorflow/lite/kernels/add.cc b/tensorflow/lite/kernels/add.cc
index d9b8c87eeb7e42..7ad744b4910d4e 100644
--- a/tensorflow/lite/kernels/add.cc
+++ b/tensorflow/lite/kernels/add.cc
@@ -93,12 +93,24 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     output_size = TfLiteIntArrayCopy(input1->dims);
   }
 
-  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
+  // 8bit -> 8bit general quantized path, with general rescalings
+  // as well as, 16bit -> 16bit with general rescalings
+  bool general_16bit = input1->type == kTfLiteInt16 &&
+                       input2->type == kTfLiteInt16 &&
+                       output->type == kTfLiteInt16;
+
+  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
+      general_16bit) {
     // 8bit -> 8bit general quantized path, with general rescalings
+    // as well as, 16bit -> 16bit with general rescalings
     data->input1_offset = -input1->params.zero_point;
     data->input2_offset = -input2->params.zero_point;
     data->output_offset = output->params.zero_point;
-    data->left_shift = 20;
+
+    // The shift is set to 15 for 16-bit and 20 in case of 8-bit, accordingly.
+    // In case of 16-bit we have 65535 << 15 which is less than 1 << 31,
+    // therefore the addition will still fit in a 32 bit accumulator.
+    data->left_shift = general_16bit ? 15 : 20;
     const double twice_max_input_scale =
         2 * std::max(input1->params.scale, input2->params.scale);
     const double real_input1_multiplier =
@@ -221,7 +233,12 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
                               const TfLiteTensor* input1,
                               const TfLiteTensor* input2,
                               TfLiteTensor* output) {
-  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
+  bool general_16bit = input1->type == kTfLiteInt16 &&
+                       input2->type == kTfLiteInt16 &&
+                       output->type == kTfLiteInt16;
+
+  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
+      general_16bit) {
     tflite::ArithmeticParams op_params;
     op_params.left_shift = data->left_shift;
     op_params.input1_offset = data->input1_offset;
@@ -256,6 +273,12 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
         TF_LITE_ADD(optimized_integer_ops, Add, int8_t);
       }
     }
+  } else if (output->type == kTfLiteInt16) {
+    if (need_broadcast) {
+      TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, int16_t);
+    } else {
+      TF_LITE_ADD(reference_ops, Add, int16_t);
+    }
   } else {
     if (kernel_type == kReference) {
       if (need_broadcast) {
@@ -286,7 +309,7 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
     // The quantized version of Add doesn't support activations, so we
     // always use BroadcastAdd.
     if (kernel_type == kReference) {
-      TF_LITE_ADD(reference_ops, Add);
+      TF_LITE_ADD(reference_ops, AddLSTM);
     } else {
       TF_LITE_ADD(optimized_ops, Add);
     }
diff --git a/tensorflow/lite/kernels/add_test.cc b/tensorflow/lite/kernels/add_test.cc
index 267b80564c992e..1a243c7a4e66e2 100644
--- a/tensorflow/lite/kernels/add_test.cc
+++ b/tensorflow/lite/kernels/add_test.cc
@@ -306,15 +306,18 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) {
   const float kMin = -1.f;
   const float kMax = 32767.f / 32768.f;
   float kQuantizedTolerance = GetToleranceInt16(kMin, kMax);
-  std::vector<std::vector<float>> inputs1 = {
-      {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}};
-  std::vector<std::vector<float>> inputs2 = {
-      {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}};
-  std::vector<std::vector<float>> results = {
-      {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}};
+  std::vector<std::vector<float>> inputs1 = {{0.1, 0.2, 0.3, 0.4, 0.9, 0.7},
+                                             {-0.8, 0.2, 0.4, 0.7, 0.1, 0.0},
+                                             {-0.8, 0.2, 0.7, 0.3, 0.9, 0.1}};
+  std::vector<std::vector<float>> inputs2 = {{0.6, 0.4, 0.3, 0.1, -0.1, 0.3},
+                                             {0.6, 0.4, 0.5, -0.8, 0.0, -1.0},
+                                             {0.6, 0.4, -0.8, 0.5, -0.9, 0.1}};
+  std::vector<std::vector<float>> results = {{0.7, 0.6, 0.6, 0.5, 0.8, 1.0},
+                                             {-0.2, 0.6, 0.9, -0.1, 0.1, -1.0},
+                                             {-0.2, 0.6, -0.1, 0.8, 0.0, 0.2}};
   for (size_t i = 0; i < inputs1.size(); ++i) {
-    QuantizedAddOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMin, kMax},
-                          {TensorType_INT16, {1, 2, 2, 1}, kMin, kMax},
+    QuantizedAddOpModel m({TensorType_INT16, {1, 2, 3, 1}, kMin, kMax},
+                          {TensorType_INT16, {1, 2, 3, 1}, kMin, kMax},
                           {TensorType_INT16, {}, kMin, kMax},
                           ActivationFunctionType_NONE);
     m.QuantizeAndPopulate(m.input1(), inputs1[i]);
@@ -435,6 +438,10 @@ TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastInt8) {
   QuantizedWithScalarBroadcast<TensorType_INT8, int8_t>();
 }
 
+TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastInt16) {
+  QuantizedWithScalarBroadcast<TensorType_INT16, int16_t>();
+}
+
 template <enum TensorType tensor_type, typename integer_dtype>
 void QuantizedWithMixedBroadcast() {
   float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
@@ -497,6 +504,10 @@ TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt8) {
   QuantizedWithMixedBroadcast<TensorType_INT8, int8_t>();
 }
 
+TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt16) {
+  QuantizedWithMixedBroadcast<TensorType_INT16, int16_t>();
+}
+
 template <enum TensorType tensor_type, typename integer_dtype>
 void QuantizedWithGenericBroadcast() {
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
@@ -523,5 +534,9 @@ TEST(QuantizedAddOpModel, QuantizedWithGenericdBroadcastInt8) {
   QuantizedWithGenericBroadcast<TensorType_INT8, int8_t>();
 }
 
+TEST(QuantizedAddOpModel, QuantizedWithGenericdBroadcastInt16) {
+  QuantizedWithGenericBroadcast<TensorType_INT16, int16_t>();
+}
+
 }  // namespace
 }  // namespace tflite
diff --git a/tensorflow/lite/kernels/internal/reference/add.h b/tensorflow/lite/kernels/internal/reference/add.h
index d0c40912091a19..c1b0163640ba88 100644
--- a/tensorflow/lite/kernels/internal/reference/add.h
+++ b/tensorflow/lite/kernels/internal/reference/add.h
@@ -51,13 +51,18 @@ inline void Add(const ArithmeticParams& params,
 
 // Element-wise add that can often be used for inner loop of broadcast add as
 // well as the non-broadcast add.
+
+// This function is used for 8-bit as well as for 16-bit, but the accumulator
+// is 32-bit for both cases. The overflow does not happen due to the
+// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
+template <typename T>
 inline void AddElementwise(int size, const ArithmeticParams& params,
-                           const uint8* input1_data, const uint8* input2_data,
-                           uint8* output_data) {
-  TFLITE_DCHECK_GT(params.input1_offset, -256);
-  TFLITE_DCHECK_GT(params.input2_offset, -256);
-  TFLITE_DCHECK_LT(params.input1_offset, 256);
-  TFLITE_DCHECK_LT(params.input2_offset, 256);
+                           const T* input1_data, const T* input2_data,
+                           T* output_data) {
+  TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits<T>::max());
+  TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits<T>::max());
+  TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits<T>::max());
+  TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits<T>::max());
 
   for (int i = 0; i < size; ++i) {
     const int32 input1_val = params.input1_offset + input1_data[i];
@@ -78,7 +83,7 @@ inline void AddElementwise(int size, const ArithmeticParams& params,
     const int32 clamped_output =
         std::min(params.quantized_activation_max,
                  std::max(params.quantized_activation_min, raw_output));
-    output_data[i] = static_cast<uint8>(clamped_output);
+    output_data[i] = static_cast<T>(clamped_output);
   }
 }
 
@@ -138,6 +143,24 @@ inline void Add(const ArithmeticParams& params,
                 const RuntimeShape& output_shape, int16* output_data) {
   TFLITE_DCHECK_LE(params.quantized_activation_min,
                    params.quantized_activation_max);
+  const int flat_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  int max_value = std::numeric_limits<int16_t>::max();
+
+  TFLITE_DCHECK_GT(params.input1_offset, -max_value);
+  TFLITE_DCHECK_GT(params.input2_offset, -max_value);
+  TFLITE_DCHECK_LT(params.input1_offset, max_value);
+  TFLITE_DCHECK_LT(params.input2_offset, max_value);
+  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
+}
+
+inline void AddLSTM(const ArithmeticParams& params,
+                    const RuntimeShape& input1_shape, const int16* input1_data,
+                    const RuntimeShape& input2_shape, const int16* input2_data,
+                    const RuntimeShape& output_shape, int16* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
 
   const int input1_shift = params.input1_shift;
   const int flat_size =
@@ -257,13 +280,14 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
     }
   }
 }
 
-inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
-                               const RuntimeShape& input1_shape,
-                               const uint8* input1_data,
-                               const RuntimeShape& input2_shape,
-                               const uint8* input2_data,
-                               const RuntimeShape& output_shape,
-                               uint8* output_data) {
+// This function is used for 8-bit as well as for 16-bit, but the accumulator
+// is 32-bit for both cases. The overflow does not happen due to the
+// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
+template <typename T>
+inline void BroadcastAdd4DSlow(
+    const ArithmeticParams& params, const RuntimeShape& input1_shape,
+    const T* input1_data, const RuntimeShape& input2_shape,
+    const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
   NdArrayDesc<4> desc1;
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@@ -313,7 +337,7 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
               std::min(params.quantized_activation_max,
                        std::max(params.quantized_activation_min, raw_output));
           output_data[Offset(extended_output_shape, b, y, x, c)] =
-              static_cast<uint8>(clamped_output);
+              static_cast<T>(clamped_output);
         }
       }
     }
diff --git a/tensorflow/lite/kernels/sub.cc b/tensorflow/lite/kernels/sub.cc
index f2913faeb76cea..077533c733807b 100644
--- a/tensorflow/lite/kernels/sub.cc
+++ b/tensorflow/lite/kernels/sub.cc
@@ -72,13 +72,14 @@ void Free(TfLiteContext* context, void* buffer) {
   delete reinterpret_cast<OpData*>(buffer);
 }
 
-TfLiteStatus Prepare8BitSubOp(TfLiteContext* context,
-                              const TfLiteTensor* input_1,
-                              const TfLiteTensor* input_2, TfLiteTensor* output,
-                              TfLiteSubParams* params, OpData* op_params,
-                              int op_sign) {
-  TF_LITE_ENSURE(context,
-                 output->type == kTfLiteUInt8 || output->type == kTfLiteInt8);
+TfLiteStatus PrepareGeneralSubOp(TfLiteContext* context,
+                                 const TfLiteTensor* input_1,
+                                 const TfLiteTensor* input_2,
+                                 TfLiteTensor* output, TfLiteSubParams* params,
+                                 OpData* op_params, int op_sign) {
+  TF_LITE_ENSURE(context, output->type == kTfLiteUInt8 ||
+                              output->type == kTfLiteInt8 ||
+                              output->type == kTfLiteInt16);
   const auto& input1_quantization_params = input_1->params;
   const auto& input2_quantization_params = input_2->params;
   const auto& output_quantization_params = output->params;
@@ -87,6 +88,9 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context,
   if (output->type == kTfLiteUInt8) {
     integer_type_min = std::numeric_limits<uint8_t>::min();
     integer_type_max = std::numeric_limits<uint8_t>::max();
+  } else if (output->type == kTfLiteInt16) {
+    integer_type_min = std::numeric_limits<int16_t>::min();
+    integer_type_max = std::numeric_limits<int16_t>::max();
   } else {
     // output->type == kTfLiteInt8
     integer_type_min = std::numeric_limits<int8_t>::min();
@@ -109,7 +113,11 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context,
   op_params->input1_offset = -input1_quantization_params.zero_point;
   op_params->input2_offset = -input2_quantization_params.zero_point;
   op_params->output_offset = output_quantization_params.zero_point;
-  op_params->left_shift = 20;
+
+  // The shift is set to 15 in case of 16-bit and 20 in case of 8-bit,
+  // accordingly. In case of 16-bit we have 65535 << 15 which is less than 1 <<
+  // 31, therefore the addition will still fit in a 32 bit accumulator.
+  op_params->left_shift = output->type == kTfLiteInt16 ? 15 : 20;
   const double twice_max_input_scale =
       2 * std::max(input1_quantization_params.scale,
                    input2_quantization_params.scale);
@@ -135,13 +143,14 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context,
   TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
       context, params->activation, output, &op_params->output_activation_min,
       &op_params->output_activation_max));
+
   return kTfLiteOk;
 }
 
-TfLiteStatus PrepareInt16SubOp(TfLiteContext* context,
-                               const TfLiteTensor* input1,
-                               const TfLiteTensor* input2, TfLiteTensor* output,
-                               TfLiteSubParams* params, OpData* data) {
+TfLiteStatus PrepareLSTMSubOp(TfLiteContext* context,
+                              const TfLiteTensor* input1,
+                              const TfLiteTensor* input2, TfLiteTensor* output,
+                              TfLiteSubParams* params, OpData* data) {
   // 16bit -> 16bit special quantized path, supporting only a rather
   // narrow case of quantization parameters: zero_points must all be 0
   // ("symmetric quantization") and scales must be power-of-two (which
@@ -208,12 +217,21 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     output_size = TfLiteIntArrayCopy(input1->dims);
   }
 
-  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
-    TF_LITE_ENSURE_OK(context, Prepare8BitSubOp(context, input1, input2, output,
-                                                params, data, -1));
+  // 8bit -> 8bit general quantized path, with general rescalings
+  // as well as, 16bit -> 16bit with general rescalings
+
+  bool general_16bit = output->type == kTfLiteInt16 &&
+                       input1->type == kTfLiteInt16 &&
+                       input2->type == kTfLiteInt16;
+
+  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
+      general_16bit) {
+    TF_LITE_ENSURE_OK(context, PrepareGeneralSubOp(context, input1, input2,
+                                                   output, params, data, -1));
   } else if (output->type == kTfLiteInt16) {
-    TF_LITE_ENSURE_OK(context, PrepareInt16SubOp(context, input1, input2,
-                                                 output, params, data));
+    // LSTM-special case with scale parameter of POT
+    TF_LITE_ENSURE_OK(context, PrepareLSTMSubOp(context, input1, input2, output,
+                                                params, data));
   }
 
   return context->ResizeTensor(context, output, output_size);
@@ -288,6 +306,11 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
   const bool need_broadcast = optimized_ops::ProcessBroadcastShapes(
       GetTensorShape(input1), GetTensorShape(input2), &op_params);
+  // 16bit -> 16bit with general rescaling
+  bool general_16bit = output->type == kTfLiteInt16 &&
+                       input1->type == kTfLiteInt16 &&
+                       input2->type == kTfLiteInt16;
+
 #define TF_LITE_SUB(type, opname, data_type)                             \
   type::opname(op_params, GetTensorShape(input1),                        \
                GetTensorData<data_type>(input1), GetTensorShape(input2), \
                GetTensorData<data_type>(input2), GetTensorShape(output), \
@@ -301,6 +324,12 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
     } else {
       TF_LITE_SUB(reference_integer_ops, Add, int8_t);
     }
+  } else if (general_16bit) {
+    if (need_broadcast) {
+      TF_LITE_SUB(reference_ops, BroadcastAdd4DSlow, int16_t);
+    } else {
+      TF_LITE_SUB(reference_ops, Add, int16_t);
+    }
   } else if (output->type == kTfLiteUInt8) {
     if (kernel_type == kReference) {
       if (need_broadcast) {
diff --git a/tensorflow/lite/kernels/sub_test.cc b/tensorflow/lite/kernels/sub_test.cc
index 24b554f087baab..b5363a038a34a8 100644
--- a/tensorflow/lite/kernels/sub_test.cc
+++ b/tensorflow/lite/kernels/sub_test.cc
@@ -226,6 +226,10 @@ TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt8) {
   QuantizedTestsNoActivation<TensorType_INT8, int8_t>();
 }
 
+TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt16Generic) {
+  QuantizedTestsNoActivation<TensorType_INT16, int16_t>();
+}
+
 template <enum TensorType tensor_type, typename integer_dtype>
 void QuantizedTestsActivationRELU_N1_TO_1() {
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
@@ -287,6 +291,10 @@ TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt8) {
   QuantizedVariousInputShapes<TensorType_INT8, int8_t>();
 }
 
+TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt16) {
+  QuantizedVariousInputShapes<TensorType_INT16, int16_t>();
+}
+
 template <enum TensorType tensor_type, typename integer_dtype>
 void QuantizedWithBroadcast() {
   float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
@@ -315,6 +323,10 @@ TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt8) {
   QuantizedWithBroadcast<TensorType_INT8, int8_t>();
 }
 
+TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt16) {
+  QuantizedWithBroadcast<TensorType_INT16, int16_t>();
+}
+
 TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt16) {
   const float kMin = -1.f;
   const float kMax =
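
As a standalone sanity check of the accumulator-headroom reasoning behind the new left_shift values (a minimal sketch, not part of the patch; the 65535 bound is the one quoted in the patch comments, and the 511 bound is implied by the original 8-bit DCHECKs of |offset| < 256 plus an 8-bit input value):

// Illustrative only: confirms that data->left_shift = general_16bit ? 15 : 20
// keeps the shifted operands inside a signed 32-bit accumulator.
#include <cstdint>

static_assert(65535LL * (1LL << 15) < (1LL << 31),
              "16-bit case: 65535 << 15 still fits in an int32 accumulator");
static_assert(511LL * (1LL << 20) < (1LL << 31),
              "8-bit case: worst-case |offset + input| << 20 fits in int32");

int main() { return 0; }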