From 19f0e71a95249a91213e9314a4729967bd64a114 Mon Sep 17 00:00:00 2001
From: Vyacheslav Bazhenov
Date: Mon, 8 Apr 2024 14:40:10 +0300
Subject: [PATCH] [luci] Introduce MinimumMSE quantization algorithm

This commit introduces MinimumMSE quantization algorithm.

ONE-DCO-1.0-Signed-off-by: Vyacheslav Bazhenov

---
 .../luci/Pass/QuantizationParameters.h        |   9 +
 .../luci/pass/src/QuantizeWeightsOnly.cpp     | 186 +++++++++++++++++-
 compiler/luci/pass/src/QuantizeWeightsOnly.h  |   6 +-
 3 files changed, 193 insertions(+), 8 deletions(-)

diff --git a/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h b/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
index 30c8db058e4..279857e446d 100644
--- a/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
+++ b/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
@@ -37,6 +37,15 @@ struct LayerInfo
   QuantizationGranularity granularity;
 };
 
+/**
+ * @brief Algorithm used by weights-only quantization to pick the per-channel scale
+ */
+enum struct QuantizationAlgorithm
+{
+  Common = 0,     // dispatches to sym_wquant_per_channel
+  MinimumMSE = 1, // dispatches to sym_wquant_per_channel_minimum_mse
+};
+
 } // namespace luci
 
 #endif // __LUCI_QUANTIZATION_PARAMETERS_H__
diff --git a/compiler/luci/pass/src/QuantizeWeightsOnly.cpp b/compiler/luci/pass/src/QuantizeWeightsOnly.cpp
index edaf13e596f..7c424c39ff2 100644
--- a/compiler/luci/pass/src/QuantizeWeightsOnly.cpp
+++ b/compiler/luci/pass/src/QuantizeWeightsOnly.cpp
@@ -61,6 +61,153 @@ void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc
   }
 }
 
+/**
+ * @brief Symmetric per-channel weight quantization that refines each channel's scale
+ *        to reduce the squared quantization error
+ *
+ * The scale from compute_sym_scale is taken as a base. A golden-section search is then run
+ * for every channel over [base * (1 - kRangeCoefficient), base * (1 + kRangeCoefficient)],
+ * and the found scale replaces the base one only where it gives a smaller error.
+ *
+ * @note On return node holds out_type data and scaling_factor holds the selected scales.
+ *       nudged_min/nudged_max are not recomputed for the selected scale.
+ */
+template <loco::DataType out_type>
+void sym_wquant_per_channel_minimum_mse(CircleConst *node, std::vector<float> &min,
+                                        std::vector<float> &max, std::vector<float> &scaling_factor,
+                                        std::vector<float> &nudged_min,
+                                        std::vector<float> &nudged_max, int32_t &channel_dim_index)
+{
+  assert(node->dtype() == loco::DataType::FLOAT32);
+  assert(out_type == loco::DataType::S4 || out_type == loco::DataType::S8 ||
+         out_type == loco::DataType::S16);
+
+  const auto kSearchIterations = 100;
+  const auto kPhi = 1.618033988749894848204586834365638118;
+  const auto kRangeCoefficient = 0.1;
+
+  const int32_t kMaxScale = max_for_sym_quant(out_type);
+  const int32_t kMinScale = -kMaxScale;
+
+  uint32_t size = node->size<loco::DataType::FLOAT32>();
+  std::vector<int32_t> quantized_values(size);
+
+  for (size_t i = 0; i < min.size(); ++i)
+  {
+    compute_sym_scale(min[i], max[i], scaling_factor[i], nudged_min[i], nudged_max[i], out_type);
+  }
+
+  // Quantize one element with the current scale of its channel
+  // NOTE(review) Values are limited to [kMinScale, kMaxScale] only when stored at the end, so
+  //              the error measured here may miss that clipping - TODO confirm
+  auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+    int channel_idx = indices[channel_dim_index];
+    const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+    auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+    data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+    data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+    quantized_values[cal_offset(dimension, indices)] =
+      static_cast<int32_t>(std::round(data * scaling_factor_inv));
+  };
+
+  std::vector<double> channel_mse(min.size());
+
+  // Add the squared error of one element to the sum of its channel
+  auto calculate_mse = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+    int channel_idx = indices[channel_dim_index];
+    auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+    data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+    data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+    double diff =
+      data - quantized_values[cal_offset(dimension, indices)] * scaling_factor[channel_idx];
+    channel_mse[channel_idx] += diff * diff;
+  };
+
+  // Return the per-channel error for the current scaling_factor
+  // The sums are cleared first because calculate_mse only accumulates; without that the
+  // final opt/base comparison would be made on sums polluted by earlier evaluations
+  auto evaluate_mse = [&]() {
+    for (auto &val : channel_mse)
+    {
+      val = 0;
+    }
+    iterate_per_channel(node, channel_dim_index, quantize);
+    iterate_per_channel(node, channel_dim_index, calculate_mse);
+    return channel_mse;
+  };
+
+  std::vector<float> scaling_factor_base = scaling_factor;
+  std::vector<std::pair<float, float>> golden_start_end(min.size());
+
+  for (size_t i = 0; i < golden_start_end.size(); ++i)
+  {
+    golden_start_end[i].first = scaling_factor_base[i] * (1.0 - kRangeCoefficient);
+    golden_start_end[i].second = scaling_factor_base[i] * (1.0 + kRangeCoefficient);
+  }
+
+  for (int i = 0; i < kSearchIterations; ++i)
+  {
+    // Error at the lower probe point of every channel
+    for (size_t j = 0; j < scaling_factor.size(); ++j)
+    {
+      scaling_factor[j] = golden_start_end[j].second -
+                          (golden_start_end[j].second - golden_start_end[j].first) / kPhi;
+    }
+    const auto channel_mse_x1 = evaluate_mse();
+
+    // Error at the upper probe point of every channel
+    for (size_t j = 0; j < scaling_factor.size(); ++j)
+    {
+      scaling_factor[j] =
+        golden_start_end[j].first + (golden_start_end[j].second - golden_start_end[j].first) / kPhi;
+    }
+    const auto channel_mse_x2 = evaluate_mse();
+
+    // Shrink the interval towards the probe point with the smaller error
+    for (size_t k = 0; k < channel_mse_x1.size(); ++k)
+    {
+      if (channel_mse_x1[k] > channel_mse_x2[k])
+      {
+        golden_start_end[k].first = golden_start_end[k].second -
+                                    (golden_start_end[k].second - golden_start_end[k].first) / kPhi;
+      }
+      else
+      {
+        golden_start_end[k].second =
+          golden_start_end[k].first +
+          (golden_start_end[k].second - golden_start_end[k].first) / kPhi;
+      }
+    }
+  }
+
+  // Error of the found scale (middle of the final interval)
+  for (size_t i = 0; i < golden_start_end.size(); ++i)
+  {
+    scaling_factor[i] = (golden_start_end[i].first + golden_start_end[i].second) / 2;
+  }
+  const auto scaling_factor_opt = scaling_factor;
+  const auto channel_mse_opt = evaluate_mse();
+
+  // Error of the base scale
+  scaling_factor = scaling_factor_base;
+  const auto channel_mse_base = evaluate_mse();
+
+  // Checking if found scale is better than base
+  for (size_t i = 0; i < channel_mse_base.size(); ++i)
+  {
+    if (channel_mse_opt[i] < channel_mse_base[i])
+      scaling_factor[i] = scaling_factor_opt[i];
+  }
+  iterate_per_channel(node, channel_dim_index, quantize);
+
+  node->dtype(out_type);      // change the type of tensor
+  node->size<out_type>(size); // resize tensor
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    node->at<out_type>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+  }
+}
+
 // TODO Reduce duplicate code with QuantizeDequantizeWeights
 template <loco::DataType out_type>
 void sym_wquant_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
@@ -166,18 +313,45 @@ void QuantizeWeightsOnly::quantize_weights(luci::CircleConst *weights)
 
   if (output_type == loco::DataType::S4)
   {
-    sym_wquant_per_channel<loco::DataType::S4>(weights, min, max, scaling_factor, nudged_min,
-                                               nudged_max, channel_dim_index);
+    switch (algorithm)
+    {
+      case luci::QuantizationAlgorithm::MinimumMSE:
+        sym_wquant_per_channel_minimum_mse<loco::DataType::S4>(
+          weights, min, max, scaling_factor, nudged_min, nudged_max, channel_dim_index);
+        break;
+      default:
+        sym_wquant_per_channel<loco::DataType::S4>(weights, min, max, scaling_factor,
+                                                   nudged_min, nudged_max, channel_dim_index);
+        break;
+    }
   }
   else if (output_type == loco::DataType::S8)
   {
-    sym_wquant_per_channel<loco::DataType::S8>(weights, min, max, scaling_factor, nudged_min,
-                                               nudged_max, channel_dim_index);
+    switch (algorithm)
+    {
+      case luci::QuantizationAlgorithm::MinimumMSE:
+        sym_wquant_per_channel_minimum_mse<loco::DataType::S8>(
+          weights, min, max, scaling_factor, nudged_min, nudged_max, channel_dim_index);
+        break;
+      default:
+        sym_wquant_per_channel<loco::DataType::S8>(weights, min, max, scaling_factor,
+                                                   nudged_min, nudged_max, channel_dim_index);
+        break;
+    }
   }
   else if (output_type == loco::DataType::S16)
   {
-    sym_wquant_per_channel<loco::DataType::S16>(weights, min, max, scaling_factor, nudged_min,
-                                                nudged_max, channel_dim_index);
+    switch (algorithm)
+    {
+      case luci::QuantizationAlgorithm::MinimumMSE:
+        sym_wquant_per_channel_minimum_mse<loco::DataType::S16>(
+          weights, min, max, scaling_factor, nudged_min, nudged_max, channel_dim_index);
+        break;
+      default:
+        sym_wquant_per_channel<loco::DataType::S16>(weights, min, max, scaling_factor,
+                                                    nudged_min, nudged_max, channel_dim_index);
+        break;
+    }
   }
   else
   {
diff --git a/compiler/luci/pass/src/QuantizeWeightsOnly.h b/compiler/luci/pass/src/QuantizeWeightsOnly.h
index 8d1421f4b0f..ce55b5c2223 100644
--- a/compiler/luci/pass/src/QuantizeWeightsOnly.h
+++ b/compiler/luci/pass/src/QuantizeWeightsOnly.h
@@ -29,14 +29,16 @@ namespace luci
  */
 struct QuantizeWeightsOnly final : public luci::CircleNodeMutableVisitor<void>
 {
-  QuantizeWeightsOnly(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
-    : input_type(input), output_type(output), granularity(gr)
+  QuantizeWeightsOnly(loco::DataType input, loco::DataType output, QuantizationGranularity gr,
+                      QuantizationAlgorithm alg = QuantizationAlgorithm::Common)
+    : input_type(input), output_type(output), granularity(gr), algorithm(alg)
   {
   }
 
   loco::DataType input_type;
   loco::DataType output_type;
   QuantizationGranularity granularity;
+  QuantizationAlgorithm algorithm;
 
 private:
   void quantize_weights(luci::CircleConst *weights);