From 19f0e71a95249a91213e9314a4729967bd64a114 Mon Sep 17 00:00:00 2001
From: Vyacheslav Bazhenov <v.bazhenov@partner.samsung.com>
Date: Mon, 8 Apr 2024 14:40:10 +0300
Subject: [PATCH] [luci] Introduce MinimumMSE quantization algorithm

This commit introduces MinimumMSE quantization algorithm.

ONE-DCO-1.0-Signed-off-by: Vyacheslav Bazhenov <slavikmipt@gmail.com>
---
 .../luci/Pass/QuantizationParameters.h        |   6 +
 .../luci/pass/src/QuantizeWeightsOnly.cpp     | 173 +++++++++++++++++-
 compiler/luci/pass/src/QuantizeWeightsOnly.h  |   6 +-
 3 files changed, 177 insertions(+), 8 deletions(-)
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h b/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
index 30c8db058e4..279857e446d 100644
--- a/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
+++ b/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
@@ -37,6 +37,12 @@ struct LayerInfo
   QuantizationGranularity granularity;
 };
 
+enum struct QuantizationAlgorithm
+{
+  Common = 0,
+  MinimumMSE = 1,
+};
+
 } // namespace luci
 
 #endif // __LUCI_QUANTIZATION_PARAMETERS_H__
diff --git a/compiler/luci/pass/src/QuantizeWeightsOnly.cpp b/compiler/luci/pass/src/QuantizeWeightsOnly.cpp
index edaf13e596f..7c424c39ff2 100644
--- a/compiler/luci/pass/src/QuantizeWeightsOnly.cpp
+++ b/compiler/luci/pass/src/QuantizeWeightsOnly.cpp
@@ -61,6 +61,140 @@ void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc
   }
 }
 
+template <loco::DataType out_type>
+void sym_wquant_per_channel_minimum_mse(CircleConst *node, std::vector<float> &min,
+                                        std::vector<float> &max, std::vector<float> &scaling_factor,
+                                        std::vector<float> &nudged_min,
+                                        std::vector<float> &nudged_max, int32_t &channel_dim_index)
+{
+  assert(node->dtype() == loco::DataType::FLOAT32);
+  assert(out_type == loco::DataType::S4 || out_type == loco::DataType::S8 ||
+         out_type == loco::DataType::S16);
+
+  const auto kSearchIterations = 100;
+  const auto kPhi = 1.618033988749894848204586834365638118;
+  const auto kRangeCoefficient = 0.1;
+
+  const int32_t kMaxScale = max_for_sym_quant(out_type);
+  const int32_t kMinScale = -kMaxScale;
+
+  uint32_t size = node->size<loco::DataType::FLOAT32>();
+  std::vector<int32_t> quantized_values(size);
+
+  for (size_t i = 0; i < min.size(); ++i)
+  {
+    compute_sym_scale(min[i], max[i], scaling_factor[i], nudged_min[i], nudged_max[i], out_type);
+  }
+
+  auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+    int channel_idx = indices[channel_dim_index];
+    const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+    auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+    data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+    data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+    quantized_values[cal_offset(dimension, indices)] =
+      static_cast<int32_t>(std::round(data * scaling_factor_inv));
+  };
+  std::vector<float> max_scale(min.size());
+  for (size_t i = 0; i < min.size(); ++i)
+  {
+    max_scale[i] = std::max(std::fabs(min[i]), std::fabs(max[i]));
+  }
+  std::vector<double> channel_mse(min.size());
+  std::vector<double> channel_min_mse(min.size(), std::numeric_limits<double>::max());
+
+  auto calculate_mse = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+    int channel_idx = indices[channel_dim_index];
+    auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+    data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+    data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+    double diff =
+      data - quantized_values[cal_offset(dimension, indices)] * scaling_factor[channel_idx];
+    channel_mse[channel_idx] += diff * diff;
+  };
+
+  std::vector<float> scaling_factor_base = scaling_factor;
+  std::vector<std::pair<float, float>> golden_start_end(min.size());
+
+  for (size_t i = 0; i < max_scale.size(); ++i)
+  {
+    golden_start_end[i].first = scaling_factor_base[i] * (1.0 - kRangeCoefficient);
+    golden_start_end[i].second = scaling_factor_base[i] * (1.0 + kRangeCoefficient);
+  }
+
+  for (size_t i = 0; i < kSearchIterations; ++i)
+  {
+    for (size_t j = 0; j < scaling_factor.size(); ++j)
+    {
+      scaling_factor[j] = golden_start_end[j].second -
+                          (golden_start_end[j].second - golden_start_end[j].first) / kPhi;
+    }
+    for (auto &val : channel_mse)
+    {
+      val = 0;
+    }
+    iterate_per_channel(node, channel_dim_index, quantize);
+    iterate_per_channel(node, channel_dim_index, calculate_mse);
+    auto channel_mse_x1 = channel_mse;
+
+    for (size_t j = 0; j < scaling_factor.size(); ++j)
+    {
+      scaling_factor[j] =
+        golden_start_end[j].first + (golden_start_end[j].second - golden_start_end[j].first) / kPhi;
+    }
+    for (auto &val : channel_mse)
+    {
+      val = 0;
+    }
+    iterate_per_channel(node, channel_dim_index, quantize);
+    iterate_per_channel(node, channel_dim_index, calculate_mse);
+    auto channel_mse_x2 = channel_mse;
+
+    for (size_t k = 0; k < channel_mse_x1.size(); ++k)
+    {
+      if (channel_mse_x1[k] > channel_mse_x2[k])
+      {
+        golden_start_end[k].first = golden_start_end[k].second -
+                                    (golden_start_end[k].second - golden_start_end[k].first) / kPhi;
+      }
+      else
+      {
+        golden_start_end[k].second =
+          golden_start_end[k].first +
+          (golden_start_end[k].second - golden_start_end[k].first) / kPhi;
+      }
+    }
+  }
+  for (size_t i = 0; i < golden_start_end.size(); ++i)
+  {
+    scaling_factor[i] = (golden_start_end[i].first + golden_start_end[i].second) / 2;
+  }
+  iterate_per_channel(node, channel_dim_index, quantize);
+  iterate_per_channel(node, channel_dim_index, calculate_mse);
+  auto channel_mse_opt = channel_mse;
+  scaling_factor = scaling_factor_base;
+  iterate_per_channel(node, channel_dim_index, quantize);
+  iterate_per_channel(node, channel_dim_index, calculate_mse);
+  auto channel_mse_base = channel_mse;
+
+  // Checking if found scale is better than base
+  for (size_t i = 0; i < channel_mse_base.size(); ++i)
+  {
+    if (channel_mse_opt[i] < channel_mse_base[i])
+      scaling_factor[i] = (golden_start_end[i].first + golden_start_end[i].second) / 2;
+    else
+      channel_mse_opt[i] = channel_mse_base[i];
+  }
+  iterate_per_channel(node, channel_dim_index, quantize);
+
+  node->dtype(out_type);      // change the type of tensor
+  node->size<out_type>(size); // resize tensor
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    node->at<out_type>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+  }
+}
+
 // TODO Reduce duplicate code with QuantizeDequantizeWeights
 template <loco::DataType out_type>
 void sym_wquant_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
@@ -166,18 +300,45 @@ void QuantizeWeightsOnly::quantize_weights(luci::CircleConst *weights)
 
       if (output_type == loco::DataType::S4)
       {
-        sym_wquant_per_channel<loco::DataType::S4>(weights, min, max, scaling_factor, nudged_min,
-                                                   nudged_max, channel_dim_index);
+        switch (algorithm)
+        {
+          case luci::QuantizationAlgorithm::MinimumMSE:
+            sym_wquant_per_channel_minimum_mse<loco::DataType::S4>(
+              weights, min, max, scaling_factor, nudged_min, nudged_max, channel_dim_index);
+            break;
+          default:
+            sym_wquant_per_channel<loco::DataType::S4>(weights, min, max, scaling_factor,
+                                                       nudged_min, nudged_max, channel_dim_index);
+            break;
+        }
       }
       else if (output_type == loco::DataType::S8)
       {
-        sym_wquant_per_channel<loco::DataType::S8>(weights, min, max, scaling_factor, nudged_min,
-                                                   nudged_max, channel_dim_index);
+        switch (algorithm)
+        {
+          case luci::QuantizationAlgorithm::MinimumMSE:
+            sym_wquant_per_channel_minimum_mse<loco::DataType::S8>(
+              weights, min, max, scaling_factor, nudged_min, nudged_max, channel_dim_index);
+            break;
+          default:
+            sym_wquant_per_channel<loco::DataType::S8>(weights, min, max, scaling_factor,
+                                                       nudged_min, nudged_max, channel_dim_index);
+            break;
+        }
       }
       else if (output_type == loco::DataType::S16)
       {
-        sym_wquant_per_channel<loco::DataType::S16>(weights, min, max, scaling_factor, nudged_min,
-                                                    nudged_max, channel_dim_index);
+        switch (algorithm)
+        {
+          case luci::QuantizationAlgorithm::MinimumMSE:
+            sym_wquant_per_channel_minimum_mse<loco::DataType::S16>(
+              weights, min, max, scaling_factor, nudged_min, nudged_max, channel_dim_index);
+            break;
+          default:
+            sym_wquant_per_channel<loco::DataType::S16>(weights, min, max, scaling_factor,
+                                                        nudged_min, nudged_max, channel_dim_index);
+            break;
+        }
       }
       else
       {
diff --git a/compiler/luci/pass/src/QuantizeWeightsOnly.h b/compiler/luci/pass/src/QuantizeWeightsOnly.h
index 8d1421f4b0f..ce55b5c2223 100644
--- a/compiler/luci/pass/src/QuantizeWeightsOnly.h
+++ b/compiler/luci/pass/src/QuantizeWeightsOnly.h
@@ -29,14 +29,16 @@ namespace luci
  */
 struct QuantizeWeightsOnly final : public luci::CircleNodeMutableVisitor<void>
 {
-  QuantizeWeightsOnly(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
-    : input_type(input), output_type(output), granularity(gr)
+  QuantizeWeightsOnly(loco::DataType input, loco::DataType output, QuantizationGranularity gr,
+                      QuantizationAlgorithm alg = QuantizationAlgorithm::Common)
+    : input_type(input), output_type(output), granularity(gr), algorithm(alg)
   {
   }
 
   loco::DataType input_type;
   loco::DataType output_type;
   QuantizationGranularity granularity;
+  QuantizationAlgorithm algorithm;
 
 private:
   void quantize_weights(luci::CircleConst *weights);