From 9b17a984b22f376a50f3fec4e1f556d8d2df4a35 Mon Sep 17 00:00:00 2001
From: Vyacheslav Bazhenov <v.bazhenov@partner.samsung.com>
Date: Mon, 8 Apr 2024 14:40:10 +0300
Subject: [PATCH] [luci] Introduce MinimumMSE quantization algorithm

This commit introduces the MinimumMSE quantization algorithm.

ONE-DCO-1.0-Signed-off-by: Vyacheslav Bazhenov <slavikmipt@gmail.com>
---
 .../luci/Pass/QuantizationParameters.h        |   6 +
 .../include/luci/Pass/QuantizeWeightsPass.h   |   4 +-
 .../luci/pass/src/QuantizeWeightsOnly.cpp     | 173 +++++++++++++++++-
 compiler/luci/pass/src/QuantizeWeightsOnly.h  |   6 +-
 .../luci/pass/src/QuantizeWeightsPass.cpp     |   3 +-
 .../pass/src/QuantizeWeightsPass.test.cpp     |  28 ++-
 6 files changed, 206 insertions(+), 14 deletions(-)
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h b/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
index 30c8db058e4..279857e446d 100644
--- a/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
+++ b/compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
@@ -37,6 +37,12 @@ struct LayerInfo
   QuantizationGranularity granularity;
 };
 
+enum struct QuantizationAlgorithm
+{
+  Common = 0,     // default algorithm (pre-existing quantization path)
+  MinimumMSE = 1, // searches per-channel scales for the minimum quantization MSE
+};
+
 } // namespace luci
 
 #endif // __LUCI_QUANTIZATION_PARAMETERS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizeWeightsPass.h b/compiler/luci/pass/include/luci/Pass/QuantizeWeightsPass.h
index 64659731227..2d45a55fb54 100644
--- a/compiler/luci/pass/include/luci/Pass/QuantizeWeightsPass.h
+++ b/compiler/luci/pass/include/luci/Pass/QuantizeWeightsPass.h
@@ -37,6 +37,7 @@ class QuantizeWeightsPass : public logo::Pass
     loco::DataType input_model_dtype = loco::DataType::Unknown;
     loco::DataType output_model_dtype = loco::DataType::Unknown;
     QuantizationGranularity granularity = QuantizationGranularity::ChannelWise;
+    QuantizationAlgorithm algorithm = QuantizationAlgorithm::Common;
   };
 
 public:
@@ -47,13 +48,14 @@ class QuantizeWeightsPass : public logo::Pass
 
 public:
   QuantizeWeightsPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
-                      QuantizationGranularity granularity)
+                      QuantizationGranularity granularity, QuantizationAlgorithm algorithm)
   {
     _ctx = std::make_unique<Context>();
     {
       _ctx->input_model_dtype = input_model_dtype;
       _ctx->output_model_dtype = output_model_dtype;
       _ctx->granularity = granularity;
+      _ctx->algorithm = algorithm;
     }
   }
   virtual const char *name(void) const { return "luci::QuantizeWeightsPass"; }
diff --git a/compiler/luci/pass/src/QuantizeWeightsOnly.cpp b/compiler/luci/pass/src/QuantizeWeightsOnly.cpp
index edaf13e596f..7c424c39ff2 100644
--- a/compiler/luci/pass/src/QuantizeWeightsOnly.cpp
+++ b/compiler/luci/pass/src/QuantizeWeightsOnly.cpp
@@ -61,6 +61,140 @@ void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc
   }
 }
 
+/**
+ * @brief Symmetric per-channel weight quantization with an MSE-minimizing scale search
+ * @note  Per channel, a golden-section search within +/-kRangeCoefficient of the scale from
+ *        compute_sym_scale() looks for a scale with lower squared error, else keeps that one
+ * @param node              FLOAT32 constant, converted in place to out_type
+ * @param min, max          per-channel min/max values handed to compute_sym_scale()
+ * @param scaling_factor    [out] per-channel scale selected for quantization
+ * @param nudged_min, nudged_max per-channel clamp bounds obtained from compute_sym_scale()
+ * @param channel_dim_index channel dimension index, forwarded to iterate_per_channel()
+ */
+template <loco::DataType out_type>
+void sym_wquant_per_channel_minimum_mse(CircleConst *node, std::vector<float> &min,
+                                        std::vector<float> &max, std::vector<float> &scaling_factor,
+                                        std::vector<float> &nudged_min,
+                                        std::vector<float> &nudged_max, int32_t &channel_dim_index)
+{
+  assert(node->dtype() == loco::DataType::FLOAT32);
+  assert(out_type == loco::DataType::S4 || out_type == loco::DataType::S8 ||
+         out_type == loco::DataType::S16);
+
+  const size_t kSearchIterations = 100;
+  const auto kPhi = 1.618033988749894848204586834365638118;
+  const auto kRangeCoefficient = 0.1;
+
+  const int32_t kMaxScale = max_for_sym_quant(out_type);
+  const int32_t kMinScale = -kMaxScale;
+
+  uint32_t size = node->size<loco::DataType::FLOAT32>();
+  std::vector<int32_t> quantized_values(size);
+
+  const size_t num_channels = min.size();
+  for (size_t i = 0; i < num_channels; ++i)
+  {
+    compute_sym_scale(min[i], max[i], scaling_factor[i], nudged_min[i], nudged_max[i], out_type);
+  }
+  const std::vector<float> scaling_factor_base = scaling_factor;
+  std::vector<double> channel_mse(num_channels);
+
+  // Quantize with the current scale; saturate here so the measured error includes clipping
+  auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+    int channel_idx = indices[channel_dim_index];
+    const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+    auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+    data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+    data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+    const auto q = static_cast<int32_t>(std::round(data * scaling_factor_inv));
+    quantized_values[cal_offset(dimension, indices)] = std::min(kMaxScale, std::max(kMinScale, q));
+  };
+
+  // Add the squared dequantization error of one element to the sum of its channel
+  auto calculate_mse = [&](uint32_t *indices, loco::TensorShape &dimension, int channel_dim_index) {
+    int channel_idx = indices[channel_dim_index];
+    auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+    data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+    data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+    double diff =
+      data - quantized_values[cal_offset(dimension, indices)] * scaling_factor[channel_idx];
+    channel_mse[channel_idx] += diff * diff;
+  };
+
+  // Squared error per channel for the current scale; sums are reset first (no stale values)
+  auto evaluate_mse = [&]() {
+    std::fill(channel_mse.begin(), channel_mse.end(), 0.0);
+    iterate_per_channel(node, channel_dim_index, quantize);
+    iterate_per_channel(node, channel_dim_index, calculate_mse);
+    return channel_mse;
+  };
+
+  // Search interval [first, second] of candidate scales for each channel
+  std::vector<std::pair<float, float>> golden_start_end(num_channels);
+  for (size_t i = 0; i < num_channels; ++i)
+  {
+    golden_start_end[i].first = scaling_factor_base[i] * (1.0 - kRangeCoefficient);
+    golden_start_end[i].second = scaling_factor_base[i] * (1.0 + kRangeCoefficient);
+  }
+
+  for (size_t i = 0; i < kSearchIterations; ++i)
+  {
+    // Lower probe point of the golden-section step
+    for (size_t j = 0; j < num_channels; ++j)
+    {
+      scaling_factor[j] = golden_start_end[j].second -
+                          (golden_start_end[j].second - golden_start_end[j].first) / kPhi;
+    }
+    const auto channel_mse_x1 = evaluate_mse();
+    // Upper probe point of the golden-section step
+    for (size_t j = 0; j < num_channels; ++j)
+    {
+      scaling_factor[j] =
+        golden_start_end[j].first + (golden_start_end[j].second - golden_start_end[j].first) / kPhi;
+    }
+    const auto channel_mse_x2 = evaluate_mse();
+    // Shrink the interval towards the probe point with the smaller error
+    for (size_t k = 0; k < num_channels; ++k)
+    {
+      if (channel_mse_x1[k] > channel_mse_x2[k])
+      {
+        golden_start_end[k].first = golden_start_end[k].second -
+                                    (golden_start_end[k].second - golden_start_end[k].first) / kPhi;
+      }
+      else
+      {
+        golden_start_end[k].second =
+          golden_start_end[k].first +
+          (golden_start_end[k].second - golden_start_end[k].first) / kPhi;
+      }
+    }
+  }
+
+  // Candidate scale: midpoint of the final search interval
+  for (size_t i = 0; i < num_channels; ++i)
+  {
+    scaling_factor[i] = (golden_start_end[i].first + golden_start_end[i].second) / 2;
+  }
+  const auto channel_mse_opt = evaluate_mse();
+  scaling_factor = scaling_factor_base;
+  const auto channel_mse_base = evaluate_mse();
+
+  // Checking if found scale is better than base
+  for (size_t i = 0; i < num_channels; ++i)
+  {
+    if (channel_mse_opt[i] < channel_mse_base[i])
+      scaling_factor[i] = (golden_start_end[i].first + golden_start_end[i].second) / 2;
+  }
+  iterate_per_channel(node, channel_dim_index, quantize);
+
+  node->dtype(out_type);      // change the type of tensor
+  node->size<out_type>(size); // resize tensor
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    node->at<out_type>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+  }
+}
+
 // TODO Reduce duplicate code with QuantizeDequantizeWeights
 template <loco::DataType out_type>
 void sym_wquant_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
@@ -166,18 +300,45 @@ void QuantizeWeightsOnly::quantize_weights(luci::CircleConst *weights)
 
       if (output_type == loco::DataType::S4)
       {
-        sym_wquant_per_channel<loco::DataType::S4>(weights, min, max, scaling_factor, nudged_min,
-                                                   nudged_max, channel_dim_index);
+        switch (algorithm)
+        {
+          case luci::QuantizationAlgorithm::MinimumMSE:
+            sym_wquant_per_channel_minimum_mse<loco::DataType::S4>(
+              weights, min, max, scaling_factor, nudged_min, nudged_max, channel_dim_index);
+            break;
+          default:
+            sym_wquant_per_channel<loco::DataType::S4>(weights, min, max, scaling_factor,
+                                                       nudged_min, nudged_max, channel_dim_index);
+            break;
+        }
       }
       else if (output_type == loco::DataType::S8)
       {
-        sym_wquant_per_channel<loco::DataType::S8>(weights, min, max, scaling_factor, nudged_min,
-                                                   nudged_max, channel_dim_index);
+        switch (algorithm)
+        {
+          case luci::QuantizationAlgorithm::MinimumMSE:
+            sym_wquant_per_channel_minimum_mse<loco::DataType::S8>(
+              weights, min, max, scaling_factor, nudged_min, nudged_max, channel_dim_index);
+            break;
+          default:
+            sym_wquant_per_channel<loco::DataType::S8>(weights, min, max, scaling_factor,
+                                                       nudged_min, nudged_max, channel_dim_index);
+            break;
+        }
       }
       else if (output_type == loco::DataType::S16)
       {
-        sym_wquant_per_channel<loco::DataType::S16>(weights, min, max, scaling_factor, nudged_min,
-                                                    nudged_max, channel_dim_index);
+        switch (algorithm)
+        {
+          case luci::QuantizationAlgorithm::MinimumMSE:
+            sym_wquant_per_channel_minimum_mse<loco::DataType::S16>(
+              weights, min, max, scaling_factor, nudged_min, nudged_max, channel_dim_index);
+            break;
+          default:
+            sym_wquant_per_channel<loco::DataType::S16>(weights, min, max, scaling_factor,
+                                                        nudged_min, nudged_max, channel_dim_index);
+            break;
+        }
       }
       else
       {
diff --git a/compiler/luci/pass/src/QuantizeWeightsOnly.h b/compiler/luci/pass/src/QuantizeWeightsOnly.h
index 8d1421f4b0f..7cf79595522 100644
--- a/compiler/luci/pass/src/QuantizeWeightsOnly.h
+++ b/compiler/luci/pass/src/QuantizeWeightsOnly.h
@@ -29,14 +29,16 @@ namespace luci
  */
 struct QuantizeWeightsOnly final : public luci::CircleNodeMutableVisitor<void>
 {
-  QuantizeWeightsOnly(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
-    : input_type(input), output_type(output), granularity(gr)
+  QuantizeWeightsOnly(loco::DataType input, loco::DataType output, QuantizationGranularity gr,
+                      QuantizationAlgorithm alg)
+    : input_type(input), output_type(output), granularity(gr), algorithm(alg)
   {
   }
 
   loco::DataType input_type;
   loco::DataType output_type;
   QuantizationGranularity granularity;
+  QuantizationAlgorithm algorithm;
 
 private:
   void quantize_weights(luci::CircleConst *weights);
diff --git a/compiler/luci/pass/src/QuantizeWeightsPass.cpp b/compiler/luci/pass/src/QuantizeWeightsPass.cpp
index 9ac203e7740..1b2c27070bf 100644
--- a/compiler/luci/pass/src/QuantizeWeightsPass.cpp
+++ b/compiler/luci/pass/src/QuantizeWeightsPass.cpp
@@ -35,7 +35,8 @@ bool QuantizeWeightsPass::run(loco::Graph *g)
   for (auto node : loco::active_nodes(loco::output_nodes(g)))
   {
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
-    QuantizeWeightsOnly qw(_ctx->input_model_dtype, _ctx->output_model_dtype, _ctx->granularity);
+    QuantizeWeightsOnly qw(_ctx->input_model_dtype, _ctx->output_model_dtype, _ctx->granularity,
+                           _ctx->algorithm);
     circle_node->accept(&qw);
   }
 
diff --git a/compiler/luci/pass/src/QuantizeWeightsPass.test.cpp b/compiler/luci/pass/src/QuantizeWeightsPass.test.cpp
index 058e029abe4..9abf7bb1878 100644
--- a/compiler/luci/pass/src/QuantizeWeightsPass.test.cpp
+++ b/compiler/luci/pass/src/QuantizeWeightsPass.test.cpp
@@ -78,7 +78,7 @@ struct QuantizeWeightsPassTest : public ::testing::Test
     output->dtype(loco::DataType::FLOAT32);
     output->name("output");
   }
-  virtual void SetUp() { MakeGraph(); }
+  virtual void SetUp() override { MakeGraph(); }
   loco::Graph _g;
 };
 
@@ -87,7 +87,8 @@ struct QuantizeWeightsPassTest : public ::testing::Test
 TEST_F(QuantizeWeightsPassTest, name)
 {
   luci::QuantizeWeightsPass pass(loco::DataType::FLOAT32, loco::DataType::S8,
-                                 luci::QuantizationGranularity::ChannelWise);
+                                 luci::QuantizationGranularity::ChannelWise,
+                                 luci::QuantizationAlgorithm::Common);
   auto const name = pass.name();
   ASSERT_NE(nullptr, name);
 }
@@ -99,6 +100,7 @@ TEST_F(QuantizeWeightsPassTest, name_ctx)
     ctx->input_model_dtype = loco::DataType::FLOAT32;
     ctx->output_model_dtype = loco::DataType::S8;
     ctx->granularity = luci::QuantizationGranularity::ChannelWise;
+    ctx->algorithm = luci::QuantizationAlgorithm::Common;
   }
 
   luci::QuantizeWeightsPass pass(std::move(ctx));
@@ -106,11 +108,28 @@ TEST_F(QuantizeWeightsPassTest, name_ctx)
   ASSERT_NE(nullptr, name);
 }
 
+TEST_F(QuantizeWeightsPassTest, run_minimum_mse_s8)
+{
+  luci::QuantizeWeightsPass pass(loco::DataType::FLOAT32, loco::DataType::S8,
+                                 luci::QuantizationGranularity::ChannelWise,
+                                 luci::QuantizationAlgorithm::MinimumMSE);
+  EXPECT_NO_THROW(pass.run(&_g)); // smoke test: the MinimumMSE path must run without throwing
+}
+
+TEST_F(QuantizeWeightsPassTest, run_input_U8_mse_NEG)
+{
+  luci::QuantizeWeightsPass pass(loco::DataType::U8, loco::DataType::S8,
+                                 luci::QuantizationGranularity::ChannelWise,
+                                 luci::QuantizationAlgorithm::MinimumMSE);
+  EXPECT_THROW(pass.run(&_g), std::runtime_error); // negative case: U8 input model dtype
+}
+
 TEST_F(QuantizeWeightsPassTest, run_input_U8_NEG)
 {
   loco::Graph g;
   luci::QuantizeWeightsPass pass(loco::DataType::U8, loco::DataType::S8,
-                                 luci::QuantizationGranularity::ChannelWise);
+                                 luci::QuantizationGranularity::ChannelWise,
+                                 luci::QuantizationAlgorithm::Common);
   EXPECT_THROW(pass.run(&_g), std::runtime_error);
 }
 
@@ -118,6 +137,7 @@ TEST_F(QuantizeWeightsPassTest, run_output_f32_NEG)
 {
   loco::Graph g;
   luci::QuantizeWeightsPass pass(loco::DataType::FLOAT32, loco::DataType::FLOAT32,
-                                 luci::QuantizationGranularity::ChannelWise);
+                                 luci::QuantizationGranularity::ChannelWise,
+                                 luci::QuantizationAlgorithm::Common);
   EXPECT_THROW(pass.run(&_g), std::runtime_error);
 }