From 08f07fe496e1a1b1f769eaf93c85acd0e9a659f7 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 21 Dec 2023 13:23:15 +0800 Subject: [PATCH 1/8] update sq tuning Signed-off-by: yiliu30 --- neural_compressor/strategy/strategy.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 95691a38142..3f90162b37a 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -186,6 +186,7 @@ def __init__( # track tuning cfg with the current best accuracy self.cur_best_tuning_cfg = {} self.re_quant = False + self.early_stop_sq_tuning_process = False self._trials_count = 0 self._capability = None @@ -1152,6 +1153,8 @@ def _should_tuning_sq_alpha(self, recipes): def tuning_sq_alpha(self, tuning_space, tuning_cfg, recipes): """Tuning smooth quant's alpha. + After trying all alpha values, the sq tuning process will stop early, returning the current best qmodel, even if the current best accuracy does not meet the accuracy criterion. + Args: tuning_space: tuning space tuning_cfg: the initial tuning config @@ -1166,8 +1169,12 @@ def tuning_sq_alpha(self, tuning_space, tuning_cfg, recipes): ), "Only tune the smooth quant's alpha when user provide the alpha list,\ but got alpha_list: {alpha_list}" logger.info("[STRATEGY] Start tuning smooth quant'alpha.") + number_of_alpha = len(sq_alpha_list) + sq_trials_cnt = 0 sq_sampler = tuning_sampler_dict.get_class("smooth_quant")(tuning_space, [], tuning_cfg, sq_alpha_list) for tune_cfg in sq_sampler: + sq_trials_cnt += 1 + self.early_stop_sq_tuning_process = sq_trials_cnt == number_of_alpha yield tune_cfg def _should_tuning_woq_algo(self): @@ -1961,6 +1968,12 @@ def stop(self, timeout, trials_count): need_stop = True else: need_stop = False + if not need_stop and self.early_stop_sq_tuning_process: + logger.info( + "[Strategy] Tried all alpha values but none met the accuracy criterion. The tuning process was early stopped and the currently best model was returned." + ) + + need_stop = True return need_stop From a89411ed1cc9d0b98d64de8279b4a265ce0b4257 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 21 Dec 2023 13:29:24 +0800 Subject: [PATCH 2/8] fix pylint Signed-off-by: yiliu30 --- neural_compressor/strategy/strategy.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 3f90162b37a..f2d938814b8 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -1153,7 +1153,8 @@ def _should_tuning_sq_alpha(self, recipes): def tuning_sq_alpha(self, tuning_space, tuning_cfg, recipes): """Tuning smooth quant's alpha. - After trying all alpha values, the sq tuning process will stop early, returning the current best qmodel, even if the current best accuracy does not meet the accuracy criterion. + After trying all alpha values, the sq tuning process will stop early, returning the current best qmodel, + even if the current best accuracy does not meet the accuracy criterion. Args: tuning_space: tuning space @@ -1970,7 +1971,8 @@ def stop(self, timeout, trials_count): need_stop = False if not need_stop and self.early_stop_sq_tuning_process: logger.info( - "[Strategy] Tried all alpha values but none met the accuracy criterion. The tuning process was early stopped and the currently best model was returned." 
+ "[Strategy] Tried all alpha values but none met the accuracy criterion.", + "The tuning process was early stopped and the currently best model was returned.", ) need_stop = True From 08085e9ecad9ad7e64eded8420fea5cb6880198d Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 21 Dec 2023 15:11:30 +0800 Subject: [PATCH 3/8] fixed typo Signed-off-by: yiliu30 --- neural_compressor/strategy/strategy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index f2d938814b8..d22aa08983f 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -1971,8 +1971,8 @@ def stop(self, timeout, trials_count): need_stop = False if not need_stop and self.early_stop_sq_tuning_process: logger.info( - "[Strategy] Tried all alpha values but none met the accuracy criterion.", - "The tuning process was early stopped and the currently best model was returned.", + "[Strategy] Tried all alpha values but none met the accuracy criterion." + + "The tuning process was early stopped and the currently best model was returned." ) need_stop = True From b4ce8f1b8056db685492057931773f4f3445f59b Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 22 Dec 2023 08:17:20 +0800 Subject: [PATCH 4/8] track the current best tune cfg Signed-off-by: yiliu30 --- neural_compressor/adaptor/ox_utils/util.py | 2 +- neural_compressor/strategy/strategy.py | 7 +++++-- test/algorithm/test_smooth_quant.py | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/neural_compressor/adaptor/ox_utils/util.py b/neural_compressor/adaptor/ox_utils/util.py index fb7fa97e047..19bf8d7007a 100644 --- a/neural_compressor/adaptor/ox_utils/util.py +++ b/neural_compressor/adaptor/ox_utils/util.py @@ -100,7 +100,7 @@ def simple_progress_bar(total, i): bar = "#" * int(bar_length * progress) spaces = " " * (bar_length - len(bar)) percentage = progress * 100 - print(f"\rProgress: [{bar}{spaces}] {percentage:.2f}%", end="") + print(f"\rProgress: [{bar}{spaces}] {percentage:.2f}%") def dtype_to_name(dtype_mapping, dtype): diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index d22aa08983f..df5627be4b3 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -1970,9 +1970,12 @@ def stop(self, timeout, trials_count): else: need_stop = False if not need_stop and self.early_stop_sq_tuning_process: + if self.best_tuning_cfg is None: + self.best_tuning_cfg = self._tune_cfg_converter(self.cur_best_tuning_cfg) logger.info( - "[Strategy] Tried all alpha values but none met the accuracy criterion." - + "The tuning process was early stopped and the currently best model was returned." + "[Strategy] Tried all alpha values but none met the accuracy criterion. " + "The tuning process was early stopped and " + f"the currently best model(accuracy: {self.cur_best_acc}) was returned." ) need_stop = True diff --git a/test/algorithm/test_smooth_quant.py b/test/algorithm/test_smooth_quant.py index 08003161662..89092034043 100644 --- a/test/algorithm/test_smooth_quant.py +++ b/test/algorithm/test_smooth_quant.py @@ -1150,6 +1150,8 @@ def _test_sq_tune_alpha_common(self, eval_func, alpha=np.arange(0.1, 0.2, 0.05). 
         from neural_compressor import quantization
         from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion
 
+        logger.info(f"alpha is: {alpha}")
+
         tuning_criterion = TuningCriterion(max_trials=8)
         fp32_model = DemoModel()

From 21accb70befb57ccfde2f515cbb84cc4bc053abc Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Fri, 22 Dec 2023 08:24:00 +0800
Subject: [PATCH 5/8] update uts

Signed-off-by: yiliu30
---
 test/algorithm/test_smooth_quant.py      | 12 +++---------
 test/algorithm/test_smooth_quant_onnx.py |  6 ------
 2 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/test/algorithm/test_smooth_quant.py b/test/algorithm/test_smooth_quant.py
index 89092034043..080fa779119 100644
--- a/test/algorithm/test_smooth_quant.py
+++ b/test/algorithm/test_smooth_quant.py
@@ -1185,8 +1185,8 @@ def fake_eval(model, eval_result_lst):
         # test for alpha is a list
         for eval_result_lst, note in [
             ([1, 0.8, 1.1, 0.7, 1.1], "Expect tuning ends at 2nd trial with alpha is 0.15"),
-            ([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.15"),
-            ([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.10"),
+            ([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 2th trial with alpha is 0.15"),
+            ([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 1th trial with alpha is 0.10"),
         ]:
             logger.info(f"test_sq_tune_alpha_common with eval_result_lst: {eval_result_lst}")
             logger.info(note)
@@ -1224,13 +1224,7 @@ def fake_eval(model, eval_result_lst):
                 [1, 0.8, 0.9, 0.7, 1.1],
                 np.arange(0.1, 0.2, 0.05).tolist(),
                 "auto",
-                "Expect tuning ends at 4th trial with alpha is 0.15 at basic strategy.",
+                "Expect tuning ends at 2nd trial with alpha is 0.15 at basic strategy.",
             ),
-            (
-                [1, 1.1, 0.8, 0.7, 1.1],
-                np.arange(0.1, 0.2, 0.05).tolist(),
-                0,
-                "Expect tuning ends at 1th trial with alpha is 0.1",
-            ),
         ]:
             logger.info("test_sq_tune_alpha_common with ")
             logger.info(f"eval_result_lst: {eval_result_lst}, alpha: {alpha}, quant_level: {quant_level}")
diff --git a/test/algorithm/test_smooth_quant_onnx.py b/test/algorithm/test_smooth_quant_onnx.py
index db2877638ce..6cc67b9803e 100644
--- a/test/algorithm/test_smooth_quant_onnx.py
+++ b/test/algorithm/test_smooth_quant_onnx.py
@@ -279,12 +279,6 @@ def fake_eval(model, eval_result_lst):
                 "auto",
                 "Expect tuning ends at 4th trial with alpha is 0.15 at basic strategy.",
             ),
-            (
-                [1, 1.1, 0.8, 0.7, 1.1],
-                np.arange(0.1, 0.2, 0.05).tolist(),
-                0,
-                "Expect tuning ends at 1th trial with alpha is 0.1",
-            ),
         ]:
             logger.info("test_sq_tune_alpha_common with ")
             logger.info(f"eval_result_lst: {eval_result_lst}, alpha: {alpha}, quant_level: {quant_level}")

From 887fdb21ab250ff769a99afc152b1ad1396e62fb Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Fri, 22 Dec 2023 12:25:33 +0800
Subject: [PATCH 6/8] fix typos

Signed-off-by: yiliu30
---
 test/algorithm/test_smooth_quant.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/algorithm/test_smooth_quant.py b/test/algorithm/test_smooth_quant.py
index 080fa779119..564fa84c84e 100644
--- a/test/algorithm/test_smooth_quant.py
+++ b/test/algorithm/test_smooth_quant.py
@@ -1185,8 +1185,8 @@ def fake_eval(model, eval_result_lst):
         # test for alpha is a list
         for eval_result_lst, note in [
             ([1, 0.8, 1.1, 0.7, 1.1], "Expect tuning ends at 2nd trial with alpha is 0.15"),
-            ([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 2th trial with alpha is 0.15"),
-            ([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 1th trial with alpha is 0.10"),
+            ([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 2nd trial with alpha is 0.15"),
+            ([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 1st trial with alpha is 0.10"),
         ]:
logger.info(f"test_sq_tune_alpha_common with eval_result_lst: {eval_result_lst}") logger.info(note) From 4c475ac307ab278e677b8fe56bf67c6526f8b636 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 22 Dec 2023 12:27:31 +0800 Subject: [PATCH 7/8] update docs Signed-off-by: yiliu30 --- docs/source/tuning_strategies.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/tuning_strategies.md b/docs/source/tuning_strategies.md index 7788149f7e3..6238218892b 100644 --- a/docs/source/tuning_strategies.md +++ b/docs/source/tuning_strategies.md @@ -179,7 +179,7 @@ flowchart TD > `*` INC will detect the block pattern for [transformer-like](https://arxiv.org/abs/1706.03762) model by default. -> For [smooth quantization](./smooth_quant.md), users can tune the smooth quantization alpha by providing a list of scalars for the `alpha` item. The tuning process will take place at the **start stage** of the tuning procedure. For details usage, please refer to the [smooth quantization example](./smooth_quant.md#Example). +> For [smooth quantization](./smooth_quant.md), users can tune the smooth quantization alpha by providing a list of scalars for the `alpha` item. For details usage, please refer to the [smooth quantization example](./smooth_quant.md#Usage). > For [weight-only quantization](./quantization_weight_only.md), users can tune the weight-only algorithms from the available [pre-defined configurations](./quantization_weight_only.md#woq-algorithms-tuning). The tuning process will take place at the **start stage** of the tuning procedure, preceding the smooth quantization alpha tuning. For details usage, please refer to the [weight-only quantization example](./quantization_weight_only.md#woq-algorithms-tuning). *Please note that this behavior is specific to the `ONNX Runtime` backend.* From b387c9d2ffe6e20f1fb9c5753418ff076e48ec14 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 22 Dec 2023 17:15:21 +0800 Subject: [PATCH 8/8] revert prgress bar change Signed-off-by: yiliu30 --- neural_compressor/adaptor/ox_utils/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/adaptor/ox_utils/util.py b/neural_compressor/adaptor/ox_utils/util.py index 19bf8d7007a..fb7fa97e047 100644 --- a/neural_compressor/adaptor/ox_utils/util.py +++ b/neural_compressor/adaptor/ox_utils/util.py @@ -100,7 +100,7 @@ def simple_progress_bar(total, i): bar = "#" * int(bar_length * progress) spaces = " " * (bar_length - len(bar)) percentage = progress * 100 - print(f"\rProgress: [{bar}{spaces}] {percentage:.2f}%") + print(f"\rProgress: [{bar}{spaces}] {percentage:.2f}%", end="") def dtype_to_name(dtype_mapping, dtype):