From 08f07fe496e1a1b1f769eaf93c85acd0e9a659f7 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 21 Dec 2023 13:23:15 +0800 Subject: [PATCH 1/8] update sq tuning Signed-off-by: yiliu30 --- neural_compressor/strategy/strategy.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 95691a38142..3f90162b37a 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -186,6 +186,7 @@ def __init__( # track tuning cfg with the current best accuracy self.cur_best_tuning_cfg = {} self.re_quant = False + self.early_stop_sq_tuning_process = False self._trials_count = 0 self._capability = None @@ -1152,6 +1153,8 @@ def _should_tuning_sq_alpha(self, recipes): def tuning_sq_alpha(self, tuning_space, tuning_cfg, recipes): """Tuning smooth quant's alpha. + After trying all alpha values, the sq tuning process will stop early, returning the current best qmodel, even if the current best accuracy does not meet the accuracy criterion. + Args: tuning_space: tuning space tuning_cfg: the initial tuning config @@ -1166,8 +1169,12 @@ def tuning_sq_alpha(self, tuning_space, tuning_cfg, recipes): ), "Only tune the smooth quant's alpha when user provide the alpha list,\ but got alpha_list: {alpha_list}" logger.info("[STRATEGY] Start tuning smooth quant'alpha.") + number_of_alpha = len(sq_alpha_list) + sq_trials_cnt = 0 sq_sampler = tuning_sampler_dict.get_class("smooth_quant")(tuning_space, [], tuning_cfg, sq_alpha_list) for tune_cfg in sq_sampler: + sq_trials_cnt += 1 + self.early_stop_sq_tuning_process = sq_trials_cnt == number_of_alpha yield tune_cfg def _should_tuning_woq_algo(self): @@ -1961,6 +1968,12 @@ def stop(self, timeout, trials_count): need_stop = True else: need_stop = False + if not need_stop and self.early_stop_sq_tuning_process: + logger.info( + "[Strategy] Tried all alpha values but none met the accuracy criterion. The tuning process was early stopped and the currently best model was returned." + ) + + need_stop = True return need_stop From a89411ed1cc9d0b98d64de8279b4a265ce0b4257 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 21 Dec 2023 13:29:24 +0800 Subject: [PATCH 2/8] fix pylint Signed-off-by: yiliu30 --- neural_compressor/strategy/strategy.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 3f90162b37a..f2d938814b8 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -1153,7 +1153,8 @@ def _should_tuning_sq_alpha(self, recipes): def tuning_sq_alpha(self, tuning_space, tuning_cfg, recipes): """Tuning smooth quant's alpha. - After trying all alpha values, the sq tuning process will stop early, returning the current best qmodel, even if the current best accuracy does not meet the accuracy criterion. + After trying all alpha values, the sq tuning process will stop early, returning the current best qmodel, + even if the current best accuracy does not meet the accuracy criterion. Args: tuning_space: tuning space @@ -1970,7 +1971,8 @@ def stop(self, timeout, trials_count): need_stop = False if not need_stop and self.early_stop_sq_tuning_process: logger.info( - "[Strategy] Tried all alpha values but none met the accuracy criterion. The tuning process was early stopped and the currently best model was returned." 
+ "[Strategy] Tried all alpha values but none met the accuracy criterion.", + "The tuning process was early stopped and the currently best model was returned.", ) need_stop = True From 08085e9ecad9ad7e64eded8420fea5cb6880198d Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 21 Dec 2023 15:11:30 +0800 Subject: [PATCH 3/8] fixed typo Signed-off-by: yiliu30 --- neural_compressor/strategy/strategy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index f2d938814b8..d22aa08983f 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -1971,8 +1971,8 @@ def stop(self, timeout, trials_count): need_stop = False if not need_stop and self.early_stop_sq_tuning_process: logger.info( - "[Strategy] Tried all alpha values but none met the accuracy criterion.", - "The tuning process was early stopped and the currently best model was returned.", + "[Strategy] Tried all alpha values but none met the accuracy criterion." + + "The tuning process was early stopped and the currently best model was returned." ) need_stop = True From b4ce8f1b8056db685492057931773f4f3445f59b Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 22 Dec 2023 08:17:20 +0800 Subject: [PATCH 4/8] track the current best tune cfg Signed-off-by: yiliu30 --- neural_compressor/adaptor/ox_utils/util.py | 2 +- neural_compressor/strategy/strategy.py | 7 +++++-- test/algorithm/test_smooth_quant.py | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/neural_compressor/adaptor/ox_utils/util.py b/neural_compressor/adaptor/ox_utils/util.py index fb7fa97e047..19bf8d7007a 100644 --- a/neural_compressor/adaptor/ox_utils/util.py +++ b/neural_compressor/adaptor/ox_utils/util.py @@ -100,7 +100,7 @@ def simple_progress_bar(total, i): bar = "#" * int(bar_length * progress) spaces = " " * (bar_length - len(bar)) percentage = progress * 100 - print(f"\rProgress: [{bar}{spaces}] {percentage:.2f}%", end="") + print(f"\rProgress: [{bar}{spaces}] {percentage:.2f}%") def dtype_to_name(dtype_mapping, dtype): diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index d22aa08983f..df5627be4b3 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -1970,9 +1970,12 @@ def stop(self, timeout, trials_count): else: need_stop = False if not need_stop and self.early_stop_sq_tuning_process: + if self.best_tuning_cfg is None: + self.best_tuning_cfg = self._tune_cfg_converter(self.cur_best_tuning_cfg) logger.info( - "[Strategy] Tried all alpha values but none met the accuracy criterion." - + "The tuning process was early stopped and the currently best model was returned." + "[Strategy] Tried all alpha values but none met the accuracy criterion. " + "The tuning process was early stopped and " + f"the currently best model(accuracy: {self.cur_best_acc}) was returned." ) need_stop = True diff --git a/test/algorithm/test_smooth_quant.py b/test/algorithm/test_smooth_quant.py index 08003161662..89092034043 100644 --- a/test/algorithm/test_smooth_quant.py +++ b/test/algorithm/test_smooth_quant.py @@ -1150,6 +1150,8 @@ def _test_sq_tune_alpha_common(self, eval_func, alpha=np.arange(0.1, 0.2, 0.05). 
         from neural_compressor import quantization
         from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion
 
+        logger.info(f"alpha is: {alpha}")
+
         tuning_criterion = TuningCriterion(max_trials=8)
         fp32_model = DemoModel()

From 21accb70befb57ccfde2f515cbb84cc4bc053abc Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Fri, 22 Dec 2023 08:24:00 +0800
Subject: [PATCH 5/8] update uts

Signed-off-by: yiliu30
---
 test/algorithm/test_smooth_quant.py      | 12 +++---------
 test/algorithm/test_smooth_quant_onnx.py |  6 ------
 2 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/test/algorithm/test_smooth_quant.py b/test/algorithm/test_smooth_quant.py
index 89092034043..080fa779119 100644
--- a/test/algorithm/test_smooth_quant.py
+++ b/test/algorithm/test_smooth_quant.py
@@ -1185,8 +1185,8 @@ def fake_eval(model, eval_result_lst):
         # test for alpha is a list
         for eval_result_lst, note in [
             ([1, 0.8, 1.1, 0.7, 1.1], "Expect tuning ends at 2nd trial with alpha is 0.15"),
-            ([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.15"),
-            ([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.10"),
+            ([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 2th trial with alpha is 0.15"),
+            ([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 1th trial with alpha is 0.10"),
         ]:
             logger.info(f"test_sq_tune_alpha_common with eval_result_lst: {eval_result_lst}")
             logger.info(note)
@@ -1224,13 +1224,7 @@ def fake_eval(model, eval_result_lst):
                 [1, 0.8, 0.9, 0.7, 1.1],
                 np.arange(0.1, 0.2, 0.05).tolist(),
                 "auto",
-                "Expect tuning ends at 4th trial with alpha is 0.15 at basic strategy.",
+                "Expect tuning ends at 2nd trial with alpha is 0.15 at basic strategy.",
             ),
-            (
-                [1, 1.1, 0.8, 0.7, 1.1],
-                np.arange(0.1, 0.2, 0.05).tolist(),
-                0,
-                "Expect tuning ends at 1th trial with alpha is 0.1",
-            ),
         ]:
             logger.info("test_sq_tune_alpha_common with ")
             logger.info(f"eval_result_lst: {eval_result_lst}, alpha: {alpha}, quant_level: {quant_level}")
diff --git a/test/algorithm/test_smooth_quant_onnx.py b/test/algorithm/test_smooth_quant_onnx.py
index db2877638ce..6cc67b9803e 100644
--- a/test/algorithm/test_smooth_quant_onnx.py
+++ b/test/algorithm/test_smooth_quant_onnx.py
@@ -279,12 +279,6 @@ def fake_eval(model, eval_result_lst):
                 "auto",
                 "Expect tuning ends at 4th trial with alpha is 0.15 at basic strategy.",
             ),
-            (
-                [1, 1.1, 0.8, 0.7, 1.1],
-                np.arange(0.1, 0.2, 0.05).tolist(),
-                0,
-                "Expect tuning ends at 1th trial with alpha is 0.1",
-            ),
         ]:
             logger.info("test_sq_tune_alpha_common with ")
             logger.info(f"eval_result_lst: {eval_result_lst}, alpha: {alpha}, quant_level: {quant_level}")

From 887fdb21ab250ff769a99afc152b1ad1396e62fb Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Fri, 22 Dec 2023 12:25:33 +0800
Subject: [PATCH 6/8] fix typos

Signed-off-by: yiliu30
---
 test/algorithm/test_smooth_quant.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/algorithm/test_smooth_quant.py b/test/algorithm/test_smooth_quant.py
index 080fa779119..564fa84c84e 100644
--- a/test/algorithm/test_smooth_quant.py
+++ b/test/algorithm/test_smooth_quant.py
@@ -1185,8 +1185,8 @@ def fake_eval(model, eval_result_lst):
         # test for alpha is a list
         for eval_result_lst, note in [
             ([1, 0.8, 1.1, 0.7, 1.1], "Expect tuning ends at 2nd trial with alpha is 0.15"),
-            ([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 2th trial with alpha is 0.15"),
-            ([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 1th trial with alpha is 0.10"),
+            ([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 2nd trial with alpha is 0.15"),
+            ([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 1st trial with alpha is 0.10"),
         ]:
logger.info(f"test_sq_tune_alpha_common with eval_result_lst: {eval_result_lst}") logger.info(note) From 4c475ac307ab278e677b8fe56bf67c6526f8b636 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 22 Dec 2023 12:27:31 +0800 Subject: [PATCH 7/8] update docs Signed-off-by: yiliu30 --- docs/source/tuning_strategies.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/tuning_strategies.md b/docs/source/tuning_strategies.md index 7788149f7e3..6238218892b 100644 --- a/docs/source/tuning_strategies.md +++ b/docs/source/tuning_strategies.md @@ -179,7 +179,7 @@ flowchart TD > `*` INC will detect the block pattern for [transformer-like](https://arxiv.org/abs/1706.03762) model by default. -> For [smooth quantization](./smooth_quant.md), users can tune the smooth quantization alpha by providing a list of scalars for the `alpha` item. The tuning process will take place at the **start stage** of the tuning procedure. For details usage, please refer to the [smooth quantization example](./smooth_quant.md#Example). +> For [smooth quantization](./smooth_quant.md), users can tune the smooth quantization alpha by providing a list of scalars for the `alpha` item. For details usage, please refer to the [smooth quantization example](./smooth_quant.md#Usage). > For [weight-only quantization](./quantization_weight_only.md), users can tune the weight-only algorithms from the available [pre-defined configurations](./quantization_weight_only.md#woq-algorithms-tuning). The tuning process will take place at the **start stage** of the tuning procedure, preceding the smooth quantization alpha tuning. For details usage, please refer to the [weight-only quantization example](./quantization_weight_only.md#woq-algorithms-tuning). *Please note that this behavior is specific to the `ONNX Runtime` backend.* From b387c9d2ffe6e20f1fb9c5753418ff076e48ec14 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 22 Dec 2023 17:15:21 +0800 Subject: [PATCH 8/8] revert prgress bar change Signed-off-by: yiliu30 --- neural_compressor/adaptor/ox_utils/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/adaptor/ox_utils/util.py b/neural_compressor/adaptor/ox_utils/util.py index 19bf8d7007a..fb7fa97e047 100644 --- a/neural_compressor/adaptor/ox_utils/util.py +++ b/neural_compressor/adaptor/ox_utils/util.py @@ -100,7 +100,7 @@ def simple_progress_bar(total, i): bar = "#" * int(bar_length * progress) spaces = " " * (bar_length - len(bar)) percentage = progress * 100 - print(f"\rProgress: [{bar}{spaces}] {percentage:.2f}%") + print(f"\rProgress: [{bar}{spaces}] {percentage:.2f}%", end="") def dtype_to_name(dtype_mapping, dtype):