Skip to content

Commit

Permalink
Narrow the tuning space of sq auto-tune (#1489)
Browse files Browse the repository at this point in the history
Signed-off-by: yiliu30 <yi4.liu@intel.com>
  • Loading branch information
yiliu30 authored Dec 22, 2023
1 parent 6c78dfe commit 9600e1d
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 16 deletions.
2 changes: 1 addition & 1 deletion docs/source/tuning_strategies.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ flowchart TD
> `*` INC will detect the block pattern for [transformer-like](https://arxiv.org/abs/1706.03762) models by default.
> For [smooth quantization](./smooth_quant.md), users can tune the smooth quantization alpha by providing a list of scalars for the `alpha` item. The tuning process will take place at the **start stage** of the tuning procedure. For details usage, please refer to the [smooth quantization example](./smooth_quant.md#Example).
> For [smooth quantization](./smooth_quant.md), users can tune the smooth quantization alpha by providing a list of scalars for the `alpha` item. For detailed usage, please refer to the [smooth quantization example](./smooth_quant.md#Usage).
> For [weight-only quantization](./quantization_weight_only.md), users can tune the weight-only algorithms from the available [pre-defined configurations](./quantization_weight_only.md#woq-algorithms-tuning). The tuning process will take place at the **start stage** of the tuning procedure, preceding the smooth quantization alpha tuning. For detailed usage, please refer to the [weight-only quantization example](./quantization_weight_only.md#woq-algorithms-tuning).
*Please note that this behavior is specific to the `ONNX Runtime` backend.*
Expand Down
18 changes: 18 additions & 0 deletions neural_compressor/strategy/strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ def __init__(
# track tuning cfg with the current best accuracy
self.cur_best_tuning_cfg = {}
self.re_quant = False
self.early_stop_sq_tuning_process = False

self._trials_count = 0
self._capability = None
Expand Down Expand Up @@ -1152,6 +1153,9 @@ def _should_tuning_sq_alpha(self, recipes):
def tuning_sq_alpha(self, tuning_space, tuning_cfg, recipes):
"""Tuning smooth quant's alpha.
After trying all alpha values, the sq tuning process will stop early, returning the current best qmodel,
even if the current best accuracy does not meet the accuracy criterion.
Args:
tuning_space: tuning space
tuning_cfg: the initial tuning config
Expand All @@ -1166,8 +1170,12 @@ def tuning_sq_alpha(self, tuning_space, tuning_cfg, recipes):
), "Only tune the smooth quant's alpha when user provide the alpha list,\
but got alpha_list: {alpha_list}"
logger.info("[STRATEGY] Start tuning smooth quant'alpha.")
number_of_alpha = len(sq_alpha_list)
sq_trials_cnt = 0
sq_sampler = tuning_sampler_dict.get_class("smooth_quant")(tuning_space, [], tuning_cfg, sq_alpha_list)
for tune_cfg in sq_sampler:
sq_trials_cnt += 1
self.early_stop_sq_tuning_process = sq_trials_cnt == number_of_alpha
yield tune_cfg

def _should_tuning_woq_algo(self):
Expand Down Expand Up @@ -1961,6 +1969,16 @@ def stop(self, timeout, trials_count):
need_stop = True
else:
need_stop = False
if not need_stop and self.early_stop_sq_tuning_process:
if self.best_tuning_cfg is None:
self.best_tuning_cfg = self._tune_cfg_converter(self.cur_best_tuning_cfg)
logger.info(
"[Strategy] Tried all alpha values but none met the accuracy criterion. "
"The tuning process was early stopped and "
f"the currently best model(accuracy: {self.cur_best_acc}) was returned."
)

need_stop = True

return need_stop

Expand Down
14 changes: 5 additions & 9 deletions test/algorithm/test_smooth_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,8 @@ def _test_sq_tune_alpha_common(self, eval_func, alpha=np.arange(0.1, 0.2, 0.05).
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion

logger.info(f"alpha is: {alpha}")

tuning_criterion = TuningCriterion(max_trials=8)

fp32_model = DemoModel()
Expand Down Expand Up @@ -1183,8 +1185,8 @@ def fake_eval(model, eval_result_lst):
# test for alpha is a list
for eval_result_lst, note in [
([1, 0.8, 1.1, 0.7, 1.1], "Expect tuning ends at 2nd trial with alpha is 0.15"),
([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.15"),
([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.10"),
([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 2nd trial with alpha is 0.15"),
([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 1st trial with alpha is 0.10"),
]:
logger.info(f"test_sq_tune_alpha_common with eval_result_lst: {eval_result_lst}")
logger.info(note)
Expand Down Expand Up @@ -1222,13 +1224,7 @@ def fake_eval(model, eval_result_lst):
[1, 0.8, 0.9, 0.7, 1.1],
np.arange(0.1, 0.2, 0.05).tolist(),
"auto",
"Expect tuning ends at 4th trial with alpha is 0.15 at basic strategy.",
),
(
[1, 1.1, 0.8, 0.7, 1.1],
np.arange(0.1, 0.2, 0.05).tolist(),
0,
"Expect tuning ends at 1th trial with alpha is 0.1",
"Expect tuning ends at 2th trial with alpha is 0.15 at basic strategy.",
),
]:
logger.info("test_sq_tune_alpha_common with ")
Expand Down
6 changes: 0 additions & 6 deletions test/algorithm/test_smooth_quant_onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,12 +279,6 @@ def fake_eval(model, eval_result_lst):
"auto",
"Expect tuning ends at 4th trial with alpha is 0.15 at basic strategy.",
),
(
[1, 1.1, 0.8, 0.7, 1.1],
np.arange(0.1, 0.2, 0.05).tolist(),
0,
"Expect tuning ends at 1th trial with alpha is 0.1",
),
]:
logger.info("test_sq_tune_alpha_common with ")
logger.info(f"eval_result_lst: {eval_result_lst}, alpha: {alpha}, quant_level: {quant_level}")
Expand Down

0 comments on commit 9600e1d

Please sign in to comment.