
Commit 7499cd8

Merge branch 'master' into xinhe/patch-2

2 parents 4ee86c1 + 02233fb

File tree

22 files changed: +229 -115 lines

.azure-pipelines/scripts/ut/3x/run_3x_ort.sh

Lines changed: 1 addition & 0 deletions

@@ -13,6 +13,7 @@ export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverag
 inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
 cd /neural-compressor/test || exit 1
 find ./3x/onnxrt/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
+find ./3x/common/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh

 LOG_DIR=/neural-compressor/log_dir
 mkdir -p ${LOG_DIR}
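
For context (the same one-line change is made in the torch and tensorflow variants below): the find|sed pipeline rewrites each discovered test file into one coverage invocation per line of the generated run.sh. Assuming a hypothetical test file test/3x/common/test_utility.py, and writing <inc_path> for the installed neural_compressor package path, the appended run.sh line would read:

    coverage run --source=<inc_path> --append 3x/common/test_utility.py --verbose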

.azure-pipelines/scripts/ut/3x/run_3x_pt.sh

Lines changed: 1 addition & 0 deletions

@@ -13,6 +13,7 @@ export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverag
 inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
 cd /neural-compressor/test || exit 1
 find ./3x/torch/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
+find ./3x/common/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh

 LOG_DIR=/neural-compressor/log_dir
 mkdir -p ${LOG_DIR}

.azure-pipelines/scripts/ut/3x/run_3x_tf.sh

Lines changed: 1 addition & 0 deletions

@@ -13,6 +13,7 @@ export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverag
 inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
 cd /neural-compressor/test || exit 1
 find ./3x/tensorflow/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
+find ./3x/common/* -name "test*.py" | sed 's,\.\/,coverage run --source='"${inc_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh

 LOG_DIR=/neural-compressor/log_dir
 mkdir -p ${LOG_DIR}

.azure-pipelines/ut-3x-pt.yml

Lines changed: 1 addition & 0 deletions

@@ -11,6 +11,7 @@ pr:
     - neural_compressor/common
     - neural_compressor/torch
     - test/3x/torch
+    - test/3x/common
     - setup.py
     - requirements_pt.txt
     - .azure-pipelines/scripts/ut/3x/collect_log_3x.sh

examples/.config/model_params_pytorch.json

Lines changed: 1 addition & 15 deletions

@@ -492,13 +492,6 @@
       "main_script": "run_clm_no_trainer.py",
       "batch_size": 8
     },
-    "opt_125m_woq_gptq_debug_int4":{
-      "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/llm",
-      "dataset_location": "",
-      "input_model": "",
-      "main_script": "run_clm_no_trainer.py",
-      "batch_size": 8
-    },
     "opt_125m_woq_teq":{
       "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/llm",
       "dataset_location": "",
@@ -583,13 +576,6 @@
       "main_script": "run_clm_no_trainer.py",
       "batch_size": 1
     },
-    "gpt_j_woq_gptq_debug_int4":{
-      "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/llm",
-      "dataset_location": "",
-      "input_model": "",
-      "main_script": "run_clm_no_trainer.py",
-      "batch_size": 1
-    },
     "gpt_j_woq_gptq_int4":{
       "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/llm",
       "dataset_location": "",
@@ -618,7 +604,7 @@
       "main_script": "run_clm_no_trainer.py",
       "batch_size": 1
     },
-    "falcon_7b_woq_gptq_debug_int4":{
+    "falcon_7b_woq_gptq_int4":{
       "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/llm",
       "dataset_location": "",
       "input_model": "",

examples/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/main.py

Lines changed: 2 additions & 0 deletions

@@ -153,6 +153,8 @@ def download_url(url, root, filename=None, md5=None): # pragma: no cover
         md5 (str): the md5 string.
     """
     import urllib
+    import ssl
+    ssl._create_default_https_context = ssl._create_unverified_context
     root = os.path.expanduser(root)
     if not filename:
         filename = os.path.basename(url)
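
The added lines replace urllib's default HTTPS context with an unverified one for the whole process, which unblocks dataset downloads on hosts with broken certificate chains at the cost of skipping certificate checks. A minimal sketch of the effect (the URL and filename are placeholders, not from this repo):

    import ssl
    import urllib.request

    # Process-wide opt-out of TLS certificate verification; tolerable here only
    # because download_url can still verify the file against an md5 checksum.
    ssl._create_default_https_context = ssl._create_unverified_context
    urllib.request.urlretrieve("https://example.com/mnist-8.onnx", "mnist-8.onnx")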

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/README.md

Lines changed: 6 additions & 11 deletions

@@ -61,10 +61,9 @@ python run_clm_no_trainer.py \
     --woq_scheme asym \
     --woq_group_size 128 \
     --gptq_pad_max_length 2048 \
-    --gptq_use_max_length \
-    --gptq_debug
+    --gptq_use_max_length
 ```
-**Notes**: Weight-only quantization based on fake quantization is previewly supported and supports RTN, GPTQ[1], AWQ[2], TEQ algorithms. For more details, please refer to [link](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization_weight_only.md). Our GPTQ API support various CLMs including GPTJ, OPTs, Blooms, Llamas, Falcons, MPTs, ChatGLMs, etc. Simply replace the "--model" argument with other models to quantize different CLMs with GPTQ.
+**Notes**: Weight-only quantization based on fake quantization is supported in preview, including RTN, GPTQ[1], AWQ[2], TEQ algorithms. For more details, please refer to [link](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization_weight_only.md). Our GPTQ API support various CLMs including GPTJ, OPTs, Blooms, Llamas, Falcons, MPTs, ChatGLMs, etc. Simply replace the "--model" argument with other models to quantize different CLMs with GPTQ.


 #### Accuracy with lm_eval
@@ -111,8 +110,7 @@ python run_clm_no_trainer.py \
     --woq_scheme asym \
     --woq_group_size 128 \
     --gptq_pad_max_length 2048 \
-    --gptq_use_max_length \
-    --gptq_debug
+    --gptq_use_max_length
 ```

 #### Accuracy with lm_eval
@@ -158,8 +156,7 @@ python run_clm_no_trainer.py \
     --woq_scheme asym \
     --woq_group_size 128 \
     --gptq_pad_max_length 2048 \
-    --gptq_use_max_length \
-    --gptq_debug
+    --gptq_use_max_length
 ```

 #### Accuracy with lm_eval
@@ -202,8 +199,7 @@ python run_clm_no_trainer.py \
     --woq_scheme asym \
     --woq_group_size 128 \
     --gptq_pad_max_length 2048 \
-    --gptq_use_max_length \
-    --gptq_debug
+    --gptq_use_max_length
 ```
 #### Accuracy with lm_eval
 ```bash
@@ -244,8 +240,7 @@ python run_clm_no_trainer.py \
     --woq_scheme asym \
     --woq_group_size 128 \
     --gptq_pad_max_length 2048 \
-    --gptq_use_max_length \
-    --gptq_debug
+    --gptq_use_max_length
 ```
 #### Accuracy with lm_eval
 ```bash

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh

Lines changed: 6 additions & 6 deletions

@@ -79,10 +79,10 @@ function run_benchmark {
         model_name_or_path="facebook/opt-125m"
         approach="weight_only"
         extra_cmd=$extra_cmd" --woq_algo GPTQ"
-    elif [ "${topology}" = "opt_125m_woq_gptq_debug_int4" ]; then
+    elif [ "${topology}" = "opt_125m_woq_gptq_int4" ]; then
         model_name_or_path="facebook/opt-125m"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_scheme asym --woq_group_size 128 --gptq_use_max_length --gptq_debug"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_scheme asym --woq_group_size 128 --gptq_use_max_length"
     elif [ "${topology}" = "opt_125m_woq_teq" ]; then
         model_name_or_path="facebook/opt-125m"
         approach="weight_only"
@@ -106,17 +106,17 @@ function run_benchmark {
         model_name_or_path="EleutherAI/gpt-j-6b"
         approach="weight_only"
         extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_enable_mse_search"
-    elif [ "${topology}" = "gpt_j_woq_gptq_debug_int4" ]; then
+    elif [ "${topology}" = "gpt_j_woq_gptq_int4" ]; then
         model_name_or_path="EleutherAI/gpt-j-6b"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length --gptq_debug"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length"
     elif [ "${topology}" = "falcon_7b_sq" ]; then
         model_name_or_path="tiiuae/falcon-7b-instruct"
         extra_cmd=$extra_cmd" --sq --alpha 0.5"
-    elif [ "${topology}" = "falcon_7b_woq_gptq_debug_int4" ]; then
+    elif [ "${topology}" = "falcon_7b_woq_gptq_int4" ]; then
         model_name_or_path="tiiuae/falcon-7b-instruct"
         approach="weight_only"
-        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length --gptq_debug"
+        extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --gptq_use_max_length"
     fi

     python -u run_clm_no_trainer.py \

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py

Lines changed: 3 additions & 36 deletions

@@ -77,7 +77,6 @@
 parser.add_argument('--gptq_pad_max_length', type=int, default=2048, help='Calibration dataset sequence max length, \
                     this should align with your model config, \
                     and your dataset builder args: args.pad_max_length')
-parser.add_argument('--gptq_debug', action='store_true', help='Whether to use debug model ')
 parser.add_argument('--gptq_static_groups', action='store_true', help='Use determined group to do quantization')
 # ==============code generation args===========
 parser.add_argument("--code_generation", action="store_true")
@@ -292,35 +291,6 @@ def calib_func(prepared_model):
         op_name_dict=op_name_dict,
         recipes=recipes,
     )
-
-    # for test on various models, keep the code of directly call gptq_quantize
-    if args.gptq_debug:
-
-        from neural_compressor.adaptor.torch_utils.weight_only import gptq_quantize
-
-        gptq_conf = {
-            ".*": {
-                'wbits': args.woq_bits,  # 1-8 bits
-                'group_size': args.woq_group_size,  # -1 (per-channel)
-                'sym': (args.woq_scheme == "sym"),
-                'act_order': args.gptq_actorder,
-                'static_groups': args.gptq_static_groups,
-            }
-        }
-        q_model_gptq_debug, gptq_config = gptq_quantize(
-            user_model,
-            weight_config=gptq_conf,
-            dataloader=calib_dataloader,
-            nsamples=args.gptq_nsamples,
-            use_max_length=args.gptq_use_max_length,
-            pad_max_length=args.gptq_pad_max_length,
-        )
-
-        # save the fake quantized model
-        os.makedirs(args.output_dir, exist_ok=True)
-        torch.save(q_model_gptq_debug, os.path.join(args.output_dir, "gptq_best_model.pt"))
-        exit(0)
-
 else:
     if re.search("gpt", user_model.config.model_type):
         op_type_dict = {
@@ -371,12 +341,9 @@ def eval_func(model):
     if args.ipex:
         user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)))
     else:
-        if args.gptq_debug:
-            user_model = torch.load(os.path.join(args.output_dir, "gptq_best_model.pt"))
-        else:
-            user_model, _ = get_user_model()
-            kwargs = {'weight_only': True} if args.approach == 'weight_only' else {}
-            user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)), user_model, **kwargs)
+        user_model, _ = get_user_model()
+        kwargs = {'weight_only': True} if args.approach == 'weight_only' else {}
+        user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)), user_model, **kwargs)
 else:
     user_model, _ = get_user_model()
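
With the debug path removed, saved weight-only checkpoints are reloaded only through the unified load() call kept above. A minimal sketch of that flow, assuming the helper is neural_compressor.utils.pytorch.load (as the script's own call suggests) and using a placeholder model id and output directory:

    import os
    from transformers import AutoModelForCausalLM
    from neural_compressor.utils.pytorch import load

    # Placeholder assumptions: model id and the directory the script saved to.
    output_dir = "./saved_results"
    user_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")

    # weight_only=True restores the weight-only quantized weights into the
    # freshly constructed FP32 model, mirroring the kwargs built above.
    user_model = load(os.path.abspath(os.path.expanduser(output_dir)), user_model, weight_only=True)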

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_gptj_mlperf_int4.py

Lines changed: 1 addition & 1 deletion

@@ -315,7 +315,7 @@ def forward(self, *inp, **kwargs):
     'percdamp': 0.01,
     'act_order':args.act_order,
     'block_size': args.block_size,
-    'nsampeles': args.nsamples,
+    'nsamples': args.nsamples,
     'use_max_length': args.use_max_length,
     'pad_max_length': args.pad_max_length
 },
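
This one-character fix matters because the value of args.nsamples was stored under the misspelled key 'nsampeles', so whatever reads the 'nsamples' entry of this config would not have found it. For reference, a sketch of the corrected config fragment (values are illustrative placeholders, not this script's actual arguments):

    # Illustrative GPTQ weight-config fragment with the corrected key.
    gptq_conf = {
        'percdamp': 0.01,      # Hessian dampening factor
        'act_order': False,    # quantize columns in activation order
        'block_size': 128,
        'nsamples': 128,       # calibration sample count (was 'nsampeles')
        'use_max_length': True,
        'pad_max_length': 2048,
    }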
