intel · chensuyue · May 16, 2024 · May 7, 2024 · May 8, 2024 · May 8, 2024
diff --git a/...i/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py b/...i/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
@@ -340,13 +340,13 @@ def run_fn_for_gptq(model, dataloader_for_calibration, *args):
                 quant_config = SmoothQuantConfig(alpha=args.alpha, folding=True)
 
             if re.search("gpt", user_model.config.model_type):
-                quant_config.set_local("add", SmoothQuantConfig(w_dtype="fp32", act_dtype="fp32"))
+                quant_config.set_local(torch.add, SmoothQuantConfig(w_dtype="fp32", act_dtype="fp32"))
         else:
             from neural_compressor.torch.quantization import get_default_static_config, StaticQuantConfig
 
             quant_config =  get_default_static_config()
             if re.search("gpt", user_model.config.model_type):
-                quant_config.set_local("add", StaticQuantConfig(w_dtype="fp32", act_dtype="fp32"))
+                quant_config.set_local(torch.add, StaticQuantConfig(w_dtype="fp32", act_dtype="fp32"))
 
         from neural_compressor.torch.algorithms.smooth_quant import move_input_to_device
         from tqdm import tqdm
@@ -397,7 +397,7 @@ def run_fn(model):
 #         print("Int8 model loading does not support WeightOnlyQuant now.")
 #         pass
 # else:
-#     user_model, _ = get_user_model()
+        user_model, _ = get_user_model()
 
 
 if args.accuracy:

diff --git a/neural_compressor/torch/algorithms/static_quant/utility.py b/neural_compressor/torch/algorithms/static_quant/utility.py
@@ -90,9 +90,10 @@ def check_cfg_and_qconfig(user_cfg, cfgs, op_infos_from_cfgs, output_tensor_ids_
         for i, op_name in enumerate(op):
             for ops, _ in op_infos_from_cfgs.items():
                 if "fqn" in op_infos_from_cfgs[ops].keys() and op_infos_from_cfgs[ops]["fqn"] == op_name:
-                    ori_op = (tuple(ops), unify_op_type_mapping_ipex[op_infos_from_cfgs[ops]["op_type"]])
-                    tmp_user_cfg[((ori_op[0],), ori_op[1])] = user_cfg[op]
-                    break
+                    if op_infos_from_cfgs[ops]["op_type"] in unify_op_type_mapping_ipex:
+                        ori_op = (tuple(ops), unify_op_type_mapping_ipex[op_infos_from_cfgs[ops]["op_type"]])
+                        tmp_user_cfg[((ori_op[0],), ori_op[1])] = user_cfg[op]
+                        break
     user_cfg = tmp_user_cfg
     for op_name in user_cfg:
         inc_op_cfg = user_cfg[op_name]
@@ -291,15 +292,12 @@ def get_quantizable_ops_recursively(model, example_inputs):  # pragma: no cover
                     map_op_name_to_fqn[(tuple(name), ipex_op_type)] = module_fqn
                     if "class" in ipex_op_type:  # "<class 'torch.nn.modules.activation.ReLU'>"
                         op_type = ipex_op_type.split("'")[1]
-                        op_name_info.append((module_fqn, eval(op_type)))
+                        op_name_info.append((module_fqn, eval(op_type).__name__))
                     elif "method" in ipex_op_type:  # "<method 'add' of 'torch._C._TensorBase' objects>"
                         method = ipex_op_type.split("'")[1]
-                        op_type = getattr(
-                            torch._C._TensorBase if ipex_ver.release < Version("2.2") else torch._C.TensorBase, method
-                        )
-                        op_name_info.append((module_fqn, op_type))
-                    else:
-                        op_name_info.append((module_fqn, op_type))
+                        op_name_info.append((module_fqn, method))
+                    elif "Convolution" in ipex_op_type:  # "Convolution_Relu"
+                        op_name_info.append((module_fqn, "Conv2d"))
                 else:
                     re_flag = False
                     for pattern, unify_op_type in unify_op_type_mapping_ipex["re"].items():

diff --git a/test/3x/torch/quantization/test_smooth_quant.py b/test/3x/torch/quantization/test_smooth_quant.py
@@ -55,6 +55,21 @@ def test_smooth_quant_auto(self):
         q_model = quantize(fp32_model, quant_config=quant_config, run_fn=run_fn, example_inputs=example_inputs)
         assert q_model is not None, "Quantization failed!"
 
+    @pytest.mark.skipif(not is_ipex_available(), reason="Requires IPEX")
+    def test_smooth_quant_fallback(self):
+        fp32_model = copy.deepcopy(model)
+        quant_config = get_default_sq_config()
+        example_inputs = torch.randn([1, 3])
+        # fallback by op_type: keep every torch.nn.Linear op in fp32
+        quant_config.set_local(torch.nn.Linear, SmoothQuantConfig(w_dtype="fp32", act_dtype="fp32"))
+        q_model = quantize(fp32_model, quant_config=quant_config, run_fn=run_fn, example_inputs=example_inputs)
+        assert q_model is not None, "Quantization failed!"
+
+        for op, op_info in q_model.tune_cfg[" "]["q_op_infos"].items():
+            if op_info["op_type"] == "<class 'torch.nn.modules.linear.Linear'>":
+                dtype = q_model.tune_cfg[" "]["q_op_infos"][op]["input_tensor_infos"][0]["force_dtype"]
+                assert dtype == "torch.float32", "Failed to fallback linear op, please check!"
+
     @pytest.mark.skipif(not is_ipex_available(), reason="Requires IPEX")
     @pytest.mark.parametrize(
         "act_sym, act_algo, alpha, folding, scale_sharing",

diff --git a/test/3x/torch/quantization/test_static_quant.py b/test/3x/torch/quantization/test_static_quant.py
@@ -63,19 +63,29 @@ def test_static_quant_fallback(self):
         quant_config = get_default_static_config()
         example_inputs = self.input
         # fallback by op_type
-        quant_config.set_local(torch.nn.modules.linear.Linear, StaticQuantConfig(w_dtype="fp32", act_dtype="fp32"))
+        quant_config.set_local(torch.nn.Linear, StaticQuantConfig(w_dtype="fp32", act_dtype="fp32"))
         prepared_model = prepare(fp32_model, quant_config=quant_config, example_inputs=example_inputs)
         run_fn(prepared_model)
         q_model = convert(prepared_model)
         assert q_model is not None, "Quantization failed!"
 
+        for op, op_info in q_model.tune_cfg[" "]["q_op_infos"].items():
+            if op_info["op_type"] == "<class 'torch.nn.modules.linear.Linear'>":
+                dtype = q_model.tune_cfg[" "]["q_op_infos"][op]["input_tensor_infos"][0]["force_dtype"]
+                assert dtype == "torch.float32", "Failed to fallback linear op, please check!"
+
         # fallback by op_name
         quant_config.set_local("fc1", StaticQuantConfig(w_dtype="fp32", act_dtype="fp32"))
         prepared_model = prepare(fp32_model, quant_config=quant_config, example_inputs=example_inputs)
         run_fn(prepared_model)
         q_model = convert(prepared_model)
         assert q_model is not None, "Quantization failed!"
 
+        for op, op_info in q_model.tune_cfg[" "]["q_op_infos"].items():
+            if op_info["fqn"] == "fc1":
+                dtype = q_model.tune_cfg[" "]["q_op_infos"][op]["input_tensor_infos"][0]["force_dtype"]
+                assert dtype == "torch.float32", "Failed to fallback fc1 layer, please check!"
+
     @pytest.mark.skipif(not is_ipex_available(), reason="Requires IPEX")
     @pytest.mark.parametrize(
         "act_sym, act_algo",