Fixed ipex linear param check and logging once (#795)
* fix ipex linear group size check and sym check

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix logging once

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix typo

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
jiqing-feng authored Dec 6, 2024
1 parent 684da50 commit 26961ce
Showing 3 changed files with 12 additions and 9 deletions.
gptqmodel/nn_modules/qlinear/ipex.py (2 additions, 4 deletions)

@@ -51,7 +51,7 @@ def convert_dtype_torch2str(dtype):
 
 class IPEXQuantLinear(BaseQuantLinear):
     SUPPORTS_BITS = [4]
-    SUPPORTS_GROUP_SIZE = [-1, 16, 32, 64, 128]
+    SUPPORTS_GROUP_SIZE = [16, 32, 64, 128]
     SUPPORTS_DESC_ACT = [True, False]
     SUPPORTS_SYM = [True, False]
     SUPPORTS_SHARDS = True
@@ -78,7 +78,6 @@ def __init__(
         weight_dtype=None,
         **kwargs,
     ):
-        self.sym = False
         super().__init__(bits=bits, group_size=group_size, sym=sym, desc_act=desc_act, infeatures=infeatures, outfeatures=outfeatures, **kwargs)
 
         if weight_dtype is None:
@@ -87,10 +86,9 @@
         self.infeatures = infeatures
         self.outfeatures = outfeatures
         self.bits = bits
-        self.group_size = group_size if group_size != -1 else infeatures
+        self.group_size = group_size
         self.maxq = 2**self.bits - 1
         self.weight_dtype = weight_dtype
-        self.asym = True
         self.init_ipex = False
 
         self.register_buffer(
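
The net effect in this file is that the IPEX kernel now declares only the group sizes it actually supports and no longer overrides sym/asym or remaps group_size=-1, so the capability check can reject unsupported configurations instead of silently rewriting them. Below is a minimal sketch of that kind of capability gate; check_ipex_params is a hypothetical helper for illustration only, not the library's BaseQuantLinear.validate().

    def check_ipex_params(bits, group_size, sym):
        # Capability lists mirror the class attributes in the diff above.
        SUPPORTS_BITS = [4]
        SUPPORTS_GROUP_SIZE = [16, 32, 64, 128]  # -1 (whole-row groups) is no longer accepted
        SUPPORTS_SYM = [True, False]

        if bits not in SUPPORTS_BITS:
            return False, f"bits={bits} not in {SUPPORTS_BITS}"
        if group_size not in SUPPORTS_GROUP_SIZE:
            return False, f"group_size={group_size} not in {SUPPORTS_GROUP_SIZE}"
        if sym not in SUPPORTS_SYM:
            return False, f"sym={sym} not in {SUPPORTS_SYM}"
        return True, None

    # check_ipex_params(4, -1, True) -> (False, "group_size=-1 not in [16, 32, 64, 128]")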
gptqmodel/quantization/gptq.py (4 additions, 0 deletions)

@@ -185,6 +185,8 @@ def fasterquant(
 
         if torch.cuda.is_available():
            torch.cuda.synchronize()
+        if hasattr(torch, "xpu") and torch.xpu.is_available():
+            torch.xpu.synchronize()
         duration = time.time() - tick
         avg_loss = torch.sum(Losses).item() / self.nsamples
 
@@ -224,6 +226,8 @@ def free(self):
         self.Losses = None
         self.Trace = None
         torch.cuda.empty_cache()
+        if hasattr(torch, "xpu") and torch.xpu.is_available():
+            torch.xpu.synchronize()
 
 
 __all__ = ["GPTQ"]
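
Both hunks use the same guard: hasattr(torch, "xpu") keeps the module importable on PyTorch builds that ship without the XPU backend, and torch.xpu.is_available() checks for an Intel GPU at runtime before synchronizing, so the timing in fasterquant measures finished kernels rather than queued ones. A small self-contained sketch of the pattern, assuming a recent PyTorch where torch.xpu may or may not be present:

    import time
    import torch

    def synchronize_accelerators():
        # Block until queued kernels finish so wall-clock timing is meaningful.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        # Older torch builds may lack torch.xpu entirely, hence the hasattr guard.
        if hasattr(torch, "xpu") and torch.xpu.is_available():
            torch.xpu.synchronize()

    tick = time.time()
    # ... launch quantization work on the accelerator ...
    synchronize_accelerators()
    duration = time.time() - tick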
gptqmodel/utils/importer.py (6 additions, 5 deletions)

@@ -17,6 +17,7 @@
 from ..utils.logger import setup_logger
 from .backend import BACKEND, get_backend
 
+message_logged = False
 logger = setup_logger()
 
 backend_dict = OrderedDict({
@@ -104,23 +105,23 @@ def select_quant_linear(
     allow_backends = format_dict[format]
     allow_quant_linears = backend_dict
     err = None
+    global message_logged
     # Suppose all quant linears in the model should have the same backend.
-    has_logged = False
     for k, v in allow_quant_linears.items():
         in_allow_backends = k in allow_backends
         validate, err = v.validate(bits, group_size, desc_act, sym, dynamic=dynamic, device=device, trainable=trainable)
         if in_allow_backends and validate:
             if pack:
                 check_pack_func = hasattr(v, "pack")
                 if check_pack_func:
-                    if not has_logged:
+                    if not message_logged:
                         logger.info(f"Auto choose the fastest one based on quant model compatibility: {v}")
-                        has_logged = True
+                        message_logged = True
                     return v
             else:
-                if not has_logged:
+                if not message_logged:
                     logger.info(f"Auto choose the fastest one based on quant model compatibility: {v}")
-                    has_logged = True
+                    message_logged = True
                 return v
 
     if err:
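
The logging fix replaces the function-local has_logged flag, which was reset to False on every call to select_quant_linear and therefore repeated the info message, with a module-level message_logged flag updated via global, so the message is emitted once per process. A minimal sketch of the log-once pattern, using hypothetical names (log_backend_choice_once, _message_logged) rather than the repository's:

    import logging

    logger = logging.getLogger(__name__)
    _message_logged = False  # module-level: persists across calls, unlike a local flag

    def log_backend_choice_once(backend):
        # A local flag would be re-created (and reset) on each call; the module-level
        # flag makes the info line appear only the first time in the process.
        global _message_logged
        if not _message_logged:
            logger.info(f"Auto choose the fastest one based on quant model compatibility: {backend}")
            _message_logged = True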
