Fixed ipex linear param check and logging once (#795)
* fix ipex linear group size check and sym check

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix logging once

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix typo

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
jiqing-feng authored Dec 6, 2024
1 parent 684da50 commit 26961ce
Showing 3 changed files with 12 additions and 9 deletions.
gptqmodel/nn_modules/qlinear/ipex.py (2 additions, 4 deletions)

@@ -51,7 +51,7 @@ def convert_dtype_torch2str(dtype):
 
 class IPEXQuantLinear(BaseQuantLinear):
     SUPPORTS_BITS = [4]
-    SUPPORTS_GROUP_SIZE = [-1, 16, 32, 64, 128]
+    SUPPORTS_GROUP_SIZE = [16, 32, 64, 128]
     SUPPORTS_DESC_ACT = [True, False]
     SUPPORTS_SYM = [True, False]
     SUPPORTS_SHARDS = True
@@ -78,7 +78,6 @@ def __init__(
         weight_dtype=None,
         **kwargs,
     ):
-        self.sym = False
         super().__init__(bits=bits, group_size=group_size, sym=sym, desc_act=desc_act, infeatures=infeatures, outfeatures=outfeatures, **kwargs)
 
         if weight_dtype is None:
@@ -87,10 +86,9 @@
         self.infeatures = infeatures
         self.outfeatures = outfeatures
         self.bits = bits
-        self.group_size = group_size if group_size != -1 else infeatures
+        self.group_size = group_size
         self.maxq = 2**self.bits - 1
         self.weight_dtype = weight_dtype
-        self.asym = True
         self.init_ipex = False
 
         self.register_buffer(
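
The net effect in this file is that the IPEX kernel now declares only the group sizes it actually supports and no longer overrides sym/asym or remaps group_size=-1, so the capability check can reject unsupported configurations instead of silently rewriting them. Below is a minimal sketch of that kind of capability gate; check_ipex_params is a hypothetical helper for illustration only, not the library's BaseQuantLinear.validate().

    def check_ipex_params(bits, group_size, sym):
        # Capability lists mirror the class attributes in the diff above.
        SUPPORTS_BITS = [4]
        SUPPORTS_GROUP_SIZE = [16, 32, 64, 128]  # -1 (whole-row groups) is no longer accepted
        SUPPORTS_SYM = [True, False]

        if bits not in SUPPORTS_BITS:
            return False, f"bits={bits} not in {SUPPORTS_BITS}"
        if group_size not in SUPPORTS_GROUP_SIZE:
            return False, f"group_size={group_size} not in {SUPPORTS_GROUP_SIZE}"
        if sym not in SUPPORTS_SYM:
            return False, f"sym={sym} not in {SUPPORTS_SYM}"
        return True, None

    # check_ipex_params(4, -1, True) -> (False, "group_size=-1 not in [16, 32, 64, 128]")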
gptqmodel/quantization/gptq.py (4 additions, 0 deletions)

@@ -185,6 +185,8 @@ def fasterquant(
 
         if torch.cuda.is_available():
            torch.cuda.synchronize()
+        if hasattr(torch, "xpu") and torch.xpu.is_available():
+            torch.xpu.synchronize()
         duration = time.time() - tick
         avg_loss = torch.sum(Losses).item() / self.nsamples
 
@@ -224,6 +226,8 @@ def free(self):
         self.Losses = None
         self.Trace = None
         torch.cuda.empty_cache()
+        if hasattr(torch, "xpu") and torch.xpu.is_available():
+            torch.xpu.synchronize()
 
 
 __all__ = ["GPTQ"]
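
Both hunks use the same guard: hasattr(torch, "xpu") keeps the module importable on PyTorch builds that ship without the XPU backend, and torch.xpu.is_available() checks for an Intel GPU at runtime before synchronizing, so the timing in fasterquant measures finished kernels rather than queued ones. A small self-contained sketch of the pattern, assuming a recent PyTorch where torch.xpu may or may not be present:

    import time
    import torch

    def synchronize_accelerators():
        # Block until queued kernels finish so wall-clock timing is meaningful.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        # Older torch builds may lack torch.xpu entirely, hence the hasattr guard.
        if hasattr(torch, "xpu") and torch.xpu.is_available():
            torch.xpu.synchronize()

    tick = time.time()
    # ... launch quantization work on the accelerator ...
    synchronize_accelerators()
    duration = time.time() - tick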
gptqmodel/utils/importer.py (6 additions, 5 deletions)

@@ -17,6 +17,7 @@
 from ..utils.logger import setup_logger
 from .backend import BACKEND, get_backend
 
+message_logged = False
 logger = setup_logger()
 
 backend_dict = OrderedDict({
@@ -104,23 +105,23 @@ def select_quant_linear(
     allow_backends = format_dict[format]
     allow_quant_linears = backend_dict
     err = None
+    global message_logged
     # Suppose all quant linears in the model should have the same backend.
-    has_logged = False
     for k, v in allow_quant_linears.items():
         in_allow_backends = k in allow_backends
         validate, err = v.validate(bits, group_size, desc_act, sym, dynamic=dynamic, device=device, trainable=trainable)
         if in_allow_backends and validate:
             if pack:
                 check_pack_func = hasattr(v, "pack")
                 if check_pack_func:
-                    if not has_logged:
+                    if not message_logged:
                         logger.info(f"Auto choose the fastest one based on quant model compatibility: {v}")
-                        has_logged = True
+                        message_logged = True
                     return v
             else:
-                if not has_logged:
+                if not message_logged:
                     logger.info(f"Auto choose the fastest one based on quant model compatibility: {v}")
-                    has_logged = True
+                    message_logged = True
                 return v
 
     if err:
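
The logging fix replaces the function-local has_logged flag, which was reset to False on every call to select_quant_linear and therefore repeated the info message, with a module-level message_logged flag updated via global, so the message is emitted once per process. A minimal sketch of the log-once pattern, using hypothetical names (log_backend_choice_once, _message_logged) rather than the repository's:

    import logging

    logger = logging.getLogger(__name__)
    _message_logged = False  # module-level: persists across calls, unlike a local flag

    def log_backend_choice_once(backend):
        # A local flag would be re-created (and reset) on each call; the module-level
        # flag makes the info line appear only the first time in the process.
        global _message_logged
        if not _message_logged:
            logger.info(f"Auto choose the fastest one based on quant model compatibility: {backend}")
            _message_logged = True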
