From 99ce2b4215b6b9e725076b72e0b2ee39c017768b Mon Sep 17 00:00:00 2001
From: rand-fly
Date: Tue, 20 May 2025 01:26:38 +0800
Subject: [PATCH] [Bugfix] fix adding bias twice in ipex GPTQ quantization

`layer.ipex_qlinear` already applies the bias inside its forward pass
(the bias is handed to IPEX when the kernel is created), so adding
`bias` again in `apply()` applied it twice for GPTQ models. Drop the
redundant addition.

Signed-off-by: rand-fly
---
 vllm/model_executor/layers/quantization/ipex_quant.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/ipex_quant.py b/vllm/model_executor/layers/quantization/ipex_quant.py
index 8bce6bba460a..b7baa3d3363b 100644
--- a/vllm/model_executor/layers/quantization/ipex_quant.py
+++ b/vllm/model_executor/layers/quantization/ipex_quant.py
@@ -181,8 +181,6 @@ def apply(self,
               layer: torch.nn.Module,
               x: torch.Tensor,
               bias: Optional[torch.Tensor] = None) -> torch.Tensor:
         reshaped_x = x.reshape(-1, x.shape[-1])
         out = layer.ipex_qlinear(reshaped_x)
-        if bias is not None:
-            out.add_(bias)
         return out.reshape(x.shape[:-1] + (layer.ipex_output_size, ))
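
For reference, a minimal standalone sketch of the failure mode. FakeQLinear
is a hypothetical stand-in for IPEX's quantized linear (not the IPEX API);
like the real module, it applies its bias inside forward(), so the extra
out.add_(bias) in the old apply() counted the bias twice:

    import torch

    # Hypothetical stand-in for layer.ipex_qlinear: the bias is applied
    # inside forward(), just as in the real IPEX quantized linear.
    class FakeQLinear(torch.nn.Module):
        def __init__(self, weight: torch.Tensor, bias: torch.Tensor):
            super().__init__()
            self.weight = weight
            self.bias = bias

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return x @ self.weight.t() + self.bias

    weight = torch.randn(8, 16)
    bias = torch.randn(8)
    qlinear = FakeQLinear(weight, bias)
    x = torch.randn(4, 16)

    expected = x @ weight.t() + bias

    # Before this patch: apply() added `bias` on top of the bias already
    # applied inside the qlinear forward, doubling it.
    out_buggy = qlinear(x)
    out_buggy.add_(bias)
    print(torch.allclose(out_buggy, expected + bias))  # True: bias doubled

    # After this patch: the bias inside the qlinear is the only one applied.
    out_fixed = qlinear(x)
    print(torch.allclose(out_fixed, expected))  # True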