Skip to content

Commit

Permalink
[Bugfix] Fix awq_marlin and gptq_marlin flags (vllm-project#6745)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexm-redhat authored and cadedaniel committed Jul 27, 2024
1 parent 06641ec commit 4aab180
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
5 changes: 3 additions & 2 deletions vllm/model_executor/layers/quantization/awq_marlin.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class AWQMarlinConfig(QuantizationConfig):
def __init__(self, weight_bits: int, group_size: int, has_zp: bool,
lm_head_quantized: bool) -> None:
self.weight_bits = weight_bits
- self.pack_factor = 32 // self.weight_bits # packed into int32
+ self.pack_factor = 32 // self.weight_bits # packed into 32bits
self.group_size = group_size
self.has_zp = has_zp
self.lm_head_quantized = lm_head_quantized
Expand Down Expand Up @@ -69,7 +69,8 @@ def from_config(cls, config: Dict[str, Any]) -> "AWQMarlinConfig":
def override_quantization_method(cls, hf_quant_cfg,
user_quant) -> Optional[str]:
can_convert = cls.is_awq_marlin_compatible(hf_quant_cfg)
- is_valid_user_quant = (user_quant is None or user_quant == "marlin")
+ is_valid_user_quant = (user_quant is None or user_quant == "marlin"
+ or user_quant == "awq_marlin")

if can_convert and is_valid_user_quant:
msg = ("The model is convertible to {} during runtime."
Expand Down
3 changes: 2 additions & 1 deletion vllm/model_executor/layers/quantization/gptq_marlin.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ def override_quantization_method(cls, hf_quant_cfg,
user_quant) -> Optional[str]:
can_convert = cls.is_gptq_marlin_compatible(hf_quant_cfg)

- is_valid_user_quant = (user_quant is None or user_quant == "marlin")
+ is_valid_user_quant = (user_quant is None or user_quant == "marlin"
+ or user_quant == "gptq_marlin")

if can_convert and is_valid_user_quant:
msg = ("The model is convertible to {} during runtime."
Expand Down

0 comments on commit 4aab180

Please sign in to comment.