Skip to content

Commit 0d4702e

Browse files
Fix pre-commit issues
- Fix QuarkW8A8Fp8 constructor args (uses weight_config/input_config)
- Fix line length in compressed_tensors_w8a8_int8.py
- Apply yapf and isort formatting

Signed-off-by: padg9912 <phone.and.desktop@gmail.com>
1 parent e62cfa0 commit 0d4702e

File tree

2 files changed

+9
-3
lines changed

2 files changed

+9
-3
lines changed

vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ def int8_to_fp8_weight_loader(param, loaded_weight, *args, **kwargs):
             # This preserves the actual values rather than reinterpreting bits
             fp8_dtype = current_platform.fp8_dtype()
             # Simple cast - FP8 can represent the INT8 range
-            loaded_weight = loaded_weight.to(torch.float32).to(fp8_dtype)
+            loaded_weight = loaded_weight.to(
+                torch.float32).to(fp8_dtype)
             return original_loader(param, loaded_weight, *args, **kwargs)

         return self._fp8_scheme.create_weights(

vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_int8.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,14 @@ def __init__(self, qscheme: str, is_static_input_scheme: Optional[bool],
         from vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_fp8 import ( # noqa: E501
             QuarkW8A8Fp8)

+        # Create FP8 config matching INT8 setup
+        weight_config = {"qscheme": qscheme}
+        input_config = None
+        if is_static_input_scheme:
+            input_config = {"is_dynamic": False, "qscheme": qscheme}
+
         self._fp8_scheme = QuarkW8A8Fp8(
-            qscheme=qscheme,
-            is_static_input_scheme=is_static_input_scheme)
+            weight_config=weight_config, input_config=input_config)

     @classmethod
     def get_min_capability(cls) -> int:

0 commit comments

Comments (0)