vllm/model_executor/layers/quantization: 1 file changed, +3 −3 lines

@@ -174,8 +174,8 @@ class ModelOptNvFp4Config(QuantizationConfig):
 
     def __init__(self,
                  is_checkpoint_nvfp4_serialized: bool = False,
-                 kv_cache_quant_algo: str = None,
-                 group_size: int = None,
+                 kv_cache_quant_algo: str = "",
+                 group_size: int = 16,
                  exclude_modules: List[str] = None) -> None:
         self.is_checkpoint_nvfp4_serialized = is_checkpoint_nvfp4_serialized
         if is_checkpoint_nvfp4_serialized:
@@ -252,7 +252,7 @@ class ModelOptNvFp4LinearMethod(LinearMethodBase):
     Args: quant_config: The ModelOpt quantization config.
     """
 
-    def __init__(self, quant_config: ModelOptFp8Config):
+    def __init__(self, quant_config: ModelOptNvFp4Config):
         self.quant_config = quant_config
         self.cutlass_nvfp4_supported = cutlass_fp4_supported()
         if not self.cutlass_nvfp4_supported:
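The second hunk only corrects the type annotation: `ModelOptNvFp4LinearMethod` is constructed with an NvFp4 config, so annotating the parameter as `ModelOptFp8Config` misleads readers and type checkers, even though runtime behavior is unchanged. A hedged sketch with simplified stand-in classes (names are illustrative, not the vLLM API):

```python
# Simplified stand-ins (illustrative only, not the vLLM classes) showing why the
# annotation matters: the NvFp4 linear method reads NvFp4-specific fields such as
# group_size, which a checker would flag if the parameter were typed as the Fp8 config.
class Fp8ConfigSketch:
    pass


class NvFp4ConfigSketch:
    def __init__(self, group_size: int = 16) -> None:
        self.group_size = group_size


class NvFp4LinearMethodSketch:
    def __init__(self, quant_config: NvFp4ConfigSketch) -> None:
        self.quant_config = quant_config
        # With the old Fp8-style annotation this attribute access would be
        # reported by mypy/pyright even though it is correct at runtime.
        self.group_size = quant_config.group_size


method = NvFp4LinearMethodSketch(NvFp4ConfigSketch())
assert method.group_size == 16
```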