neuralmagic · rahul-tuli · Oct 4, 2024 · Oct 4, 2024 · kylesayrs · Oct 7, 2024
diff --git a/src/compressed_tensors/quantization/lifecycle/apply.py b/src/compressed_tensors/quantization/lifecycle/apply.py
@@ -107,8 +107,8 @@ def load_pretrained_quantization(model: Module, model_name_or_path: str):
 
 
 def apply_quantization_config(
-    model: Module, config: QuantizationConfig, run_compressed: bool = False
-) -> Dict:
+    model: Module, config: Union[QuantizationConfig, None], run_compressed: bool = False
+) -> OrderedDict:
     """
     Initializes the model for quantization in-place based on the given config
 
@@ -117,6 +117,10 @@ def apply_quantization_config(
     :param run_compressed: Whether the model will be run in compressed mode or
         decompressed fully on load
     """
+    # Workaround for when HF Quantizer passes None, see PR #180
+    if config is None:
+        return OrderedDict()
+
     # remove reference to the original `config`
     # argument. This function can mutate it, and we'd
     # like to keep the original `config` as it is.
@@ -186,14 +190,14 @@ def apply_quantization_config(
     return names_to_scheme
 
 
-def process_quantization_config(config: QuantizationConfig) -> QuantizationConfig:
+def process_quantization_config(config: Optional[QuantizationConfig]) -> Optional[QuantizationConfig]:
     """
     Preprocess the raw QuantizationConfig
 
-    :param config: the raw QuantizationConfig
-    :return: the processed QuantizationConfig
+    :param config: the raw QuantizationConfig, or None
+    :return: the processed QuantizationConfig, or None when `config` is None
     """
-    if config.kv_cache_scheme is not None:
+    if config is not None and config.kv_cache_scheme is not None:
         config = process_kv_cache_config(config)
 
     return config