vllm-project · vllm-bot · Aug 4, 2025 · Aug 4, 2025
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
@@ -290,20 +290,29 @@ def _maybe_remap_hf_config_attrs(config: PretrainedConfig) -> PretrainedConfig:
 
 
 def maybe_override_with_speculators_target_model(
-        model: str,
-        tokenizer: str,
-        trust_remote_code: bool,
-        revision: Optional[str] = None) -> tuple[str, str]:
+    model: str,
+    tokenizer: str,
+    trust_remote_code: bool,
+    revision: Optional[str] = None,
+    **kwargs,
+) -> tuple[str, str]:
     """
     If running a speculators config, override running model with target model
     """
+    is_gguf = check_gguf_file(model)
+    if is_gguf:
+        kwargs["gguf_file"] = Path(model).name
+        gguf_model_repo = Path(model).parent
+    else:
+        gguf_model_repo = None
     config_dict, _ = PretrainedConfig.get_config_dict(
-        model,
+        model if gguf_model_repo is None else gguf_model_repo,
         revision=revision,
         trust_remote_code=trust_remote_code,
         token=_get_hf_token(),
+        **kwargs,
     )
-    spec_config = config_dict.get("speculators_config")
+    spec_config = config_dict.get("speculators_config", None)
     # Return the target model
     if spec_config is not None:
         model = tokenizer = spec_config["verifier"]["name_or_path"]