diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 0e633c2c0b6a..cc41a771d06c 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -290,20 +290,29 @@ def _maybe_remap_hf_config_attrs(config: PretrainedConfig) -> PretrainedConfig: def maybe_override_with_speculators_target_model( - model: str, - tokenizer: str, - trust_remote_code: bool, - revision: Optional[str] = None) -> tuple[str, str]: + model: str, + tokenizer: str, + trust_remote_code: bool, + revision: Optional[str] = None, + **kwargs, +) -> tuple[str, str]: """ If running a speculators config, override running model with target model """ + is_gguf = check_gguf_file(model) + if is_gguf: + kwargs["gguf_file"] = Path(model).name + gguf_model_repo = Path(model).parent + else: + gguf_model_repo = None config_dict, _ = PretrainedConfig.get_config_dict( - model, + model if gguf_model_repo is None else gguf_model_repo, revision=revision, trust_remote_code=trust_remote_code, token=_get_hf_token(), + **kwargs, ) - spec_config = config_dict.get("speculators_config") + spec_config = config_dict.get("speculators_config", None) # Return the target model if spec_config is not None: model = tokenizer = spec_config["verifier"]["name_or_path"]