@@ -560,15 +560,6 @@ def __post_init__(self) -> None:
560560 "affect the random state of the Python process that "
561561 "launched vLLM." , self .seed )
562562
563- if self .runner != "draft" :
564- # If we're not running the draft model, check for speculators config
565- # If speculators config, set model / tokenizer to be target model
566- self .model , self .tokenizer = maybe_override_with_speculators_target_model ( # noqa: E501
567- model = self .model ,
568- tokenizer = self .tokenizer ,
569- revision = self .revision ,
570- trust_remote_code = self .trust_remote_code )
571-
572563 # Keep set served_model_name before maybe_model_redirect(self.model)
573564 self .served_model_name = get_served_model_name (self .model ,
574565 self .served_model_name )
@@ -609,6 +600,15 @@ def __post_init__(self) -> None:
 
         self.maybe_pull_model_tokenizer_for_s3(self.model, self.tokenizer)
 
+        if self.runner != "draft":
+            # If we're not running the draft model, check for speculators config
+            # If speculators config, set model / tokenizer to be target model
+            self.model, self.tokenizer = maybe_override_with_speculators_target_model(  # noqa: E501
+                model=self.model,
+                tokenizer=self.tokenizer,
+                revision=self.revision,
+                trust_remote_code=self.trust_remote_code)
+
         if (backend := envs.VLLM_ATTENTION_BACKEND
             ) and backend == "FLASHINFER" and find_spec("flashinfer") is None:
             raise ValueError(
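For readers skimming the diff: the change is a pure move, relocating the speculators override block so it runs after `self.maybe_pull_model_tokenizer_for_s3` instead of before `get_served_model_name`. Below is a minimal, self-contained sketch of the resulting ordering in `__post_init__`. All class and helper bodies here are hypothetical placeholders, not vLLM's real implementations, and the rationale in the comments (that the check should see the already-pulled model rather than a raw s3:// URI) is an inference from the call order, not stated in the commit.

```python
# Sketch of the post-move control flow (assumption: placeholder bodies; the
# real vLLM helpers read model configs and download from S3).
from dataclasses import dataclass, field
from typing import Optional


def maybe_override_with_speculators_target_model(
        *, model: str, tokenizer: str, revision: Optional[str],
        trust_remote_code: bool) -> tuple[str, str]:
    # Assumed behavior, inferred from the call site: for a speculators
    # (draft) checkpoint, return the target model/tokenizer it points at;
    # otherwise pass the inputs through unchanged.
    return model, tokenizer


@dataclass
class SketchModelConfig:
    model: str
    tokenizer: str
    runner: str = "auto"
    revision: Optional[str] = None
    trust_remote_code: bool = False
    steps: list = field(default_factory=list)

    def maybe_pull_model_tokenizer_for_s3(self, model: str,
                                          tokenizer: str) -> None:
        # Placeholder for the real method, which resolves s3:// paths
        # to local directories.
        self.steps.append("s3 pull")

    def __post_init__(self) -> None:
        # Post-move order: resolve remote (S3) paths first...
        self.maybe_pull_model_tokenizer_for_s3(self.model, self.tokenizer)
        # ...then check for a speculators config, so the check operates on
        # the resolved model rather than a raw s3:// URI.
        if self.runner != "draft":
            self.model, self.tokenizer = \
                maybe_override_with_speculators_target_model(
                    model=self.model,
                    tokenizer=self.tokenizer,
                    revision=self.revision,
                    trust_remote_code=self.trust_remote_code)
            self.steps.append("speculators override")


cfg = SketchModelConfig(model="facebook/opt-125m",
                        tokenizer="facebook/opt-125m")
print(cfg.steps)  # ['s3 pull', 'speculators override']
```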