Skip to content

Commit baa3e38

Browse files
omer-dayan authored and pwschuurman committed
Bugfix - vLLM S3 with Spec config
Signed-off-by: Omer Dayan (SW-GPU) <omer@run.ai>
1 parent 23a6c52 commit baa3e38

File tree

2 files changed

+14
-13
lines changed

2 files changed

+14
-13
lines changed

vllm/config/__init__.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -560,15 +560,6 @@ def __post_init__(self) -> None:
560560
"affect the random state of the Python process that "
561561
"launched vLLM.", self.seed)
562562

563-
if self.runner != "draft":
564-
# If we're not running the draft model, check for speculators config
565-
# If speculators config, set model / tokenizer to be target model
566-
self.model, self.tokenizer = maybe_override_with_speculators_target_model( # noqa: E501
567-
model=self.model,
568-
tokenizer=self.tokenizer,
569-
revision=self.revision,
570-
trust_remote_code=self.trust_remote_code)
571-
572563
# Keep set served_model_name before maybe_model_redirect(self.model)
573564
self.served_model_name = get_served_model_name(self.model,
574565
self.served_model_name)
@@ -609,6 +600,15 @@ def __post_init__(self) -> None:
609600

610601
self.maybe_pull_model_tokenizer_for_s3(self.model, self.tokenizer)
611602

603+
if self.runner != "draft":
604+
# If we're not running the draft model, check for speculators config
605+
# If speculators config, set model / tokenizer to be target model
606+
self.model, self.tokenizer = maybe_override_with_speculators_target_model( # noqa: E501
607+
model=self.model,
608+
tokenizer=self.tokenizer,
609+
revision=self.revision,
610+
trust_remote_code=self.trust_remote_code)
611+
612612
if (backend := envs.VLLM_ATTENTION_BACKEND
613613
) and backend == "FLASHINFER" and find_spec("flashinfer") is None:
614614
raise ValueError(

vllm/engine/arg_utils.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1053,9 +1053,10 @@ def create_speculative_config(
10531053
SpeculatorsConfig)
10541054

10551055
if self.speculative_config is None:
1056-
hf_config = get_config(self.hf_config_path or self.model,
1057-
self.trust_remote_code, self.revision,
1058-
self.code_revision, self.config_format)
1056+
hf_config = get_config(
1057+
self.hf_config_path or target_model_config.model,
1058+
self.trust_remote_code, self.revision, self.code_revision,
1059+
self.config_format)
10591060

10601061
# if loading a SpeculatorsConfig, load the speculative_config
10611062
# details from the config directly
@@ -1065,7 +1066,7 @@ def create_speculative_config(
10651066
self.speculative_config = {}
10661067
self.speculative_config[
10671068
"num_speculative_tokens"] = hf_config.num_lookahead_tokens
1068-
self.speculative_config["model"] = self.model
1069+
self.speculative_config["model"] = target_model_config.model
10691070
self.speculative_config["method"] = hf_config.method
10701071
else:
10711072
return None

0 commit comments

Comments
 (0)