@@ -560,15 +560,6 @@ def __post_init__(self) -> None:
560560 "affect the random state of the Python process that "
561561 "launched vLLM." , self .seed )
562562
563- if self .runner != "draft" :
564- # If we're not running the draft model, check for speculators config
565- # If speculators config, set model / tokenizer to be target model
566- self .model , self .tokenizer = maybe_override_with_speculators_target_model ( # noqa: E501
567- model = self .model ,
568- tokenizer = self .tokenizer ,
569- revision = self .revision ,
570- trust_remote_code = self .trust_remote_code )
571-
572563 # Keep set served_model_name before maybe_model_redirect(self.model)
573564 self .served_model_name = get_served_model_name (self .model ,
574565 self .served_model_name )
@@ -609,6 +600,15 @@ def __post_init__(self) -> None:
 
         self.maybe_pull_model_tokenizer_for_s3(self.model, self.tokenizer)
 
+        if self.runner != "draft":
+            # If we're not running the draft model, check for speculators config
+            # If speculators config, set model / tokenizer to be target model
+            self.model, self.tokenizer = maybe_override_with_speculators_target_model(  # noqa: E501
+                model=self.model,
+                tokenizer=self.tokenizer,
+                revision=self.revision,
+                trust_remote_code=self.trust_remote_code)
+
         if (backend := envs.VLLM_ATTENTION_BACKEND
             ) and backend == "FLASHINFER" and find_spec("flashinfer") is None:
             raise ValueError(
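For readers skimming the diff: the change is a pure move, relocating the speculators override block so it runs after `self.maybe_pull_model_tokenizer_for_s3` instead of before `get_served_model_name`. Below is a minimal, self-contained sketch of the resulting ordering in `__post_init__`. All class and helper bodies here are hypothetical placeholders, not vLLM's real implementations, and the rationale in the comments (that the check should see the already-pulled model rather than a raw s3:// URI) is an inference from the call order, not stated in the commit.

```python
# Sketch of the post-move control flow (assumption: placeholder bodies; the
# real vLLM helpers read model configs and download from S3).
from dataclasses import dataclass, field
from typing import Optional


def maybe_override_with_speculators_target_model(
        *, model: str, tokenizer: str, revision: Optional[str],
        trust_remote_code: bool) -> tuple[str, str]:
    # Assumed behavior, inferred from the call site: for a speculators
    # (draft) checkpoint, return the target model/tokenizer it points at;
    # otherwise pass the inputs through unchanged.
    return model, tokenizer


@dataclass
class SketchModelConfig:
    model: str
    tokenizer: str
    runner: str = "auto"
    revision: Optional[str] = None
    trust_remote_code: bool = False
    steps: list = field(default_factory=list)

    def maybe_pull_model_tokenizer_for_s3(self, model: str,
                                          tokenizer: str) -> None:
        # Placeholder for the real method, which resolves s3:// paths
        # to local directories.
        self.steps.append("s3 pull")

    def __post_init__(self) -> None:
        # Post-move order: resolve remote (S3) paths first...
        self.maybe_pull_model_tokenizer_for_s3(self.model, self.tokenizer)
        # ...then check for a speculators config, so the check operates on
        # the resolved model rather than a raw s3:// URI.
        if self.runner != "draft":
            self.model, self.tokenizer = \
                maybe_override_with_speculators_target_model(
                    model=self.model,
                    tokenizer=self.tokenizer,
                    revision=self.revision,
                    trust_remote_code=self.trust_remote_code)
            self.steps.append("speculators override")


cfg = SketchModelConfig(model="facebook/opt-125m",
                        tokenizer="facebook/opt-125m")
print(cfg.steps)  # ['s3 pull', 'speculators override']
```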