|
48 | 48 | is_interleaved, maybe_override_with_speculators_target_model, |
49 | 49 | try_get_generation_config, try_get_safetensors_metadata, |
50 | 50 | try_get_tokenizer_config, uses_mrope) |
51 | | -from vllm.transformers_utils.s3_utils import S3Model |
52 | | -from vllm.transformers_utils.utils import is_s3, maybe_model_redirect |
| 51 | +from vllm.transformers_utils.runai_utils import (ObjectStorageModel, |
| 52 | + is_runai_obj_uri) |
| 53 | +from vllm.transformers_utils.utils import maybe_model_redirect |
53 | 54 | from vllm.utils import (DEFAULT_MAX_NUM_BATCHED_TOKENS, |
54 | 55 | STR_DUAL_CHUNK_FLASH_ATTN_VAL, LayerBlockType, |
55 | 56 | LazyLoader, common_broadcastable_dtype, random_uuid) |
@@ -556,15 +557,6 @@ def __post_init__(self) -> None: |
556 | 557 | "affect the random state of the Python process that " |
557 | 558 | "launched vLLM.", self.seed) |
558 | 559 |
|
559 | | - if self.runner != "draft": |
560 | | - # If we're not running the draft model, check for speculators config |
561 | | - # If speculators config, set model / tokenizer to be target model |
562 | | - self.model, self.tokenizer = maybe_override_with_speculators_target_model( # noqa: E501 |
563 | | - model=self.model, |
564 | | - tokenizer=self.tokenizer, |
565 | | - revision=self.revision, |
566 | | - trust_remote_code=self.trust_remote_code) |
567 | | - |
568 | 560 | # Keep set served_model_name before maybe_model_redirect(self.model) |
569 | 561 | self.served_model_name = get_served_model_name(self.model, |
570 | 562 | self.served_model_name) |
@@ -603,7 +595,16 @@ def __post_init__(self) -> None: |
603 | 595 | f"'Please instead use `--hf-overrides '{hf_overrides_str}'`") |
604 | 596 | warnings.warn(DeprecationWarning(msg), stacklevel=2) |
605 | 597 |
|
606 | | - self.maybe_pull_model_tokenizer_for_s3(self.model, self.tokenizer) |
| 598 | + self.maybe_pull_model_tokenizer_for_runai(self.model, self.tokenizer) |
| 599 | + |
| 600 | + if self.runner != "draft": |
| 601 | + # If we're not running the draft model, check for speculators config |
| 602 | + # If speculators config, set model / tokenizer to be target model |
| 603 | + self.model, self.tokenizer = maybe_override_with_speculators_target_model( # noqa: E501 |
| 604 | + model=self.model, |
| 605 | + tokenizer=self.tokenizer, |
| 606 | + revision=self.revision, |
| 607 | + trust_remote_code=self.trust_remote_code) |
607 | 608 |
|
608 | 609 | if (backend := envs.VLLM_ATTENTION_BACKEND |
609 | 610 | ) and backend == "FLASHINFER" and find_spec("flashinfer") is None: |
@@ -832,41 +833,42 @@ def architecture(self) -> str: |
832 | 833 | """The architecture vllm actually used.""" |
833 | 834 | return self._architecture |
834 | 835 |
|
def maybe_pull_model_tokenizer_for_runai(self, model: str,
                                         tokenizer: str) -> None:
    """Pull model/tokenizer from Object Storage to a local directory
    when needed.

    Nothing happens unless `model` or `tokenizer` is recognised by
    `is_runai_obj_uri`. When the model is pulled, `self.model_weights`
    keeps the original URI and `self.model` is repointed at the
    directory exposed by the `ObjectStorageModel` instance
    (presumably a temporary directory -- confirm in runai_utils).
    `self.tokenizer` is repointed the same way when the tokenizer is
    pulled.

    Args:
        model: Model name or path
        tokenizer: Tokenizer name or path
    """
    if not (is_runai_obj_uri(model) or is_runai_obj_uri(tokenizer)):
        return

    if is_runai_obj_uri(model):
        object_storage_model = ObjectStorageModel()
        # Only config-like files are pulled here; weight files are not
        # matched by these patterns. The original URI is kept on
        # `self.model_weights`.
        object_storage_model.pull_files(
            model, allow_pattern=["*.model", "*.py", "*.json"])
        self.model_weights = model
        self.model = object_storage_model.dir

        # If tokenizer is same as model, download to same directory
        if model == tokenizer:
            object_storage_model.pull_files(
                model,
                ignore_pattern=[
                    "*.pt", "*.safetensors", "*.bin", "*.tensors"
                ])
            self.tokenizer = object_storage_model.dir
            return

    # Only download tokenizer if needed and not already handled
    if is_runai_obj_uri(tokenizer):
        object_storage_tokenizer = ObjectStorageModel()
        # Pull from the tokenizer URI, not the model URI: the model may
        # be a local path or a different object-storage location.
        object_storage_tokenizer.pull_files(
            tokenizer,
            ignore_pattern=["*.pt", "*.safetensors", "*.bin", "*.tensors"])
        self.tokenizer = object_storage_tokenizer.dir
870 | 872 |
|
871 | 873 | def _init_multimodal_config(self) -> Optional["MultiModalConfig"]: |
872 | 874 | if self._model_info.supports_multimodal: |
|
0 commit comments