|
47 | 47 | is_interleaved, maybe_override_with_speculators_target_model, |
48 | 48 | try_get_generation_config, try_get_safetensors_metadata, |
49 | 49 | try_get_tokenizer_config, uses_mrope) |
50 | | -from vllm.transformers_utils.s3_utils import S3Model |
51 | | -from vllm.transformers_utils.utils import is_s3, maybe_model_redirect |
| 50 | +from vllm.transformers_utils.runai_utils import (ObjectStorageModel, |
| 51 | + is_runai_obj_uri) |
| 52 | +from vllm.transformers_utils.utils import maybe_model_redirect |
52 | 53 | from vllm.utils import (DEFAULT_MAX_NUM_BATCHED_TOKENS, |
53 | 54 | STR_DUAL_CHUNK_FLASH_ATTN_VAL, LayerBlockType, |
54 | 55 | LazyLoader, common_broadcastable_dtype, random_uuid) |
@@ -598,7 +599,7 @@ def __post_init__(self) -> None: |
598 | 599 | f"'Please instead use `--hf-overrides '{hf_overrides_str}'`") |
599 | 600 | warnings.warn(DeprecationWarning(msg), stacklevel=2) |
600 | 601 |
|
601 | | - self.maybe_pull_model_tokenizer_for_s3(self.model, self.tokenizer) |
| 602 | + self.maybe_pull_model_tokenizer_for_runai(self.model, self.tokenizer) |
602 | 603 |
|
603 | 604 | if self.runner != "draft": |
604 | 605 | # If we're not running the draft model, check for speculators config |
@@ -840,41 +841,42 @@ def architecture(self) -> str: |
840 | 841 | """The architecture vllm actually used.""" |
841 | 842 | return self._architecture |
842 | 843 |
|
843 | | - def maybe_pull_model_tokenizer_for_s3(self, model: str, |
844 | | - tokenizer: str) -> None: |
845 | | - """Pull model/tokenizer from S3 to temporary directory when needed. |
| 844 | + def maybe_pull_model_tokenizer_for_runai(self, model: str, |
| 845 | + tokenizer: str) -> None: |
| 846 | + """Pull model/tokenizer from Object Storage to temporary |
| 847 | + directory when needed. |
846 | 848 |
|
847 | 849 | Args: |
848 | 850 | model: Model name or path |
849 | 851 | tokenizer: Tokenizer name or path |
850 | 852 | """ |
851 | | - if not (is_s3(model) or is_s3(tokenizer)): |
| 853 | + if not (is_runai_obj_uri(model) or is_runai_obj_uri(tokenizer)): |
852 | 854 | return |
853 | 855 |
|
854 | | - if is_s3(model): |
855 | | - s3_model = S3Model() |
856 | | - s3_model.pull_files(model, |
857 | | - allow_pattern=["*.model", "*.py", "*.json"]) |
| 856 | + if is_runai_obj_uri(model): |
| 857 | + object_storage_model = ObjectStorageModel() |
| 858 | + object_storage_model.pull_files( |
| 859 | + model, allow_pattern=["*.model", "*.py", "*.json"]) |
858 | 860 | self.model_weights = model |
859 | | - self.model = s3_model.dir |
| 861 | + self.model = object_storage_model.dir |
860 | 862 |
|
861 | 863 | # If tokenizer is same as model, download to same directory |
862 | 864 | if model == tokenizer: |
863 | | - s3_model.pull_files(model, |
864 | | - ignore_pattern=[ |
865 | | - "*.pt", "*.safetensors", "*.bin", |
866 | | - "*.tensors" |
867 | | - ]) |
868 | | - self.tokenizer = s3_model.dir |
| 865 | + object_storage_model.pull_files(model, |
| 866 | + ignore_pattern=[ |
| 867 | + "*.pt", "*.safetensors", |
| 868 | + "*.bin", "*.tensors" |
| 869 | + ]) |
| 870 | + self.tokenizer = object_storage_model.dir |
869 | 871 | return |
870 | 872 |
|
871 | 873 | # Only download tokenizer if needed and not already handled |
872 | | - if is_s3(tokenizer): |
873 | | - s3_tokenizer = S3Model() |
874 | | - s3_tokenizer.pull_files( |
| 874 | + if is_runai_obj_uri(tokenizer): |
| 875 | + object_storage_tokenizer = ObjectStorageModel() |
| 876 | + object_storage_tokenizer.pull_files( |
875 | 877 | model, |
876 | 878 | ignore_pattern=["*.pt", "*.safetensors", "*.bin", "*.tensors"]) |
877 | | - self.tokenizer = s3_tokenizer.dir |
| 879 | + self.tokenizer = object_storage_tokenizer.dir |
878 | 880 |
|
879 | 881 | def _init_multimodal_config(self) -> Optional["MultiModalConfig"]: |
880 | 882 | if self._model_info.supports_multimodal: |
|
0 commit comments