7 changes: 7 additions & 0 deletions docs/models/extensions/runai_model_streamer.md
@@ -24,6 +24,13 @@ vllm serve s3://core-llm/Llama-3-8b \
--load-format runai_streamer
```

To load a model from Google Cloud Storage, run:

```bash
vllm serve gs://core-llm/Llama-3-8b \
--load-format runai_streamer
```
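The GCS backend relies on standard Google Cloud authentication. As a hedged sketch (the anonymous-credentials variable is taken from this PR's test; `GOOGLE_APPLICATION_CREDENTIALS` is the usual Application Default Credentials convention, not something this diff configures):

```bash
# Authenticate with a service-account key via Application Default Credentials
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json

# Or, for public buckets, stream anonymously (env var used in this PR's test)
export RUNAI_STREAMER_GCS_USE_ANONYMOUS_CREDENTIALS=true

vllm serve gs://core-llm/Llama-3-8b \
    --load-format runai_streamer
```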

To load a model from an S3-compatible object store, run:

```bash
2 changes: 1 addition & 1 deletion requirements/nightly_torch_test.txt
@@ -43,6 +43,6 @@ tritonclient==2.51.0
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9'
numpy
runai-model-streamer[s3]==0.14.0
runai-model-streamer[s3,gcs]==0.14.0
fastsafetensors>=0.1.10
pydantic>=2.10 # 2.9 leads to error on python 3.10
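The widened extras selector above pulls in the new GCS backend alongside S3. A quick way to verify the pin resolves locally (a sketch, not part of this change):

```bash
# Installs runai-model-streamer plus both the S3 and GCS backend packages
pip install "runai-model-streamer[s3,gcs]==0.14.0"
```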
4 changes: 2 additions & 2 deletions requirements/rocm.txt
@@ -13,6 +13,6 @@ tensorizer==2.10.1
packaging>=24.2
setuptools>=77.0.3,<80.0.0
setuptools-scm>=8
runai-model-streamer[s3]==0.14.0
runai-model-streamer[s3,gcs]==0.14.0
conch-triton-kernels==1.2.1
timm>=1.0.17
timm>=1.0.17
2 changes: 1 addition & 1 deletion requirements/test.in
@@ -51,7 +51,7 @@ tritonclient==2.51.0
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9'
numpy
runai-model-streamer[s3]==0.14.0
runai-model-streamer[s3,gcs]==0.14.0
fastsafetensors>=0.1.10
pydantic>=2.10 # 2.9 leads to error on python 3.10
decord==0.6.0
21 changes: 20 additions & 1 deletion requirements/test.txt
@@ -251,11 +251,27 @@ gitdb==4.0.12
gitpython==3.1.44
    # via mlflow-skinny
google-api-core==2.24.2
    # via opencensus
    # via
    #   google-cloud-core
    #   google-cloud-storage
    #   opencensus
google-auth==2.40.2
    # via
    #   databricks-sdk
    #   google-api-core
    #   google-cloud-core
    #   google-cloud-storage
    #   runai-model-streamer-gcs
google-cloud-core==2.4.3
    # via google-cloud-storage
google-cloud-storage==3.4.0
    # via runai-model-streamer-gcs
google-crc32c==1.7.1
    # via
    #   google-cloud-storage
    #   google-resumable-media
google-resumable-media==2.7.2
    # via google-cloud-storage
googleapis-common-protos==1.70.0
    # via google-api-core
graphene==3.4.3
@@ -890,6 +906,7 @@ requests==2.32.3
    #   docker
    #   evaluate
    #   google-api-core
    #   google-cloud-storage
    #   huggingface-hub
    #   lightly
    #   lm-eval
@@ -929,6 +946,8 @@ rtree==1.4.0
    # via torchgeo
runai-model-streamer==0.14.0
    # via -r requirements/test.in
runai-model-streamer-gcs==0.14.0
    # via runai-model-streamer
runai-model-streamer-s3==0.14.0
    # via runai-model-streamer
s3transfer==0.10.3
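The lockfile entries above are pip-compile output rather than hand-written pins, so the `[s3,gcs]` change in `test.in` is what drives them. Regeneration would look roughly like this (command is an assumption; vLLM's lockfile tooling may differ, e.g. uv's pip-compile equivalent):

```bash
# Recompile the pinned lockfile from the loose requirements
pip-compile requirements/test.in -o requirements/test.txt
```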
@@ -2,14 +2,16 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import glob
import hashlib
import os
import tempfile

import huggingface_hub.constants

from vllm.model_executor.model_loader.weight_utils import (
    download_weights_from_hf)
from vllm.transformers_utils.runai_utils import (is_runai_obj_uri,
from vllm.transformers_utils.runai_utils import (ObjectStorageModel,
                                                 is_runai_obj_uri,
                                                 list_safetensors)


@@ -34,6 +36,23 @@ def test_runai_list_safetensors_local():
assert len(safetensors) == len(files)


if __name__ == "__main__":
    test_is_runai_obj_uri()
    test_runai_list_safetensors_local()
def test_runai_pull_files_gcs(monkeypatch):
    monkeypatch.setenv("RUNAI_STREAMER_GCS_USE_ANONYMOUS_CREDENTIALS", "true")
    # Bypass default project lookup by setting GOOGLE_CLOUD_PROJECT
    monkeypatch.setenv("GOOGLE_CLOUD_PROJECT", "fake-project")
    filename = "LT08_L1GT_074061_20130309_20170505_01_T2_MTL.txt"
    gcs_bucket = "gs://gcp-public-data-landsat/LT08/01/074/061/LT08_L1GT_074061_20130309_20170505_01_T2/"
    # gcs_bucket already ends with "/", so join without adding another slash
    gcs_url = f"{gcs_bucket}{filename}"
    model = ObjectStorageModel(gcs_url)
    model.pull_files(gcs_bucket, allow_pattern=[f"*{filename}"])
    # To re-generate / change URLs:
    # gsutil ls -L gs://<gcs-url> | grep "Hash (md5)" | tr -d ' ' \
    #     | cut -d":" -f2 | base64 -d | xxd -p
    expected_checksum = "f60dea775da1392434275b311b31a431"
    hasher = hashlib.new("md5")
    with open(os.path.join(model.dir, filename), 'rb') as f:
        # Read the file in chunks to handle large files efficiently
        for chunk in iter(lambda: f.read(4096), b''):
            hasher.update(chunk)
    actual_checksum = hasher.hexdigest()
    assert actual_checksum == expected_checksum
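To exercise the new test on its own (the test file's path is collapsed out of this diff, so a placeholder is used below), something like the following should work; note the test downloads a file from a public Landsat bucket, so it needs network access:

```bash
# Replace the placeholder with the actual path of the modified test module
pytest <path-to-test_runai_utils.py>::test_runai_pull_files_gcs -v
```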