Commit c8bde93

[BUG] Allows for RunAI Streamer and Torch.compile cache to be used together (#24922)
Signed-off-by: ahao-anyscale <ahao@anyscale.com>
1 parent 88d7bdb commit c8bde93

File tree

3 files changed: +119 −4 lines changed

tests/test_config.py

Lines changed: 107 additions & 0 deletions
@@ -1,7 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

+import os
 from dataclasses import MISSING, Field, asdict, dataclass, field
+from unittest.mock import patch

 import pytest

@@ -388,3 +390,108 @@ def test_get_and_verify_max_len(model_id, max_model_len, expected_max_len,
     else:
         actual_max_len = model_config.get_and_verify_max_len(max_model_len)
     assert actual_max_len == expected_max_len
+
+
+class MockConfig:
+    """Simple mock object for testing maybe_pull_model_tokenizer_for_runai"""
+
+    def __init__(self, model: str, tokenizer: str):
+        self.model = model
+        self.tokenizer = tokenizer
+        self.model_weights = None
+
+
+@pytest.mark.parametrize("s3_url", [
+    "s3://example-bucket-1/model/",
+    "s3://example-bucket-2/model/",
+])
+@patch('vllm.transformers_utils.runai_utils.ObjectStorageModel.pull_files')
+def test_s3_url_model_tokenizer_paths(mock_pull_files, s3_url):
+    """Test that S3 URLs create deterministic local directories for model and
+    tokenizer."""
+    # Mock pull_files to avoid actually downloading files during tests
+    mock_pull_files.return_value = None
+
+    # Create first mock and run the method
+    config1 = MockConfig(model=s3_url, tokenizer=s3_url)
+    ModelConfig.maybe_pull_model_tokenizer_for_runai(config1, s3_url, s3_url)
+
+    # Check that model and tokenizer point to existing directories
+    assert os.path.exists(
+        config1.model), f"Model directory does not exist: {config1.model}"
+    assert os.path.isdir(
+        config1.model), f"Model path is not a directory: {config1.model}"
+    assert os.path.exists(
+        config1.tokenizer
+    ), f"Tokenizer directory does not exist: {config1.tokenizer}"
+    assert os.path.isdir(
+        config1.tokenizer
+    ), f"Tokenizer path is not a directory: {config1.tokenizer}"
+
+    # Verify that the paths are different from the original S3 URL
+    assert config1.model != s3_url, (
+        "Model path should be converted to local directory")
+    assert config1.tokenizer != s3_url, (
+        "Tokenizer path should be converted to local directory")
+
+    # Store the original paths
+    created_model_dir = config1.model
+    create_tokenizer_dir = config1.tokenizer
+
+    # Create a new mock and run the method with the same S3 URL
+    config2 = MockConfig(model=s3_url, tokenizer=s3_url)
+    ModelConfig.maybe_pull_model_tokenizer_for_runai(config2, s3_url, s3_url)
+
+    # Check that the new directories exist
+    assert os.path.exists(
+        config2.model), f"Model directory does not exist: {config2.model}"
+    assert os.path.isdir(
+        config2.model), f"Model path is not a directory: {config2.model}"
+    assert os.path.exists(
+        config2.tokenizer
+    ), f"Tokenizer directory does not exist: {config2.tokenizer}"
+    assert os.path.isdir(
+        config2.tokenizer
+    ), f"Tokenizer path is not a directory: {config2.tokenizer}"
+
+    # Verify that the paths are deterministic (same as before)
+    assert config2.model == created_model_dir, (
+        f"Model paths are not deterministic. "
+        f"Original: {created_model_dir}, New: {config2.model}")
+    assert config2.tokenizer == create_tokenizer_dir, (
+        f"Tokenizer paths are not deterministic. "
+        f"Original: {create_tokenizer_dir}, New: {config2.tokenizer}")
+
+
+@patch('vllm.transformers_utils.runai_utils.ObjectStorageModel.pull_files')
+def test_s3_url_different_models_create_different_directories(mock_pull_files):
+    """Test that different S3 URLs create different local directories."""
+    # Mock pull_files to avoid actually downloading files during tests
+    mock_pull_files.return_value = None
+
+    s3_url1 = "s3://example-bucket-1/model/"
+    s3_url2 = "s3://example-bucket-2/model/"
+
+    # Create mocks with different S3 URLs and run the method
+    config1 = MockConfig(model=s3_url1, tokenizer=s3_url1)
+    ModelConfig.maybe_pull_model_tokenizer_for_runai(config1, s3_url1, s3_url1)
+
+    config2 = MockConfig(model=s3_url2, tokenizer=s3_url2)
+    ModelConfig.maybe_pull_model_tokenizer_for_runai(config2, s3_url2, s3_url2)
+
+    # Verify that different URLs produce different directories
+    assert config1.model != config2.model, (
+        f"Different S3 URLs should create different model directories. "
+        f"URL1 model: {config1.model}, URL2 model: {config2.model}")
+    assert config1.tokenizer != config2.tokenizer, (
+        f"Different S3 URLs should create different tokenizer directories. "
+        f"URL1 tokenizer: {config1.tokenizer}, "
+        f"URL2 tokenizer: {config2.tokenizer}")
+
+    # Verify that both sets of directories exist
+    assert os.path.exists(config1.model) and os.path.isdir(config1.model)
+    assert os.path.exists(config1.tokenizer) and os.path.isdir(
+        config1.tokenizer)
+    assert os.path.exists(config2.model) and os.path.isdir(config2.model)
+    assert os.path.exists(config2.tokenizer) and os.path.isdir(
+        config2.tokenizer)
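
These tests exercise only the directory-naming logic: pull_files is patched out, so no object-storage access or credentials are needed. A typical local invocation, assuming the working directory is the vLLM repository root:

    pytest tests/test_config.py -k "s3_url" -v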

vllm/config/model.py

Lines changed: 3 additions & 2 deletions
@@ -699,11 +699,12 @@ def maybe_pull_model_tokenizer_for_runai(self, model: str,
             model: Model name or path
             tokenizer: Tokenizer name or path
         """
+
         if not (is_runai_obj_uri(model) or is_runai_obj_uri(tokenizer)):
             return

         if is_runai_obj_uri(model):
-            object_storage_model = ObjectStorageModel()
+            object_storage_model = ObjectStorageModel(url=model)
             object_storage_model.pull_files(
                 model, allow_pattern=["*.model", "*.py", "*.json"])
             self.model_weights = model
@@ -722,7 +723,7 @@ def maybe_pull_model_tokenizer_for_runai(self, model: str,

         # Only download tokenizer if needed and not already handled
         if is_runai_obj_uri(tokenizer):
-            object_storage_tokenizer = ObjectStorageModel()
+            object_storage_tokenizer = ObjectStorageModel(url=tokenizer)
             object_storage_tokenizer.pull_files(model,
                                                 ignore_pattern=[
                                                     "*.pt", "*.safetensors",

vllm/transformers_utils/runai_utils.py

Lines changed: 9 additions & 2 deletions
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

+import hashlib
 import os
 import shutil
 import signal
@@ -56,12 +57,18 @@ class ObjectStorageModel:
         pull_files(): Pull model from object storage to the temporary directory.
     """

-    def __init__(self) -> None:
+    def __init__(self, url: str) -> None:
         for sig in (signal.SIGINT, signal.SIGTERM):
             existing_handler = signal.getsignal(sig)
             signal.signal(sig, self._close_by_signal(existing_handler))

-        self.dir = tempfile.mkdtemp()
+        dir_name = os.path.join(
+            tempfile.gettempdir(),
+            hashlib.sha256(str(url).encode()).hexdigest()[:8])
+        if os.path.exists(dir_name):
+            shutil.rmtree(dir_name)
+        os.makedirs(dir_name)
+        self.dir = dir_name

     def __del__(self):
         self._close()
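
This is the heart of the fix: the download directory is now a pure function of the source URL rather than a random tempfile.mkdtemp() name, so repeated runs against the same bucket path land in the same on-disk location. A standalone sketch of just the naming scheme; runai_cache_dir is a hypothetical helper used only for illustration, while in the patch the same expression lives inline in ObjectStorageModel.__init__:

import hashlib
import os
import tempfile

def runai_cache_dir(url: str) -> str:
    # First 8 hex characters of the SHA-256 of the URL, rooted in the system temp dir.
    digest = hashlib.sha256(str(url).encode()).hexdigest()[:8]
    return os.path.join(tempfile.gettempdir(), digest)

# Deterministic across processes and runs; distinct URLs map to distinct names.
assert runai_cache_dir("s3://bucket/model/") == runai_cache_dir("s3://bucket/model/")
assert runai_cache_dir("s3://bucket/model/") != runai_cache_dir("s3://bucket/other/")

Note that __init__ still wipes any existing directory (shutil.rmtree) before recreating it, so the files themselves are re-pulled on each run; what becomes stable is the path, which is presumably what prevented torch.compile cache reuse before this change.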
