Commit 1b2826a

ci: remove not slow filter (#2944)
Signed-off-by: alec-flowers <aflowers@nvidia.com>
1 parent: 2eced09 | commit: 1b2826a

13 files changed (+151, -221 lines)


.github/workflows/container-validation-backends.yml

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ jobs:
             pytest_marks: "e2e and vllm and gpu_1 and not slow"
           - framework: sglang
             target: runtime
-            pytest_marks: "e2e and sglang and gpu_1"
+            pytest_marks: "e2e and sglang and gpu_1 and not slow"
 
     # Do not cancel main branch runs
     concurrency:
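For context, the pytest_marks expression is handed to pytest's -m selector, so an sglang test now also has to avoid the slow marker to run in this CI lane. A minimal illustration with hypothetical test names (the markers themselves are the ones registered in pyproject.toml below):

import pytest

# Selected by "e2e and sglang and gpu_1 and not slow"
@pytest.mark.e2e
@pytest.mark.sglang
@pytest.mark.gpu_1
def test_sglang_short():
    assert True

# Deselected by the same expression because of the slow marker
@pytest.mark.e2e
@pytest.mark.sglang
@pytest.mark.gpu_1
@pytest.mark.slow
def test_sglang_long_running():
    assert True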

pyproject.toml

Lines changed: 2 additions & 1 deletion
@@ -178,7 +178,8 @@ markers = [
     "sglang: marks tests as requiring sglang",
     "slow: marks tests as known to be slow",
     "h100: marks tests to run on H100",
-    "kvbm: marks tests for KV behavior and model determinism"
+    "kvbm: marks tests for KV behavior and model determinism",
+    "model: model id used by a test or parameter"
 ]
 
 # Linting/formatting
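The new "model" marker is what the updated conftest hooks read at collection time. A hedged sketch of how a test might declare its model, either on the test itself or per parameter (hypothetical test names, model ids taken from the list that used to live in conftest.py):

import pytest

@pytest.mark.model("Qwen/Qwen3-0.6B")
def test_single_model():
    ...

@pytest.mark.parametrize(
    "model_id",
    [
        pytest.param(
            "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
            marks=pytest.mark.model("deepseek-ai/DeepSeek-R1-Distill-Llama-8B"),
        ),
    ],
)
def test_per_parameter(model_id):
    ...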

tests/conftest.py

Lines changed: 47 additions & 46 deletions
@@ -20,26 +20,25 @@
 
 import pytest
 
+from tests.utils.constants import TEST_MODELS
 from tests.utils.managed_process import ManagedProcess
 
-# Custom format inspired by your example
+
+def pytest_configure(config):
+    # Defining model marker to avoid `'model' not found in `markers` configuration option`
+    # error when pyproject.toml is not available in the container
+    config.addinivalue_line("markers", "model: model id used by a test or parameter")
+
+
 LOG_FORMAT = "[TEST] %(asctime)s %(levelname)s %(name)s: %(message)s"
 DATE_FORMAT = "%Y-%m-%dT%H:%M:%S"
 
-# Configure logging
 logging.basicConfig(
     level=logging.INFO,
     format=LOG_FORMAT,
     datefmt=DATE_FORMAT,  # ISO 8601 UTC format
 )
 
-# List of models used in tests
-TEST_MODELS = [
-    "Qwen/Qwen3-0.6B",
-    "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
-    "llava-hf/llava-1.5-7b-hf",
-]
-
 
 def download_models(model_list=None, ignore_weights=False):
     """Download models - can be called directly or via fixture
@@ -107,16 +106,34 @@ def download_models(model_list=None, ignore_weights=False):
 
 
 @pytest.fixture(scope="session")
-def predownload_models():
-    """Fixture wrapper around download_models for all TEST_MODELS"""
-    download_models()
+def predownload_models(pytestconfig):
+    """Fixture wrapper around download_models for models used in collected tests"""
+    # Get models from pytest config if available, otherwise fall back to TEST_MODELS
+    models = getattr(pytestconfig, "models_to_download", None)
+    if models:
+        logging.info(
+            f"Downloading {len(models)} models needed for collected tests\nModels: {models}"
+        )
+        download_models(model_list=list(models))
+    else:
+        # Fallback to original behavior if extraction failed
+        download_models()
     yield
 
 
 @pytest.fixture(scope="session")
-def predownload_tokenizers():
-    """Fixture wrapper around download_models for all TEST_MODELS"""
-    download_models(ignore_weights=True)
+def predownload_tokenizers(pytestconfig):
+    """Fixture wrapper around download_models for tokenizers used in collected tests"""
+    # Get models from pytest config if available, otherwise fall back to TEST_MODELS
+    models = getattr(pytestconfig, "models_to_download", None)
+    if models:
+        logging.info(
+            f"Downloading tokenizers for {len(models)} models needed for collected tests\nModels: {models}"
+        )
+        download_models(model_list=list(models), ignore_weights=True)
+    else:
+        # Fallback to original behavior if extraction failed
+        download_models(ignore_weights=True)
     yield
 
 
@@ -135,42 +152,26 @@ def logger(request):
     logger.removeHandler(handler)
 
 
+@pytest.hookimpl(trylast=True)
 def pytest_collection_modifyitems(config, items):
     """
     This function is called to modify the list of tests to run.
-    It is used to skip tests that are not supported on all environments.
     """
-
-    # Tests marked with trtllm requires specific environment with tensorrtllm
-    # installed. Hence, we skip them if the user did not explicitly ask for them.
-    if config.getoption("-m") and "trtllm_marker" in config.getoption("-m"):
-        return
-    skip_trtllm = pytest.mark.skip(reason="need -m trtllm_marker to run")
+    # Collect models via explicit pytest mark from final filtered items only
+    models_to_download = set()
     for item in items:
-        if "trtllm_marker" in item.keywords:
-            item.add_marker(skip_trtllm)
-
-        # Auto-inject predownload_models fixture for serve tests only (not router tests)
-        # Skip items that don't have fixturenames (like MypyFileItem)
-        if hasattr(item, "fixturenames"):
-            # Guard clause: skip if already has the fixtures
-            if (
-                "predownload_models" in item.fixturenames
-                or "predownload_tokenizers" in item.fixturenames
-            ):
-                continue
-
-            # Guard clause: skip if marked with skip_model_download
-            if item.get_closest_marker("skip_model_download"):
-                continue
-
-            # Add appropriate fixture based on test path
-            if "serve" in str(item.path):
-                item.fixturenames = list(item.fixturenames)
-                item.fixturenames.append("predownload_models")
-            elif "router" in str(item.path):
-                item.fixturenames = list(item.fixturenames)
-                item.fixturenames.append("predownload_tokenizers")
+        # Only collect from items that are not skipped
+        if any(
+            getattr(m, "name", "") == "skip" for m in getattr(item, "own_markers", [])
+        ):
+            continue
+        model_mark = item.get_closest_marker("model")
+        if model_mark and model_mark.args:
+            models_to_download.add(model_mark.args[0])
+
+    # Store models to download in pytest config for fixtures to access
+    if models_to_download:
+        config.models_to_download = models_to_download
 
 
 class EtcdServer(ManagedProcess):
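Taken together, the hook and the session-scoped fixtures mean a test only has to declare its model and request the fixture. A hypothetical example, assuming the marker and fixture names introduced above:

import pytest

@pytest.mark.e2e
@pytest.mark.gpu_1
@pytest.mark.model("Qwen/Qwen3-0.6B")
def test_with_predownloaded_model(predownload_models):
    # At collection time, pytest_collection_modifyitems records "Qwen/Qwen3-0.6B"
    # in config.models_to_download (items already marked skip are ignored).
    # The session-scoped predownload_models fixture then downloads only the
    # models that collected tests actually declared before this body runs.
    ...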

tests/fault_tolerance/test_request_cancellation.py

Lines changed: 10 additions & 48 deletions
@@ -9,8 +9,8 @@
 
 import pytest
 import requests
-from huggingface_hub import snapshot_download
 
+from tests.utils.constants import FAULT_TOLERANCE_MODEL_NAME
 from tests.utils.engine_process import FRONTEND_PORT
 from tests.utils.managed_process import ManagedProcess
 from tests.utils.payloads import check_health_generate, check_models_api
@@ -56,7 +56,7 @@ def __init__(self, request, is_prefill: bool = False):
             "-m",
             "dynamo.vllm",
             "--model",
-            "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+            FAULT_TOLERANCE_MODEL_NAME,
             "--enforce-eager",
             "--gpu-memory-utilization",
             "0.45",
@@ -137,47 +137,12 @@ def is_ready(self, response) -> bool:
         return False
 
 
-def download_model() -> None:
-    """
-    Download the DeepSeek-R1-Distill-Llama-8B model from HuggingFace Hub if not already cached.
-    """
-    model_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
-    logger.info(f"Caching model {model_id}...")
-
-    max_retries = 5
-    retry_delay = 30  # seconds
-
-    for attempt in range(max_retries):
-        try:
-            # Download the model to the default cache directory
-            # This will skip download if the model is already cached
-            snapshot_download(
-                repo_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
-                repo_type="model",
-                local_files_only=False,
-            )
-            logger.info(f"Model {model_id} is ready for use")
-            return  # Success, exit the function
-        except Exception as e:
-            if attempt < max_retries - 1:  # Not the last attempt
-                logger.warning(
-                    f"Failed to download model {model_id} (attempt {attempt + 1}/{max_retries}): {e}"
-                )
-                logger.info(f"Retrying in {retry_delay} seconds...")
-                time.sleep(retry_delay)
-            else:  # Last attempt failed
-                logger.error(
-                    f"Failed to download model {model_id} after {max_retries} attempts: {e}"
-                )
-                raise
-
-
 def send_completion_request(
     prompt: str, max_tokens: int, timeout: int = 120
 ) -> requests.Response:
     """Send a completion request to the frontend"""
     payload = {
-        "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+        "model": FAULT_TOLERANCE_MODEL_NAME,
         "prompt": prompt,
         "max_tokens": max_tokens,
     }
@@ -211,7 +176,7 @@ def send_chat_completion_request(
 ) -> requests.Response:
     """Send a chat completion request to the frontend"""
     payload = {
-        "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+        "model": FAULT_TOLERANCE_MODEL_NAME,
         "messages": [{"role": "user", "content": prompt}],
         "max_tokens": max_tokens,
         "stream": stream,
@@ -383,8 +348,8 @@ def verify_request_cancelled(
 @pytest.mark.vllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
-@pytest.mark.slow
-def test_request_cancellation_vllm(request, runtime_services):
+@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+def test_request_cancellation_vllm(request, runtime_services, predownload_models):
     """
     End-to-end test for request cancellation functionality.
 
@@ -395,8 +360,6 @@ def test_request_cancellation_vllm(request, runtime_services):
     2. Chat completion request (non-streaming)
     3. Chat completion request (streaming)
     """
-    # Step 0: Download the model from HuggingFace if not already cached
-    download_model()
 
     # Step 1: Start the frontend
     with DynamoFrontendProcess(request) as frontend:
@@ -446,17 +409,17 @@ def test_request_cancellation_vllm(request, runtime_services):
 @pytest.mark.vllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
-@pytest.mark.slow
-def test_request_cancellation_vllm_decode(request, runtime_services):
+@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+def test_request_cancellation_vllm_decode(
+    request, runtime_services, predownload_models
+):
     """
     End-to-end test for request cancellation functionality with remote prefill.
 
     This test verifies that when a request is cancelled by the client,
     the system properly handles the cancellation and cleans up resources
     on the decode worker side in a disaggregated setup.
     """
-    # Step 0: Download the model from HuggingFace if not already cached
-    download_model()
 
     # Step 1: Start the frontend
     with DynamoFrontendProcess(request) as frontend:
@@ -501,7 +464,6 @@ def test_request_cancellation_vllm_decode(request, runtime_services):
 @pytest.mark.vllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
-@pytest.mark.slow
 def test_request_cancellation_vllm_prefill(request, runtime_services):
     """
     End-to-end test for request cancellation on remote prefill.

tests/fault_tolerance/test_request_migration.py

Lines changed: 5 additions & 42 deletions
@@ -10,8 +10,8 @@
 
 import pytest
 import requests
-from huggingface_hub import snapshot_download
 
+from tests.utils.constants import FAULT_TOLERANCE_MODEL_NAME
 from tests.utils.engine_process import FRONTEND_PORT
 from tests.utils.managed_process import ManagedProcess, terminate_process_tree
 from tests.utils.payloads import check_models_api
@@ -54,7 +54,7 @@ def __init__(self, request, worker_id: str):
             "-m",
             "dynamo.vllm",
             "--model",
-            "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+            FAULT_TOLERANCE_MODEL_NAME,
             "--enforce-eager",
             "--gpu-memory-utilization",
             "0.45",
@@ -117,47 +117,12 @@ def is_ready(self, response) -> bool:
         return False
 
 
-def download_model() -> None:
-    """
-    Download the DeepSeek-R1-Distill-Llama-8B model from HuggingFace Hub if not already cached.
-    """
-    model_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
-    logger.info(f"Caching model {model_id}...")
-
-    max_retries = 5
-    retry_delay = 30  # seconds
-
-    for attempt in range(max_retries):
-        try:
-            # Download the model to the default cache directory
-            # This will skip download if the model is already cached
-            snapshot_download(
-                repo_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
-                repo_type="model",
-                local_files_only=False,
-            )
-            logger.info(f"Model {model_id} is ready for use")
-            return  # Success, exit the function
-        except Exception as e:
-            if attempt < max_retries - 1:  # Not the last attempt
-                logger.warning(
-                    f"Failed to download model {model_id} (attempt {attempt + 1}/{max_retries}): {e}"
-                )
-                logger.info(f"Retrying in {retry_delay} seconds...")
-                time.sleep(retry_delay)
-            else:  # Last attempt failed
-                logger.error(
-                    f"Failed to download model {model_id} after {max_retries} attempts: {e}"
-                )
-                raise
-
-
 def send_completion_request(
     prompt: str, max_tokens: int, timeout: int = 120
 ) -> requests.Response:
     """Send a completion request to the frontend"""
     payload = {
-        "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+        "model": FAULT_TOLERANCE_MODEL_NAME,
         "prompt": prompt,
         "max_tokens": max_tokens,
     }
@@ -324,17 +289,15 @@ def verify_migration_occurred(frontend_process: DynamoFrontendProcess) -> None:
 @pytest.mark.vllm
 @pytest.mark.gpu_1
 @pytest.mark.e2e
-@pytest.mark.slow
-def test_request_migration_vllm(request, runtime_services):
+@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
+def test_request_migration_vllm(request, runtime_services, predownload_models):
     """
     End-to-end test for worker fault tolerance with migration support.
 
     This test verifies that when a worker is killed during request processing,
     the system can handle the failure gracefully and migrate the request to
     another worker.
     """
-    # Step 0: Download the model from HuggingFace if not already cached
-    download_model()
 
     # Step 1: Start the frontend
     with DynamoFrontendProcess(request) as frontend:
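With the slow marker dropped from these tests, they now match the marker expressions shown in the workflow change at the top. A quick way to reproduce that selection locally, sketched with pytest's Python entry point (marker expression copied from the workflow, test path assumed):

# Equivalent to: pytest -m "e2e and vllm and gpu_1 and not slow" tests/fault_tolerance
import pytest

if __name__ == "__main__":
    raise SystemExit(
        pytest.main(["-m", "e2e and vllm and gpu_1 and not slow", "tests/fault_tolerance"])
    )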
