|
3 | 3 |
|
4 | 4 | import asyncio |
5 | 5 | from http import HTTPStatus |
| 6 | +from unittest.mock import AsyncMock, Mock |
6 | 7 |
|
7 | 8 | import openai |
8 | 9 | import pytest |
9 | 10 | import pytest_asyncio |
10 | 11 | import requests |
| 12 | +from fastapi import Request |
11 | 13 |
|
| 14 | +from vllm.v1.engine.exceptions import EngineDeadError |
12 | 15 | from vllm.version import __version__ as VLLM_VERSION |
13 | 16 |
|
14 | 17 | from ...utils import RemoteOpenAIServer |
@@ -224,3 +227,24 @@ def make_long_completion_request(): |
224 | 227 | response = requests.get(server.url_for("load")) |
225 | 228 | assert response.status_code == HTTPStatus.OK |
226 | 229 | assert response.json().get("server_load") == 0 |
| 230 | + |
| 231 | + |
@pytest.mark.asyncio
async def test_health_check_engine_dead_error():
    """The /health handler must return 503 when the engine client reports a dead engine.

    The handler is invoked directly (in isolation, without starting a server)
    with a mocked request whose engine client raises ``EngineDeadError`` from
    ``check_health`` — simulating an engine process that has died.
    """
    # Imported inside the test so collecting this module does not pay the
    # cost of importing the full api_server unless the test actually runs.
    from vllm.entrypoints.openai.api_server import health

    # Engine client whose async health check fails with a dead-engine error.
    dead_engine_client = AsyncMock()
    dead_engine_client.check_health.side_effect = EngineDeadError()

    # Wire the mocked client into the app state the handler reads from.
    mock_app_state = Mock()
    mock_app_state.engine_client = dead_engine_client

    # Fake the FastAPI request object the handler receives.
    mock_request = Mock(spec=Request)
    mock_request.app.state = mock_app_state

    response = await health(mock_request)

    # A dead engine must surface as 503 Service Unavailable, matching the
    # HTTPStatus-based assertions used elsewhere in this file.
    assert response.status_code == HTTPStatus.SERVICE_UNAVAILABLE
0 commit comments