Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions tests/entrypoints/openai/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@

import asyncio
from http import HTTPStatus
from unittest.mock import AsyncMock, Mock

import openai
import pytest
import pytest_asyncio
import requests
from fastapi import Request

from vllm.v1.engine.exceptions import EngineDeadError
from vllm.version import __version__ as VLLM_VERSION

from ...utils import RemoteOpenAIServer
Expand Down Expand Up @@ -224,3 +227,24 @@ def make_long_completion_request():
response = requests.get(server.url_for("load"))
assert response.status_code == HTTPStatus.OK
assert response.json().get("server_load") == 0


@pytest.mark.asyncio
async def test_health_check_engine_dead_error():
    """The /health endpoint must report 503 when the engine has died."""
    # Import the health function directly to test it in isolation
    from vllm.entrypoints.openai.api_server import health

    # Build the fake request bottom-up: an engine client whose health
    # check raises EngineDeadError, wired into the app state the
    # endpoint reads from.
    engine_client = AsyncMock()
    engine_client.check_health.side_effect = EngineDeadError()

    app_state = Mock()
    app_state.engine_client = engine_client

    request = Mock(spec=Request)
    request.app.state = app_state

    # Test the health function directly with our mocked request
    # This simulates what would happen if the engine dies
    response = await health(request)

    # A dead engine must surface as 503 Service Unavailable
    assert response.status_code == 503