Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions tests/entrypoints/openai/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@

import asyncio
from http import HTTPStatus
from unittest.mock import AsyncMock, Mock

import openai
import pytest
import pytest_asyncio
import requests
from fastapi import Request

from vllm.v1.engine.exceptions import EngineDeadError
from vllm.version import __version__ as VLLM_VERSION

from ...utils import RemoteOpenAIServer
Expand Down Expand Up @@ -224,3 +227,24 @@ def make_long_completion_request():
response = requests.get(server.url_for("load"))
assert response.status_code == HTTPStatus.OK
assert response.json().get("server_load") == 0


@pytest.mark.asyncio
async def test_health_check_engine_dead_error():
    """The /health endpoint must report 503 when the engine has died."""
    # Import the health function directly to test it in isolation
    from vllm.entrypoints.openai.api_server import health

    # Build the fake request bottom-up: an engine client whose health
    # check raises EngineDeadError, wired into the app state the
    # endpoint reads from.
    engine_client = AsyncMock()
    engine_client.check_health.side_effect = EngineDeadError()

    app_state = Mock()
    app_state.engine_client = engine_client

    request = Mock(spec=Request)
    request.app.state = app_state

    # Test the health function directly with our mocked request
    # This simulates what would happen if the engine dies
    response = await health(request)

    # A dead engine must surface as 503 Service Unavailable
    assert response.status_code == 503