2 changes: 0 additions & 2 deletions .buildkite/test-pipeline.yaml
@@ -46,7 +46,6 @@ steps:
   mirror_hardwares: [amdexperimental]
   source_file_dependencies:
   - vllm/
-  - tests/mq_llm_engine
   - tests/async_engine
   - tests/test_inputs.py
   - tests/test_outputs.py
@@ -57,7 +56,6 @@ steps:
   - tests/transformers_utils
   commands:
   - python3 standalone_tests/lazy_imports.py
-  - pytest -v -s mq_llm_engine # MQLLMEngine
   - pytest -v -s async_engine # AsyncLLMEngine
   - pytest -v -s test_inputs.py
   - pytest -v -s test_outputs.py
4 changes: 2 additions & 2 deletions tests/entrypoints/openai/test_lora_resolvers.py
@@ -10,14 +10,14 @@
 import pytest

 from vllm.config.multimodal import MultiModalConfig
-from vllm.engine.multiprocessing.client import MQLLMEngineClient
 from vllm.entrypoints.openai.protocol import CompletionRequest, ErrorResponse
 from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
 from vllm.entrypoints.openai.serving_models import (BaseModelPath,
                                                     OpenAIServingModels)
 from vllm.lora.request import LoRARequest
 from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry
 from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.v1.engine.async_llm import AsyncLLM

 MODEL_NAME = "openai-community/gpt2"
 BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]
@@ -82,7 +82,7 @@ def register_mock_resolver():
 @pytest.fixture
 def mock_serving_setup():
     """Provides a mocked engine and serving completion instance."""
-    mock_engine = MagicMock(spec=MQLLMEngineClient)
+    mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False

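Aside (not part of the diff): the fixtures above pass spec= to MagicMock so the mock only exposes attributes the real class defines; any test still touching an MQLLMEngineClient-only attribute fails fast once the spec becomes AsyncLLM. A minimal sketch of that behavior, relying only on standard unittest.mock semantics:

    from unittest.mock import MagicMock

    from vllm.v1.engine.async_llm import AsyncLLM

    engine = MagicMock(spec=AsyncLLM)
    engine.errored = False   # OK: the engine interface defines errored
    engine.generate          # OK: AsyncLLM defines generate
    engine.nonexistent_attr  # raises AttributeError under spec=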
14 changes: 7 additions & 7 deletions tests/entrypoints/openai/test_serving_chat.py
@@ -13,12 +13,12 @@
 import pytest_asyncio

 from vllm.config.multimodal import MultiModalConfig
-from vllm.engine.multiprocessing.client import MQLLMEngineClient
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_models import (BaseModelPath,
                                                     OpenAIServingModels)
 from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.v1.engine.async_llm import AsyncLLM

 from ...utils import RemoteOpenAIServer

@@ -276,7 +276,7 @@ def test_async_serving_chat_init():

 @pytest.mark.asyncio
 async def test_serving_chat_returns_correct_model_name():
-    mock_engine = MagicMock(spec=MQLLMEngineClient)
+    mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False

@@ -312,7 +312,7 @@ async def return_model_name(*args):

 @pytest.mark.asyncio
 async def test_serving_chat_should_set_correct_max_tokens():
-    mock_engine = MagicMock(spec=MQLLMEngineClient)
+    mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False

@@ -355,7 +355,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
     }

     # Reinitialize the engine with new settings
-    mock_engine = MagicMock(spec=MQLLMEngineClient)
+    mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False

@@ -410,7 +410,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
     }

     # Reinitialize the engine with new settings
-    mock_engine = MagicMock(spec=MQLLMEngineClient)
+    mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False

@@ -467,7 +467,7 @@ async def test_serving_chat_could_load_correct_generation_config():
"repetition_penalty": 1.05
}

mock_engine = MagicMock(spec=MQLLMEngineClient)
mock_engine = MagicMock(spec=AsyncLLM)
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False

@@ -523,7 +523,7 @@ async def test_serving_chat_did_set_correct_cache_salt(model_type):
     mock_model_config = MockModelConfig()
     mock_model_config.hf_config.model_type = model_type

-    mock_engine = MagicMock(spec=MQLLMEngineClient)
+    mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False

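The chat tests above all build the same spec'd mock and then stub its generation path. A hedged sketch of that stubbing pattern, assuming AsyncLLM.generate behaves as an async generator; fake_outputs and fake_generate are illustrative names, not from this PR:

    from unittest.mock import MagicMock

    from vllm.v1.engine.async_llm import AsyncLLM

    fake_outputs = []  # real tests would append RequestOutput objects here

    async def fake_generate(*args, **kwargs):
        # Yield whatever canned outputs the test prepared.
        for out in fake_outputs:
            yield out

    engine = MagicMock(spec=AsyncLLM)
    engine.generate = fake_generate  # allowed under spec=, since AsyncLLM defines generate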
Empty file removed tests/mq_llm_engine/__init__.py
12 changes: 0 additions & 12 deletions tests/mq_llm_engine/conftest.py

This file was deleted.

69 changes: 0 additions & 69 deletions tests/mq_llm_engine/test_abort.py

This file was deleted.
