
[CI/Build] Simplify OpenAI server setup in tests (vllm-project#5100)
DarkLight1337 authored and robertgshaw2-neuralmagic committed Jun 16, 2024
1 parent c44efdc commit b9c0824
Showing 6 changed files with 282 additions and 238 deletions.
tests/async_engine/test_openapi_server_ray.py (15 additions, 16 deletions)

```diff
@@ -5,7 +5,7 @@
 import ray
 
 from tests.nm_utils.utils_skip import should_skip_test_group
-from tests.utils import ServerRunner
+from tests.utils import VLLM_PATH, RemoteOpenAIServer
 
 if should_skip_test_group(group_name="TEST_ASYNC_ENGINE"):
     pytest.skip("TEST_ASYNC_ENGINE=DISABLE, skipping async engine test group",
@@ -16,9 +16,15 @@
 
 
 @pytest.fixture(scope="module")
-def server():
-    ray.init()
-    server_runner = ServerRunner.remote([
+def ray_ctx():
+    ray.init(runtime_env={"working_dir": VLLM_PATH})
+    yield
+    ray.shutdown()
+
+
+@pytest.fixture(scope="module")
+def server(ray_ctx):
+    return RemoteOpenAIServer([
         "--model",
         MODEL_NAME,
         # use half precision for speed and memory savings in CI environment
@@ -29,22 +35,15 @@ def server():
         "--enforce-eager",
         "--engine-use-ray"
     ])
-    ray.get(server_runner.ready.remote())
-    yield server_runner
-    ray.shutdown()
 
 
 @pytest.fixture(scope="module")
-def client():
-    client = openai.AsyncOpenAI(
-        base_url="http://localhost:8000/v1",
-        api_key="token-abc123",
-    )
-    yield client
+def client(server):
+    return server.get_async_client()
 
 
 @pytest.mark.asyncio
-async def test_check_models(server, client: openai.AsyncOpenAI):
+async def test_check_models(client: openai.AsyncOpenAI):
     models = await client.models.list()
     models = models.data
     served_model = models[0]
@@ -53,7 +52,7 @@ async def test_check_models(server, client: openai.AsyncOpenAI):
 
 
 @pytest.mark.asyncio
-async def test_single_completion(server, client: openai.AsyncOpenAI):
+async def test_single_completion(client: openai.AsyncOpenAI):
     completion = await client.completions.create(model=MODEL_NAME,
                                                  prompt="Hello, my name is",
                                                  max_tokens=5,
@@ -77,7 +76,7 @@ async def test_single_completion(server, client: openai.AsyncOpenAI):
 
 
 @pytest.mark.asyncio
-async def test_single_chat_session(server, client: openai.AsyncOpenAI):
+async def test_single_chat_session(client: openai.AsyncOpenAI):
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
```
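Both test files now delegate server lifecycle management to `RemoteOpenAIServer`, which lives in `tests/utils.py` — a file this commit changes but which is not shown in this excerpt. As a rough, illustrative sketch of what such a helper has to do (launch the OpenAI-compatible server, wait for it to come up, and hand back preconfigured clients), the following assumes a subprocess-based launch with a health-check poll; the real helper may instead run the server through Ray, as the `ray_ctx` fixture with its `working_dir` runtime env suggests. The class-name suffix, port, timeout, and health-check URL here are assumptions, not the committed code:

```python
# Illustrative sketch only. The real RemoteOpenAIServer is defined in
# tests/utils.py, which this commit changes but which is not shown in this
# excerpt. Assumed behavior: launch vLLM's OpenAI-compatible API server with
# the given CLI args, block until it responds, and hand back preconfigured
# OpenAI clients. The class name, port, timeout, and health-check URL below
# are guesses, not the committed implementation.
import subprocess
import sys
import time

import openai
import requests


class RemoteOpenAIServerSketch:
    BASE_URL = "http://localhost:8000"  # default vLLM server port
    DUMMY_API_KEY = "token-abc123"      # value the old client fixture hardcoded

    def __init__(self, cli_args: list[str], startup_timeout: float = 600.0) -> None:
        # Start the API server as a child process with the fixture's CLI args.
        self.proc = subprocess.Popen(
            [sys.executable, "-m", "vllm.entrypoints.openai.api_server", *cli_args])
        self._wait_until_ready(startup_timeout)

    def _wait_until_ready(self, timeout: float) -> None:
        # Poll the server's /health endpoint until it answers or we time out.
        deadline = time.time() + timeout
        while time.time() < deadline:
            try:
                if requests.get(f"{self.BASE_URL}/health").status_code == 200:
                    return
            except requests.ConnectionError:
                pass  # server not accepting connections yet
            time.sleep(1.0)
        self.terminate()
        raise RuntimeError("OpenAI server did not become ready in time")

    def get_async_client(self) -> openai.AsyncOpenAI:
        # Mirrors what the old `client` fixture built by hand.
        return openai.AsyncOpenAI(base_url=f"{self.BASE_URL}/v1",
                                  api_key=self.DUMMY_API_KEY)

    def terminate(self) -> None:
        self.proc.terminate()
```

Packaging the setup this way means each test module declares only its CLI args and fixture chain, instead of repeating the start/ready-wait/teardown and client-construction boilerplate.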
tests/entrypoints/test_openai_embedding.py (new file, 111 additions)

```diff
@@ -0,0 +1,111 @@
+import openai
+import pytest
+import ray
+
+from ..utils import VLLM_PATH, RemoteOpenAIServer
+
+EMBEDDING_MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
+
+pytestmark = pytest.mark.openai
+
+
+@pytest.fixture(scope="module")
+def ray_ctx():
+    ray.init(runtime_env={"working_dir": VLLM_PATH})
+    yield
+    ray.shutdown()
+
+
+@pytest.fixture(scope="module")
+def embedding_server(ray_ctx):
+    return RemoteOpenAIServer([
+        "--model",
+        EMBEDDING_MODEL_NAME,
+        # use half precision for speed and memory savings in CI environment
+        "--dtype",
+        "bfloat16",
+        "--enforce-eager",
+        "--max-model-len",
+        "8192",
+    ])
+
+
+@pytest.fixture(scope="module")
+def embedding_client(embedding_server):
+    return embedding_server.get_async_client()
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name",
+    [EMBEDDING_MODEL_NAME],
+)
+async def test_single_embedding(embedding_client: openai.AsyncOpenAI,
+                                model_name: str):
+    input_texts = [
+        "The chef prepared a delicious meal.",
+    ]
+
+    # test single embedding
+    embeddings = await embedding_client.embeddings.create(
+        model=model_name,
+        input=input_texts,
+        encoding_format="float",
+    )
+    assert embeddings.id is not None
+    assert len(embeddings.data) == 1
+    assert len(embeddings.data[0].embedding) == 4096
+    assert embeddings.usage.completion_tokens == 0
+    assert embeddings.usage.prompt_tokens == 9
+    assert embeddings.usage.total_tokens == 9
+
+    # test using token IDs
+    input_tokens = [1, 1, 1, 1, 1]
+    embeddings = await embedding_client.embeddings.create(
+        model=model_name,
+        input=input_tokens,
+        encoding_format="float",
+    )
+    assert embeddings.id is not None
+    assert len(embeddings.data) == 1
+    assert len(embeddings.data[0].embedding) == 4096
+    assert embeddings.usage.completion_tokens == 0
+    assert embeddings.usage.prompt_tokens == 5
+    assert embeddings.usage.total_tokens == 5
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name",
+    [EMBEDDING_MODEL_NAME],
+)
+async def test_batch_embedding(embedding_client: openai.AsyncOpenAI,
+                               model_name: str):
+    # test List[str]
+    input_texts = [
+        "The cat sat on the mat.", "A feline was resting on a rug.",
+        "Stars twinkle brightly in the night sky."
+    ]
+    embeddings = await embedding_client.embeddings.create(
+        model=model_name,
+        input=input_texts,
+        encoding_format="float",
+    )
+    assert embeddings.id is not None
+    assert len(embeddings.data) == 3
+    assert len(embeddings.data[0].embedding) == 4096
+
+    # test List[List[int]]
+    input_tokens = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
+                    [25, 32, 64, 77]]
+    embeddings = await embedding_client.embeddings.create(
+        model=model_name,
+        input=input_tokens,
+        encoding_format="float",
+    )
+    assert embeddings.id is not None
+    assert len(embeddings.data) == 4
+    assert len(embeddings.data[0].embedding) == 4096
+    assert embeddings.usage.completion_tokens == 0
+    assert embeddings.usage.prompt_tokens == 17
+    assert embeddings.usage.total_tokens == 17
```
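The new fixtures chain module-scoped dependencies (`ray_ctx` → `embedding_server` → `embedding_client`), so Ray and the model server start once per test module rather than once per test. Outside pytest, the same pattern could be driven by hand; this sketch is hypothetical and leans only on the `RemoteOpenAIServer` interface the tests above exercise (the 4096-dimension check comes from the tests' own assertions):

```python
# Hypothetical standalone driver reusing the fixtures' pattern; not part of
# this commit. It assumes the RemoteOpenAIServer interface exercised by the
# tests above (a constructor taking CLI args, plus get_async_client()).
# Depending on the real implementation in tests/utils.py, ray.init(...) may
# be required first, as the ray_ctx fixture does.
import asyncio

from tests.utils import RemoteOpenAIServer

EMBEDDING_MODEL_NAME = "intfloat/e5-mistral-7b-instruct"


async def main() -> None:
    server = RemoteOpenAIServer([
        "--model", EMBEDDING_MODEL_NAME,
        "--dtype", "bfloat16",  # half precision, as in the CI fixture
        "--enforce-eager",
        "--max-model-len", "8192",
    ])
    client = server.get_async_client()
    result = await client.embeddings.create(
        model=EMBEDDING_MODEL_NAME,
        input=["The chef prepared a delicious meal."],
        encoding_format="float",
    )
    # e5-mistral-7b-instruct returns 4096-dim vectors, per the assertions above.
    print(len(result.data[0].embedding))


if __name__ == "__main__":
    asyncio.run(main())
```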
[Diffs for the remaining four changed files are not included in this excerpt.]
