Merged
37 commits
42352ba  +test (noooop, Jul 7, 2025)
3f887ab  Merge branch 'vllm-project:main' into pooler_config (noooop, Jul 23, 2025)
f27031c  + using_normalize (noooop, Jul 23, 2025)
593df3e  + using_activation (noooop, Jul 24, 2025)
3473b05  conflicts (noooop, Jul 24, 2025)
b90f8a7  Merge branch 'vllm-project:main' into pooler_config (noooop, Jul 24, 2025)
8c5f744  + pooling_params (noooop, Jul 24, 2025)
e3bc35a  fix (noooop, Jul 24, 2025)
1a370b5  fix (noooop, Jul 25, 2025)
9d75628  + test_pooling_params.py (noooop, Jul 25, 2025)
5a131d1  + merge_default_parameters (noooop, Jul 25, 2025)
5385b76  Remove unnecessary changes (noooop, Jul 25, 2025)
1946596  Remove unnecessary changes (noooop, Jul 25, 2025)
beead6b  fix (noooop, Jul 26, 2025)
32e4533  Merge branch 'vllm-project:main' into pooler_config (noooop, Jul 26, 2025)
889570a  fix (noooop, Jul 26, 2025)
fa1367e  mypy (noooop, Jul 26, 2025)
efcf72e  fix (noooop, Jul 26, 2025)
8526e2a  + test_reward_models_using_softmax (noooop, Jul 26, 2025)
f644bc2  Merge branch 'vllm-project:main' into pooler_config (noooop, Jul 28, 2025)
2ab4d55  Merge branch 'vllm-project:main' into pooler_config (noooop, Jul 30, 2025)
684a2d9  + test_reward (noooop, Jul 30, 2025)
d5e30e6  - default_normalize & default_softmax (noooop, Aug 1, 2025)
cfa1a3d  + JambaForSequenceClassificationConfig (noooop, Aug 1, 2025)
d0488e7  fix (noooop, Aug 1, 2025)
6668c47  Merge branch 'vllm-project:main' into pooler_config (noooop, Aug 1, 2025)
5274e2f  fix (noooop, Aug 1, 2025)
2ffa834  - merge_default_parameters (noooop, Aug 4, 2025)
9e69222  fix (noooop, Aug 4, 2025)
bd83ada  fix (noooop, Aug 4, 2025)
dab5b55  fix (noooop, Aug 4, 2025)
9129093  fix (noooop, Aug 4, 2025)
0973e6b  fix (noooop, Aug 4, 2025)
f0d6190  using tomaarsen/Qwen3-Reranker-0.6B-seq-cls (noooop, Aug 4, 2025)
e55a342  fix (noooop, Aug 4, 2025)
b3624e1  ci bug ? (noooop, Aug 4, 2025)
a42938c  Merge branch 'vllm-project:main' into pooler_config (noooop, Aug 5, 2025)
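
The diffs below add tests for three per-request pooling controls exposed through `PoolingParams`: `activation` for classification and scoring, `normalize` for embeddings, and `softmax` for reward models, accepted both by the offline `LLM` entrypoints and by the OpenAI-compatible server routes. A minimal offline sketch of the kind of call the tests exercise (illustrative only; it assumes `PoolingParams` accepts these keyword arguments, as the added tests indicate):

```python
# Illustrative sketch of the per-request pooling controls exercised by the tests below.
# Assumes PoolingParams accepts a normalize keyword argument, as the embedding test shows.
from vllm import LLM, PoolingParams

llm = LLM(model="intfloat/multilingual-e5-small", enforce_eager=True)

# Request embeddings without the default L2 normalization.
raw_embeddings = llm.embed(["The chef prepared a delicious meal."],
                           pooling_params=PoolingParams(normalize=False))
```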
tests/entrypoints/llm/test_classify.py (new file, 67 additions)
@@ -0,0 +1,67 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import weakref

import pytest
import torch

from vllm import LLM, PoolingParams
from vllm.distributed import cleanup_dist_env_and_memory

from ...models.utils import softmax

MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"

prompts = ["The chef prepared a delicious meal."]


@pytest.fixture(autouse=True)
def v1(run_with_both_engines):
    # Simple autouse wrapper to run both engines for each test
    # This can be promoted up to conftest.py to run for every
    # test in a package
    pass


@pytest.fixture(scope="module")
def llm():
    # pytest caches the fixture so we use weakref.proxy to
    # enable garbage collection
    llm = LLM(model=MODEL_NAME,
              max_num_batched_tokens=32768,
              tensor_parallel_size=1,
              gpu_memory_utilization=0.75,
              enforce_eager=True,
              seed=0)

    with llm.deprecate_legacy_api():
        yield weakref.proxy(llm)

    del llm

    cleanup_dist_env_and_memory()


@pytest.mark.skip_global_cleanup
def test_pooling_params(llm: LLM):

    def get_outputs(activation):
        outputs = llm.classify(
            prompts,
            pooling_params=PoolingParams(activation=activation),
            use_tqdm=False)
        return torch.tensor([x.outputs.probs for x in outputs])

    default = get_outputs(activation=None)
    w_activation = get_outputs(activation=True)
    wo_activation = get_outputs(activation=False)

    assert torch.allclose(default, w_activation,
                          atol=1e-2), "Default should use activation."
    assert not torch.allclose(
        w_activation, wo_activation,
        atol=1e-2), "wo_activation should not use activation."
    assert torch.allclose(
        softmax(wo_activation), w_activation, atol=1e-2
    ), "w_activation should be close to activation(wo_activation)."
tests/entrypoints/llm/test_embedding.py (new file, 56 additions)
@@ -0,0 +1,56 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import weakref

import pytest
import torch
import torch.nn.functional as F

from vllm import LLM, PoolingParams
from vllm.distributed import cleanup_dist_env_and_memory

MODEL_NAME = "intfloat/multilingual-e5-small"

prompts = ["The chef prepared a delicious meal."]


@pytest.fixture(scope="module")
def llm():
    # pytest caches the fixture so we use weakref.proxy to
    # enable garbage collection
    llm = LLM(model=MODEL_NAME,
              max_num_batched_tokens=32768,
              tensor_parallel_size=1,
              gpu_memory_utilization=0.75,
              enforce_eager=True,
              seed=0)

    with llm.deprecate_legacy_api():
        yield weakref.proxy(llm)

    del llm

    cleanup_dist_env_and_memory()


@pytest.mark.skip_global_cleanup
def test_pooling_params(llm: LLM):

    def get_outputs(normalize):
        outputs = llm.embed(prompts,
                            pooling_params=PoolingParams(normalize=normalize),
                            use_tqdm=False)
        return torch.tensor([x.outputs.embedding for x in outputs])

    default = get_outputs(normalize=None)
    w_normal = get_outputs(normalize=True)
    wo_normal = get_outputs(normalize=False)

    assert torch.allclose(default, w_normal,
                          atol=1e-2), "Default should use normal."
    assert not torch.allclose(w_normal, wo_normal,
                              atol=1e-2), "wo_normal should not use normal."
    assert torch.allclose(
        w_normal, F.normalize(wo_normal, p=2, dim=-1),
        atol=1e-2), "w_normal should be close to normal(wo_normal)."
tests/entrypoints/llm/test_reward.py (new file, 66 additions)
@@ -0,0 +1,66 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import weakref

import pytest
import torch

from vllm import LLM, PoolingParams
from vllm.distributed import cleanup_dist_env_and_memory

from ...models.utils import softmax

MODEL_NAME = "internlm/internlm2-1_8b-reward"

prompts = ["The chef prepared a delicious meal."]


@pytest.fixture(autouse=True)
def v1(run_with_both_engines):
    # Simple autouse wrapper to run both engines for each test
    # This can be promoted up to conftest.py to run for every
    # test in a package
    pass


@pytest.fixture(scope="module")
def llm():
    # pytest caches the fixture so we use weakref.proxy to
    # enable garbage collection
    llm = LLM(model=MODEL_NAME,
              max_num_batched_tokens=32768,
              tensor_parallel_size=1,
              gpu_memory_utilization=0.75,
              enforce_eager=True,
              trust_remote_code=True,
              seed=0)

    with llm.deprecate_legacy_api():
        yield weakref.proxy(llm)

    del llm

    cleanup_dist_env_and_memory()


@pytest.mark.skip_global_cleanup
def test_pooling_params(llm: LLM):

    def get_outputs(softmax):
        outputs = llm.reward(prompts,
                             pooling_params=PoolingParams(softmax=softmax),
                             use_tqdm=False)
        return torch.cat([x.outputs.data for x in outputs])

    default = get_outputs(softmax=None)
    w_softmax = get_outputs(softmax=True)
    wo_softmax = get_outputs(softmax=False)

    assert torch.allclose(default, w_softmax,
                          atol=1e-2), "Default should use softmax."
    assert not torch.allclose(w_softmax, wo_softmax,
                              atol=1e-2), "wo_softmax should not use softmax."
    assert torch.allclose(
        softmax(wo_softmax), w_softmax,
        atol=1e-2), "w_softmax should be close to softmax(wo_softmax)."
tests/entrypoints/llm/test_score.py (new file, 69 additions)
@@ -0,0 +1,69 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import weakref

import pytest
import torch

from vllm import LLM, PoolingParams
from vllm.distributed import cleanup_dist_env_and_memory

from ...models.utils import softmax

MODEL_NAME = "tomaarsen/Qwen3-Reranker-0.6B-seq-cls"


@pytest.fixture(autouse=True)
def v1(run_with_both_engines):
    # Simple autouse wrapper to run both engines for each test
    # This can be promoted up to conftest.py to run for every
    # test in a package
    pass


@pytest.fixture(scope="module")
def llm():
    # pytest caches the fixture so we use weakref.proxy to
    # enable garbage collection
    llm = LLM(model=MODEL_NAME,
              max_num_batched_tokens=32768,
              tensor_parallel_size=1,
              gpu_memory_utilization=0.75,
              enforce_eager=True,
              seed=0)

    with llm.deprecate_legacy_api():
        yield weakref.proxy(llm)

    del llm

    cleanup_dist_env_and_memory()


@pytest.mark.skip_global_cleanup
def test_pooling_params(llm: LLM):

    def get_outputs(activation):
        text_1 = "What is the capital of France?"
        text_2 = "The capital of France is Paris."

        outputs = llm.score(
            text_1,
            text_2,
            pooling_params=PoolingParams(activation=activation),
            use_tqdm=False)
        return torch.tensor([x.outputs.score for x in outputs])

    default = get_outputs(activation=None)
    w_activation = get_outputs(activation=True)
    wo_activation = get_outputs(activation=False)

    assert torch.allclose(default, w_activation,
                          atol=1e-2), "Default should use activation."
    assert not torch.allclose(
        w_activation, wo_activation,
        atol=1e-2), "wo_activation should not use activation."
    assert torch.allclose(
        softmax(wo_activation), w_activation, atol=1e-2
    ), "w_activation should be close to activation(wo_activation)."
tests/entrypoints/openai/test_classification.py (31 additions, 0 deletions)
@@ -3,6 +3,8 @@

import pytest
import requests
import torch
import torch.nn.functional as F

from vllm.entrypoints.openai.protocol import ClassificationResponse

@@ -181,3 +183,32 @@ async def test_invocations(server: RemoteOpenAIServer):
    assert classification_data.keys() == invocation_data.keys()
    assert classification_data["probs"] == pytest.approx(
        invocation_data["probs"], rel=0.01)


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_activation(server: RemoteOpenAIServer, model_name: str):
    input_text = ["This product was excellent and exceeded my expectations"]

    async def get_outputs(activation):
        response = requests.post(server.url_for("classify"),
                                 json={
                                     "model": model_name,
                                     "input": input_text,
                                     "activation": activation
                                 })
        outputs = response.json()
        return torch.tensor([x['probs'] for x in outputs["data"]])

    default = await get_outputs(activation=None)
    w_activation = await get_outputs(activation=True)
    wo_activation = await get_outputs(activation=False)

    assert torch.allclose(default, w_activation,
                          atol=1e-2), "Default should use activation."
    assert not torch.allclose(
        w_activation, wo_activation,
        atol=1e-2), "wo_activation should not use activation."
    assert torch.allclose(
        F.softmax(wo_activation, dim=-1), w_activation, atol=1e-2
    ), "w_activation should be close to activation(wo_activation)."
tests/entrypoints/openai/test_embedding.py (34 additions, 0 deletions)
@@ -8,6 +8,8 @@
import pytest
import pytest_asyncio
import requests
import torch
import torch.nn.functional as F

from vllm.entrypoints.openai.protocol import EmbeddingResponse
from vllm.transformers_utils.tokenizer import get_tokenizer
@@ -369,3 +371,35 @@ async def test_invocations_conversation(server: RemoteOpenAIServer):
        embeddings_1_lst=[invocation_data["embedding"]],
        name_0="chat",
        name_1="invocation")


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_normalize(server: RemoteOpenAIServer, model_name: str):
    input_text = ["The chef prepared a delicious meal."]

    async def get_outputs(normalize):
        request_args = {
            "model": MODEL_NAME,
            "input": input_text,
            "encoding_format": "float",
            "normalize": normalize
        }

        response = requests.post(server.url_for("v1/embeddings"),
                                 json=request_args)
        outputs = response.json()

        return torch.tensor([x['embedding'] for x in outputs["data"]])

    default = await get_outputs(normalize=None)
    w_normal = await get_outputs(normalize=True)
    wo_normal = await get_outputs(normalize=False)

    assert torch.allclose(default, w_normal,
                          atol=1e-2), "Default should use normal."
    assert not torch.allclose(w_normal, wo_normal,
                              atol=1e-2), "wo_normal should not use normal."
    assert torch.allclose(
        w_normal, F.normalize(wo_normal, p=2, dim=-1),
        atol=1e-2), "w_normal should be close to normal(wo_normal)."
tests/entrypoints/openai/test_rerank.py (38 additions, 0 deletions)
@@ -3,6 +3,8 @@

import pytest
import requests
import torch
import torch.nn.functional as F

from vllm.entrypoints.openai.protocol import RerankResponse

@@ -125,3 +127,39 @@ def test_invocations(server: RemoteOpenAIServer):
    assert rerank_result.keys() == invocations_result.keys()
    assert rerank_result["relevance_score"] == pytest.approx(
        invocations_result["relevance_score"], rel=0.01)


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_activation(server: RemoteOpenAIServer, model_name: str):

    async def get_outputs(activation):
        query = "What is the capital of France?"
        documents = [
            "The capital of Brazil is Brasilia.",
            "The capital of France is Paris."
        ]

        response = requests.post(server.url_for("rerank"),
                                 json={
                                     "model": model_name,
                                     "query": query,
                                     "documents": documents,
                                     "activation": activation
                                 })
        outputs = response.json()

        return torch.tensor([x['relevance_score'] for x in outputs["results"]])

    default = await get_outputs(activation=None)
    w_activation = await get_outputs(activation=True)
    wo_activation = await get_outputs(activation=False)

    assert torch.allclose(default, w_activation,
                          atol=1e-2), "Default should use activation."
    assert not torch.allclose(
        w_activation, wo_activation,
        atol=1e-2), "wo_activation should not use activation."
    assert torch.allclose(
        F.sigmoid(wo_activation), w_activation, atol=1e-2
    ), "w_activation should be close to activation(wo_activation)."