77import torch .nn .functional as F
88
99from tests .utils import RemoteOpenAIServer
10- from vllm .entrypoints .openai .protocol import ClassificationResponse
10+ from vllm .entrypoints .openai .protocol import ClassificationResponse , PoolingResponse
1111
1212MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"
1313DTYPE = "float32" # Use float32 to avoid NaN issue
@@ -191,18 +191,7 @@ async def get_outputs(activation):
191191
192192@pytest .mark .asyncio
193193@pytest .mark .parametrize ("model_name" , [MODEL_NAME ])
194- def test_pooling (server : RemoteOpenAIServer , model_name : str ):
195- # pooling api uses ALL pooling, which does not support chunked prefill.
196- response = requests .post (
197- server .url_for ("pooling" ),
198- json = {"model" : model_name , "input" : "test" , "encoding_format" : "float" },
199- )
200- assert response .json ()["error" ]["type" ] == "BadRequestError"
201-
202-
203- @pytest .mark .asyncio
204- @pytest .mark .parametrize ("model_name" , [MODEL_NAME ])
205- def test_score (server : RemoteOpenAIServer , model_name : str ):
194+ async def test_score (server : RemoteOpenAIServer , model_name : str ):
206195 # score api is only enabled for num_labels == 1.
207196 response = requests .post (
208197 server .url_for ("score" ),
@@ -217,7 +206,7 @@ def test_score(server: RemoteOpenAIServer, model_name: str):
217206
218207@pytest .mark .asyncio
219208@pytest .mark .parametrize ("model_name" , [MODEL_NAME ])
220- def test_rerank (server : RemoteOpenAIServer , model_name : str ):
209+ async def test_rerank (server : RemoteOpenAIServer , model_name : str ):
221210 # rerank api is only enabled for num_labels == 1.
222211 response = requests .post (
223212 server .url_for ("rerank" ),
@@ -228,3 +217,62 @@ def test_rerank(server: RemoteOpenAIServer, model_name: str):
228217 },
229218 )
230219 assert response .json ()["error" ]["type" ] == "BadRequestError"
220+
221+
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_pooling_classify(server: RemoteOpenAIServer, model_name: str):
    """The pooling endpoint with task="classify" succeeds and returns one
    pooled output whose length matches the model's two class labels."""
    payload = {
        "model": model_name,
        "input": "This product was excellent and exceeded my expectations",
        "encoding_format": "float",
        "task": "classify",
    }
    raw = requests.post(server.url_for("pooling"), json=payload)
    parsed = PoolingResponse.model_validate(raw.json())
    # One input prompt -> one pooled result with num_labels == 2 scores.
    assert len(parsed.data) == 1
    assert len(parsed.data[0].data) == 2
238+
239+
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_pooling_token_classify(server: RemoteOpenAIServer, model_name: str):
    """token_classify uses ALL pooling, which does not support chunked
    prefill, so the server must reject the request with a BadRequestError."""
    task = "token_classify"
    payload = {
        "model": model_name,
        "input": "test",
        "encoding_format": "float",
        "task": task,
    }
    resp = requests.post(server.url_for("pooling"), json=payload)
    error = resp.json()["error"]
    assert error["type"] == "BadRequestError"
    assert error["message"].startswith(f"Task {task} is not supported")
258+
259+
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("task", ["embed", "token_embed", "plugin"])
async def test_pooling_not_supported(
    server: RemoteOpenAIServer, model_name: str, task: str
):
    """Tasks this classification model does not support are rejected by the
    pooling endpoint with a BadRequestError naming the offending task."""
    payload = {
        "model": model_name,
        "input": "test",
        "encoding_format": "float",
        "task": task,
    }
    resp = requests.post(server.url_for("pooling"), json=payload)
    error = resp.json()["error"]
    assert error["type"] == "BadRequestError"
    assert error["message"].startswith(f"Task {task} is not supported")
0 commit comments