# SPDX-License-Identifier: Apache-2.0
"""Compare the scoring outputs of HF and vLLM models.

Run `pytest tests/models/embedding/language/test_scoring.py`.
"""
import math

import pytest
import torch
import torch.nn.functional as F
# Cross-encoder rerankers covering both supported backbone families.
CROSS_ENCODER_MODELS = [
    "cross-encoder/ms-marco-MiniLM-L-6-v2",  # Bert
    "BAAI/bge-reranker-v2-m3",  # Roberta
]
2824 "The capital of Germany is Berlin." ,
2925]
3026
# Single dtype shared by every HF and vLLM runner below so that the
# score comparisons are apples-to-apples (fp16 on both sides).
DTYPE = "half"
3129
@pytest.fixture(scope="module", params=CROSS_ENCODER_MODELS)
def model_name(request):
    """Yield each cross-encoder model name, parametrizing the tests below.

    Module-scoped so the (expensive) model load is shared across the tests
    that take this fixture.
    """
    yield request.param
3533
3634
37- @pytest .mark .parametrize ("dtype" , ["half" ])
38- def test_llm_1_to_1 (vllm_runner , hf_runner , model_name , dtype : str ):
39-
35+ def test_cross_encoder_1_to_1 (vllm_runner , hf_runner , model_name ):
4036 text_pair = [TEXTS_1 [0 ], TEXTS_2 [0 ]]
4137
42- with hf_runner (model_name , dtype = dtype , is_cross_encoder = True ) as hf_model :
38+ with hf_runner (model_name , dtype = DTYPE , is_cross_encoder = True ) as hf_model :
4339 hf_outputs = hf_model .predict ([text_pair ]).tolist ()
4440
45- with vllm_runner (model_name , task = "score" , dtype = dtype ,
41+ with vllm_runner (model_name , task = "score" , dtype = DTYPE ,
4642 max_model_len = None ) as vllm_model :
4743 vllm_outputs = vllm_model .score (text_pair [0 ], text_pair [1 ])
4844
@@ -52,18 +48,16 @@ def test_llm_1_to_1(vllm_runner, hf_runner, model_name, dtype: str):
5248 assert math .isclose (hf_outputs [0 ], vllm_outputs [0 ], rel_tol = 0.01 )
5349
5450
55- @pytest .mark .parametrize ("dtype" , ["half" ])
56- def test_llm_1_to_N (vllm_runner , hf_runner , model_name , dtype : str ):
57-
51+ def test_cross_encoder_1_to_N (vllm_runner , hf_runner , model_name ):
5852 text_pairs = [
5953 [TEXTS_1 [0 ], TEXTS_2 [0 ]],
6054 [TEXTS_1 [0 ], TEXTS_2 [1 ]],
6155 ]
6256
63- with hf_runner (model_name , dtype = dtype , is_cross_encoder = True ) as hf_model :
57+ with hf_runner (model_name , dtype = DTYPE , is_cross_encoder = True ) as hf_model :
6458 hf_outputs = hf_model .predict (text_pairs ).tolist ()
6559
66- with vllm_runner (model_name , task = "score" , dtype = dtype ,
60+ with vllm_runner (model_name , task = "score" , dtype = DTYPE ,
6761 max_model_len = None ) as vllm_model :
6862 vllm_outputs = vllm_model .score (TEXTS_1 [0 ], TEXTS_2 )
6963
@@ -74,18 +68,16 @@ def test_llm_1_to_N(vllm_runner, hf_runner, model_name, dtype: str):
7468 assert math .isclose (hf_outputs [1 ], vllm_outputs [1 ], rel_tol = 0.01 )
7569
7670
77- @pytest .mark .parametrize ("dtype" , ["half" ])
78- def test_llm_N_to_N (vllm_runner , hf_runner , model_name , dtype : str ):
79-
71+ def test_cross_encoder_N_to_N (vllm_runner , hf_runner , model_name ):
8072 text_pairs = [
8173 [TEXTS_1 [0 ], TEXTS_2 [0 ]],
8274 [TEXTS_1 [1 ], TEXTS_2 [1 ]],
8375 ]
8476
85- with hf_runner (model_name , dtype = dtype , is_cross_encoder = True ) as hf_model :
77+ with hf_runner (model_name , dtype = DTYPE , is_cross_encoder = True ) as hf_model :
8678 hf_outputs = hf_model .predict (text_pairs ).tolist ()
8779
88- with vllm_runner (model_name , task = "score" , dtype = dtype ,
80+ with vllm_runner (model_name , task = "score" , dtype = DTYPE ,
8981 max_model_len = None ) as vllm_model :
9082 vllm_outputs = vllm_model .score (TEXTS_1 , TEXTS_2 )
9183
@@ -101,13 +93,10 @@ def emb_model_name(request):
10193 yield request .param
10294
10395
104- @pytest .mark .parametrize ("dtype" , ["half" ])
105- def test_llm_1_to_1_embedding (vllm_runner , hf_runner , emb_model_name ,
106- dtype : str ):
107-
96+ def test_embedding_1_to_1 (vllm_runner , hf_runner , emb_model_name ):
10897 text_pair = [TEXTS_1 [0 ], TEXTS_2 [0 ]]
10998
110- with hf_runner (emb_model_name , dtype = dtype ,
99+ with hf_runner (emb_model_name , dtype = DTYPE ,
111100 is_sentence_transformer = True ) as hf_model :
112101 hf_embeddings = hf_model .encode (text_pair )
113102 hf_outputs = [
@@ -116,7 +105,7 @@ def test_llm_1_to_1_embedding(vllm_runner, hf_runner, emb_model_name,
116105
117106 with vllm_runner (emb_model_name ,
118107 task = "embed" ,
119- dtype = dtype ,
108+ dtype = DTYPE ,
120109 max_model_len = None ) as vllm_model :
121110 vllm_outputs = vllm_model .score (text_pair [0 ], text_pair [1 ])
122111
@@ -126,16 +115,13 @@ def test_llm_1_to_1_embedding(vllm_runner, hf_runner, emb_model_name,
126115 assert math .isclose (hf_outputs [0 ], vllm_outputs [0 ], rel_tol = 0.01 )
127116
128117
129- @pytest .mark .parametrize ("dtype" , ["half" ])
130- def test_llm_1_to_N_embedding (vllm_runner , hf_runner , emb_model_name ,
131- dtype : str ):
132-
118+ def test_embedding_1_to_N (vllm_runner , hf_runner , emb_model_name ):
133119 text_pairs = [
134120 [TEXTS_1 [0 ], TEXTS_2 [0 ]],
135121 [TEXTS_1 [0 ], TEXTS_2 [1 ]],
136122 ]
137123
138- with hf_runner (emb_model_name , dtype = dtype ,
124+ with hf_runner (emb_model_name , dtype = DTYPE ,
139125 is_sentence_transformer = True ) as hf_model :
140126 hf_embeddings = [
141127 hf_model .encode (text_pair ) for text_pair in text_pairs
@@ -147,7 +133,7 @@ def test_llm_1_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
147133
148134 with vllm_runner (emb_model_name ,
149135 task = "embed" ,
150- dtype = dtype ,
136+ dtype = DTYPE ,
151137 max_model_len = None ) as vllm_model :
152138 vllm_outputs = vllm_model .score (TEXTS_1 [0 ], TEXTS_2 )
153139
@@ -158,16 +144,13 @@ def test_llm_1_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
158144 assert math .isclose (hf_outputs [1 ], vllm_outputs [1 ], rel_tol = 0.01 )
159145
160146
161- @pytest .mark .parametrize ("dtype" , ["half" ])
162- def test_llm_N_to_N_embedding (vllm_runner , hf_runner , emb_model_name ,
163- dtype : str ):
164-
147+ def test_embedding_N_to_N (vllm_runner , hf_runner , emb_model_name ):
165148 text_pairs = [
166149 [TEXTS_1 [0 ], TEXTS_2 [0 ]],
167150 [TEXTS_1 [1 ], TEXTS_2 [1 ]],
168151 ]
169152
170- with hf_runner (emb_model_name , dtype = dtype ,
153+ with hf_runner (emb_model_name , dtype = DTYPE ,
171154 is_sentence_transformer = True ) as hf_model :
172155 hf_embeddings = [
173156 hf_model .encode (text_pair ) for text_pair in text_pairs
@@ -179,7 +162,7 @@ def test_llm_N_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
179162
180163 with vllm_runner (emb_model_name ,
181164 task = "embed" ,
182- dtype = dtype ,
165+ dtype = DTYPE ,
183166 max_model_len = None ) as vllm_model :
184167 vllm_outputs = vllm_model .score (TEXTS_1 , TEXTS_2 )
185168
0 commit comments