File tree Expand file tree Collapse file tree 2 files changed +5
-1
lines changed
tests/models/language/pooling Expand file tree Collapse file tree 2 files changed +5
-1
lines changed Original file line number Diff line number Diff line change @@ -101,4 +101,4 @@ def test_prm_models(
101101 hf_output = torch.tensor(hf_output)
102102 vllm_output = torch.tensor(vllm_output)
103103
104- assert torch.allclose(hf_output, vllm_output, 1e-2)
104+ assert torch.allclose(hf_output, vllm_output, 1.5e-2)
Original file line number Diff line number Diff line change 77from vllm .config import VllmConfig
88from vllm .logger import init_logger
99from vllm .model_executor .model_loader import get_model
10+ from vllm .model_executor .models .interfaces import has_step_pooler
1011from vllm .v1 .worker .gpu_model_runner import GPUModelRunner
1112
1213logger = init_logger (__name__ )
@@ -52,6 +53,9 @@ def load_model(self) -> None:
5253 logger .info ("Starting to load model %s..." , self .model_config .model )
5354 self .model = get_model (vllm_config = self .vllm_config )
5455
56+ if has_step_pooler (self .model ):
57+ self .input_batch .logits_processing_needs_token_ids = True
58+
5559 if self .lora_config :
5660 self .model = self .load_lora_model (self .model , self .model_config ,
5761 self .scheduler_config ,
You can’t perform that action at this time.
0 commit comments