Skip to content

Commit 3bc918f

Browse files
committed
Raise an exception when the model does not support pooling tasks
Signed-off-by: zxw <1020938856@qq.com>
1 parent 0307428 commit 3bc918f

File tree

1 file changed

+19
-1
lines changed

1 file changed

+19
-1
lines changed

vllm/v1/worker/gpu_model_runner.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3375,8 +3375,26 @@ def _dummy_pooler_run(
33753375
hidden_states: torch.Tensor,
33763376
) -> PoolerOutput:
33773377
# Find the task that has the largest output for subsequent steps
3378+
supported_pooling_tasks = self.get_supported_pooling_tasks()
3379+
3380+
if not supported_pooling_tasks:
3381+
if self.scheduler_config.chunked_prefill_enabled:
3382+
raise RuntimeError(
3383+
f"Model {self.model_config.model} does not support "
3384+
"any pooling tasks with chunked prefill enabled. "
3385+
"Please add --no-enable-chunked-prefill to your "
3386+
"config or CLI args. See "
3387+
"https://docs.vllm.ai/en/latest/models/pooling_models.html "
3388+
"to learn more.")
3389+
else:
3390+
raise RuntimeError(
3391+
f"Model {self.model_config.model} does not support "
3392+
"any pooling tasks. See "
3393+
"https://docs.vllm.ai/en/latest/models/pooling_models.html "
3394+
"to learn more.")
3395+
33783396
output_size = dict[PoolingTask, float]()
3379-
for task in self.get_supported_pooling_tasks():
3397+
for task in supported_pooling_tasks:
33803398
# Run a full batch with each task to ensure none of them OOMs
33813399
output = self._dummy_pooler_run_task(hidden_states, task)
33823400
output_size[task] = sum(o.nbytes for o in output)

0 commit comments

Comments
 (0)