3 changes: 3 additions & 0 deletions vllm/model_executor/models/adapters.py

@@ -399,6 +399,9 @@ def as_reward_model(cls: _T) -> _T:
     # Lazy import
     from vllm.model_executor.layers.pooler import DispatchPooler, Pooler
 
+    from .interfaces_base import default_pooling_type
+
+    @default_pooling_type("ALL")
     class ModelForReward(_create_pooling_model_cls(cls)):
         def _init_pooler(self, vllm_config: "VllmConfig", prefix: str = ""):
             pooler_config = vllm_config.model_config.pooler_config
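For context, a toy sketch of the decorator pattern used here (simplified, not vLLM's actual implementation): default_pooling_type records a class-level fallback that the pooler uses when the pooler config does not specify a pooling type, which is what lets the reward adapter default to "ALL".

    # Toy sketch of the default_pooling_type pattern (simplified; not vLLM's code).
    from typing import Optional, TypeVar

    _T = TypeVar("_T", bound=type)

    def default_pooling_type(pooling_type: str):
        """Class decorator: record a fallback pooling type on the class."""
        def wrapper(cls: _T) -> _T:
            cls.default_pooling_type = pooling_type
            return cls
        return wrapper

    @default_pooling_type("ALL")
    class ToyRewardModel:
        def resolve_pooling_type(self, configured: Optional[str]) -> str:
            # An explicitly configured value wins; otherwise fall back
            # to the class-level default set by the decorator.
            return configured or type(self).default_pooling_type

    model = ToyRewardModel()
    assert model.resolve_pooling_type(None) == "ALL"
    assert model.resolve_pooling_type("LAST") == "LAST"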
22 changes: 21 additions & 1 deletion vllm/v1/worker/gpu_model_runner.py

@@ -3622,8 +3622,28 @@ def _dummy_pooler_run(
         hidden_states: torch.Tensor,
     ) -> PoolerOutput:
         # Find the task that has the largest output for subsequent steps
+        supported_pooling_tasks = self.get_supported_pooling_tasks()
+
+        if not supported_pooling_tasks:
+            if self.scheduler_config.chunked_prefill_enabled:
+                raise RuntimeError(
+                    f"Model {self.model_config.model} does not support "
+                    "any pooling tasks with chunked prefill enabled. "
+                    "Please add --no-enable-chunked-prefill to your "
+                    "config or CLI args. See "
+                    "https://docs.vllm.ai/en/latest/models/pooling_models.html "
+                    "to learn more."
+                )
+            else:
+                raise RuntimeError(
+                    f"Model {self.model_config.model} does not support "
+                    "any pooling tasks. See "
+                    "https://docs.vllm.ai/en/latest/models/pooling_models.html "
+                    "to learn more."
+                )
+
         output_size = dict[PoolingTask, float]()
-        for task in self.get_supported_pooling_tasks():
+        for task in supported_pooling_tasks:
             # Run a full batch with each task to ensure none of them OOMs
             output = self._dummy_pooler_run_task(hidden_states, task)
             output_size[task] = sum(o.nbytes for o in output)
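Per the new error message, a user who hits the chunked-prefill branch of this guard can disable chunked prefill. A minimal offline sketch of the equivalent of --no-enable-chunked-prefill (the model name is a placeholder; substitute the pooling model that triggered the error):

    # Sketch: offline equivalent of passing --no-enable-chunked-prefill.
    # "my-org/my-reward-model" is a placeholder, not a real checkpoint.
    from vllm import LLM

    llm = LLM(
        model="my-org/my-reward-model",   # placeholder pooling model
        enable_chunked_prefill=False,     # mirrors --no-enable-chunked-prefill
    )

If the model exposes no pooling tasks even with chunked prefill disabled, the second branch of the guard fires instead, pointing to the pooling-models documentation rather than suggesting a flag change.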