diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
index f53e8b030885..2e0212d010da 100644
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -2651,24 +2651,46 @@ class PoolerConfig:
     ## for embeddings models
     normalize: Optional[bool] = None
     """
-    Whether to normalize the embeddings outputs.
+    Whether to normalize the embedding outputs. Defaults to True.
     """
     dimensions: Optional[int] = None
     """
     Reduce the dimensions of embeddings if model
-    support matryoshka representation.
+    supports matryoshka representation. Defaults to None.
+    """
+    enable_chunked_processing: Optional[bool] = None
+    """
+    Whether to enable chunked processing for long inputs that exceed the
+    model's maximum position embedding length. When enabled, long inputs are
+    split into chunks, processed separately, and then aggregated using
+    weighted averaging. This allows embedding models to handle arbitrarily
+    long text without CUDA errors. Defaults to False.
+    """
+    max_embed_len: Optional[int] = None
+    """
+    Maximum input length allowed for embedding generation. When set, inputs
+    longer than max_model_len (up to max_embed_len) are accepted for
+    embedding models. When an input exceeds max_embed_len, it is handled
+    according to the original max_model_len validation logic.
+    Defaults to None (i.e. set to max_model_len).
     """
 
     ## for classification models
     activation: Optional[bool] = None
     """
     Whether to apply activation function to the classification outputs.
+    Defaults to True.
+    """
+    logit_bias: Optional[float] = None
+    """
+    If provided, subtract this bias from the classification logits. Defaults to None.
     """
 
     ## for reward models
     softmax: Optional[bool] = None
     """
     Whether to apply softmax to the reward outputs.
+    Defaults to True.
     """
     step_tag_id: Optional[int] = None
     """
@@ -2683,25 +2705,6 @@ class PoolerConfig:
     ``math-shepherd-mistral-7b-prm`` model.
     """
 
-    enable_chunked_processing: Optional[bool] = None
-    """
-    Whether to enable chunked processing for long inputs that exceed the model's
-    maximum position embeddings. When enabled, long inputs will be split into
-    chunks, processed separately, and then aggregated using weighted averaging.
-    This allows embedding models to handle arbitrarily long text without CUDA
-    errors. Defaults to False.
-    """
-
-    max_embed_len: Optional[int] = None
-    """
-    Maximum input length allowed for embedding generation. When set, allows
-    inputs longer than max_embed_len to be accepted for embedding models.
-    This parameter enables accepting long inputs without requiring
-    VLLM_ALLOW_LONG_MAX_MODEL_LEN environment variable. When an input exceeds
-    max_embed_len, it will be handled according to the original max_model_len
-    validation logic. Defaults to None (i.e. set to max_model_len).
-    """
-
     def compute_hash(self) -> str:
         """
         WARNING: Whenever a new field is added to this config,
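The PoolerConfig hunk above relocates enable_chunked_processing and max_embed_len and adds the new logit_bias field. A minimal usage sketch of the embedding options, assuming the pre-existing override_pooler_config engine argument and an illustrative embedding model (neither is introduced by this patch):

from vllm import LLM
from vllm.config import PoolerConfig

# Illustrative only: accept inputs beyond max_model_len (up to 10240 tokens)
# and chunk them for the encoder, per the docstrings added above.
llm = LLM(
    model="intfloat/multilingual-e5-large",  # hypothetical model choice
    task="embed",
    override_pooler_config=PoolerConfig(
        enable_chunked_processing=True,
        max_embed_len=10240,
    ),
)
outputs = llm.embed(["some very long document ..."])
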
- """ - def compute_hash(self) -> str: """ WARNING: Whenever a new field is added to this config, diff --git a/vllm/model_executor/layers/pooler.py b/vllm/model_executor/layers/pooler.py index 62b3ee1abaca..afe7ea7b8392 100644 --- a/vllm/model_executor/layers/pooler.py +++ b/vllm/model_executor/layers/pooler.py @@ -633,9 +633,14 @@ def __init__( ) -> None: super().__init__() + from vllm.config import get_current_vllm_config + vllm_config = get_current_vllm_config() + self.pooling = pooling self.classifier = classifier self.act_fn = act_fn or PoolerClassify() + self.logit_bias: Optional[ + float] = vllm_config.model_config.pooler_config.logit_bias def get_supported_tasks(self) -> Set[PoolingTask]: return {"classify", "score"} @@ -654,6 +659,9 @@ def forward( pooled_data = self.classifier(pooled_data) # pooled_data shape: [batchsize, num_labels] + if self.logit_bias is not None: + pooled_data -= self.logit_bias + pooling_params = get_pooling_params(pooling_metadata) flags = [p.activation for p in pooling_params] diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py index 377b7bf26a07..0245e89f7da7 100644 --- a/vllm/model_executor/models/config.py +++ b/vllm/model_executor/models/config.py @@ -210,8 +210,10 @@ class JinaVLForSequenceClassificationConfig(VerifyAndUpdateConfig): @staticmethod def verify_and_update_config(vllm_config: "VllmConfig") -> None: config = vllm_config.model_config.hf_config - config.num_labels = 1 + pooler_config = vllm_config.model_config.pooler_config + if pooler_config.logit_bias is None: + pooler_config.logit_bias = 2.65 class SnowflakeGteNewModelConfig(VerifyAndUpdateConfig): diff --git a/vllm/model_executor/models/jina_vl.py b/vllm/model_executor/models/jina_vl.py index 8c64f636c6a0..140b0d167472 100644 --- a/vllm/model_executor/models/jina_vl.py +++ b/vllm/model_executor/models/jina_vl.py @@ -92,17 +92,14 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): pooler_config = vllm_config.model_config.pooler_config assert pooler_config is not None - # logit bias for sigmoid normalization - self.LOGIT_BIAS = 2.65 - self.score = JinaVLScorer(config) self.pooler = DispatchPooler({ "encode": Pooler.for_encode(pooler_config), "classify": - Pooler.for_classify(pooler_config, classifier=None), + Pooler.for_classify(pooler_config, classifier=self.score), "score": - Pooler.for_classify(pooler_config, classifier=None), + Pooler.for_classify(pooler_config, classifier=self.score), }) @classmethod @@ -137,9 +134,7 @@ def forward( inputs_embeds=inputs_embeds, **kwargs, ) - - logits = self.score(hidden_states) - self.LOGIT_BIAS - return logits + return hidden_states def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): loader = AutoWeightsLoader(self)