@@ -2651,24 +2651,46 @@ class PoolerConfig:
26512651 ## for embeddings models
26522652 normalize : Optional [bool ] = None
26532653 """
2654- Whether to normalize the embeddings outputs.
2654+ Whether to normalize the embedding outputs. Defaults to True.
26552655 """
26562656 dimensions : Optional [int ] = None
26572657 """
26582658 Reduce the dimensions of embeddings if model
2659- support matryoshka representation.
2659+ supports matryoshka representation. Defaults to None.
2660+ """
2661+ enable_chunked_processing : Optional [bool ] = None
2662+ """
2663+ Whether to enable chunked processing for long inputs that exceed the model's
2664+ maximum position embeddings. When enabled, long inputs will be split into
2665+ chunks, processed separately, and then aggregated using weighted averaging.
2666+ This allows embedding models to handle arbitrarily long text without CUDA
2667+ errors. Defaults to False.
2668+ """
2669+ max_embed_len : Optional [int ] = None
2670+ """
2671+ Maximum input length allowed for embedding generation. When set, allows
2672+ inputs longer than max_model_len to be accepted for embedding models.
2673+ When an input exceeds max_embed_len, it will be handled according to
2674+ the original max_model_len validation logic.
2675+ Defaults to None (i.e. set to max_model_len).
26602676 """
26612677
26622678 ## for classification models
26632679 activation : Optional [bool ] = None
26642680 """
26652681 Whether to apply activation function to the classification outputs.
2682+ Defaults to True.
2683+ """
2684+ logit_bias : Optional [float ] = None
2685+ """
2686+ If provided, apply classification logit biases. Defaults to None.
26662687 """
26672688
26682689 ## for reward models
26692690 softmax : Optional [bool ] = None
26702691 """
26712692 Whether to apply softmax to the reward outputs.
2693+ Defaults to True.
26722694 """
26732695 step_tag_id : Optional [int ] = None
26742696 """
@@ -2683,25 +2705,6 @@ class PoolerConfig:
26832705 ``math-shepherd-mistral-7b-prm`` model.
26842706 """
26852707
2686- enable_chunked_processing : Optional [bool ] = None
2687- """
2688- Whether to enable chunked processing for long inputs that exceed the model's
2689- maximum position embeddings. When enabled, long inputs will be split into
2690- chunks, processed separately, and then aggregated using weighted averaging.
2691- This allows embedding models to handle arbitrarily long text without CUDA
2692- errors. Defaults to False.
2693- """
2694-
2695- max_embed_len : Optional [int ] = None
2696- """
2697- Maximum input length allowed for embedding generation. When set, allows
2698- inputs longer than max_embed_len to be accepted for embedding models.
2699- This parameter enables accepting long inputs without requiring
2700- VLLM_ALLOW_LONG_MAX_MODEL_LEN environment variable. When an input exceeds
2701- max_embed_len, it will be handled according to the original max_model_len
2702- validation logic. Defaults to None (i.e. set to max_model_len).
2703- """
2704-
27052708 def compute_hash (self ) -> str :
27062709 """
27072710 WARNING: Whenever a new field is added to this config,
0 commit comments