                          DetailedTraceModules, Device, DeviceConfig,
                          DistributedExecutorBackend, GuidedDecodingBackend,
                          GuidedDecodingBackendV1, HfOverrides, KVEventsConfig,
-                         KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
-                         ModelConfig, ModelDType, ModelImpl, MultiModalConfig,
-                         ObservabilityConfig, ParallelConfig, PoolerConfig,
-                         PrefixCachingHashAlgo, PromptAdapterConfig,
-                         SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
-                         TaskOption, TokenizerMode, VllmConfig, get_attr_docs,
-                         get_field)
+                         KVTransferConfig, LoadConfig, LoadFormat,
+                         LogprobsMode, LoRAConfig, ModelConfig, ModelDType,
+                         ModelImpl, MultiModalConfig, ObservabilityConfig,
+                         ParallelConfig, PoolerConfig, PrefixCachingHashAlgo,
+                         PromptAdapterConfig, SchedulerConfig, SchedulerPolicy,
+                         SpeculativeConfig, TaskOption, TokenizerMode,
+                         VllmConfig, get_attr_docs, get_field)
 from vllm.logger import init_logger
 from vllm.platforms import CpuArchEnum, current_platform
 from vllm.plugins import load_general_plugins
@@ -324,6 +324,7 @@ class EngineArgs:
         SchedulerConfig.long_prefill_token_threshold
     max_num_seqs: Optional[int] = SchedulerConfig.max_num_seqs
     max_logprobs: int = ModelConfig.max_logprobs
+    logprobs_mode: LogprobsMode = ModelConfig.logprobs_mode
     disable_log_stats: bool = False
     revision: Optional[str] = ModelConfig.revision
     code_revision: Optional[str] = ModelConfig.code_revision
@@ -490,6 +491,8 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
                                  **model_kwargs["max_seq_len_to_capture"])
         model_group.add_argument("--max-logprobs",
                                  **model_kwargs["max_logprobs"])
+        model_group.add_argument("--logprobs-mode",
+                                 **model_kwargs["logprobs_mode"])
         model_group.add_argument("--disable-sliding-window",
                                  **model_kwargs["disable_sliding_window"])
         model_group.add_argument("--disable-cascade-attn",
@@ -892,6 +895,7 @@ def create_model_config(self) -> ModelConfig:
             enforce_eager=self.enforce_eager,
             max_seq_len_to_capture=self.max_seq_len_to_capture,
             max_logprobs=self.max_logprobs,
+            logprobs_mode=self.logprobs_mode,
             disable_sliding_window=self.disable_sliding_window,
             disable_cascade_attn=self.disable_cascade_attn,
             skip_tokenizer_init=self.skip_tokenizer_init,
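
For context, here is a minimal usage sketch, not part of this diff, showing how the new option flows once the patch is applied: the dataclass field feeds the new --logprobs-mode flag and is forwarded to ModelConfig by create_model_config(). The model name and the "raw_logits" value below are illustrative assumptions; the actual allowed values are whatever LogprobsMode in vllm.config defines, and building the ModelConfig requires the model's Hugging Face config to be resolvable.

# Sketch only (assumptions noted above): exercise logprobs_mode via EngineArgs,
# the same path the CLI flag added in this diff goes through.
# Assumed equivalent CLI usage: vllm serve <model> --logprobs-mode <mode>
from vllm.engine.arg_utils import EngineArgs

args = EngineArgs(
    model="facebook/opt-125m",   # placeholder model
    max_logprobs=5,
    logprobs_mode="raw_logits",  # assumed LogprobsMode value; omitting it keeps
                                 # the default, ModelConfig.logprobs_mode
)

# create_model_config() (patched above) now passes logprobs_mode to ModelConfig.
model_config = args.create_model_config()
print(model_config.logprobs_mode)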