From bffb7996814b054c4bbfa3bef5814560ab104769 Mon Sep 17 00:00:00 2001
From: maxDavid40
Date: Mon, 17 Feb 2025 14:50:41 +0100
Subject: [PATCH 1/4] Fix docstring prompt_logprobs default value

Signed-off-by: maxDavid40
---
 vllm/sampling_params.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py
index 04ddcd73fa95..b7094f6253bd 100644
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -149,7 +149,7 @@ class SamplingParams(
             always return the log probability of the sampled token, so there
             may be up to `logprobs+1` elements in the response.
         prompt_logprobs: Number of log probabilities to return per prompt token.
-        detokenize: Whether to detokenize the output. Defaults to True.
+        detokenize: Whether to detokenize the output. Defaults to None.
         skip_special_tokens: Whether to skip special tokens in the output.
         spaces_between_special_tokens: Whether to add spaces between special
             tokens in the output. Defaults to True.

From ee09046bfe412bb29a5393e054ed2de2824639fd Mon Sep 17 00:00:00 2001
From: maxDavid40
Date: Mon, 17 Feb 2025 14:51:01 +0100
Subject: [PATCH 2/4] create static method to clean prompt_logprobs

Signed-off-by: maxDavid40
---
 vllm/entrypoints/openai/serving_engine.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 785117ca1d45..da53c4134ffe 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -499,6 +499,20 @@ async def _get_trace_headers(
 
         return None
 
+    @staticmethod
+    def _clean_prompt_logprobs(
+            prompt_logprobs: Union[None, List[Union[None, Dict]]]) -> None:
+        """
+        Preprocess prompt_logprobs result
+        to avoid starlette.response.JSONResponse's error
+        """
+        if prompt_logprobs:
+            for logprob_dict in prompt_logprobs:
+                if logprob_dict:
+                    for logprob_values in logprob_dict.values():
+                        if logprob_values.logprob == float('-inf'):
+                            logprob_values.logprob = -9999.0
+
     @staticmethod
     def _base_request_id(raw_request: Optional[Request],
                          default: Optional[str] = None) -> Optional[str]:

From ba195f564567e95d0733c88edfee7eb089c30ed2 Mon Sep 17 00:00:00 2001
From: maxDavid40
Date: Mon, 17 Feb 2025 14:51:16 +0100
Subject: [PATCH 3/4] use static method to clean prompt logprobs

Signed-off-by: maxDavid40
---
 vllm/entrypoints/openai/serving_chat.py       | 2 ++
 vllm/entrypoints/openai/serving_completion.py | 8 ++------
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 934bd2a95063..d8cf6b0c0810 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -841,6 +841,8 @@ async def chat_completion_full_generator(
 
         request_metadata.final_usage_info = usage
 
+        self._clean_prompt_logprobs(final_res.prompt_logprobs)
+
         response = ChatCompletionResponse(
             id=request_id,
             created=created_time,
diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py
index e7ad263e7fbe..bcbb83fdcd5a 100644
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -396,13 +396,9 @@ def request_output_to_completion_response(
         for final_res in final_res_batch:
             prompt_token_ids = final_res.prompt_token_ids
             assert prompt_token_ids is not None
+            self._clean_prompt_logprobs(final_res.prompt_logprobs)
             prompt_logprobs = final_res.prompt_logprobs
-            if prompt_logprobs:
-                for logprob_dict in prompt_logprobs:
-                    if logprob_dict:
-                        for logprob_values in logprob_dict.values():
-                            if logprob_values.logprob == float('-inf'):
-                                logprob_values.logprob = -9999.0
+
             prompt_text = final_res.prompt
 
             token_ids: GenericSequence[int]

From 334a6f188522535f82833da7d311c370699f20d4 Mon Sep 17 00:00:00 2001
From: "Max D." <35872787+maxDavid40@users.noreply.github.com>
Date: Tue, 18 Feb 2025 08:47:16 +0100
Subject: [PATCH 4/4] Fix docstring mistake

Signed-off-by: maxDavid40
---
 vllm/sampling_params.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py
index b7094f6253bd..04ddcd73fa95 100644
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -149,7 +149,7 @@ class SamplingParams(
             always return the log probability of the sampled token, so there
             may be up to `logprobs+1` elements in the response.
         prompt_logprobs: Number of log probabilities to return per prompt token.
-        detokenize: Whether to detokenize the output. Defaults to None.
+        detokenize: Whether to detokenize the output. Defaults to True.
         skip_special_tokens: Whether to skip special tokens in the output.
         spaces_between_special_tokens: Whether to add spaces between special
             tokens in the output. Defaults to True.
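
Note (not part of the patches above): the failure these commits guard against can be reproduced without running a server, because starlette's JSONResponse renders with json.dumps(..., allow_nan=False), which rejects inf/-inf/nan values such as the -inf entries that can appear in prompt_logprobs. The sketch below is a minimal standalone illustration; the Logprob dataclass and clean_prompt_logprobs function are stand-ins for illustration only, not vLLM's own types or API.

# Minimal sketch; "Logprob" here is a hypothetical stand-in dataclass.
import json
from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class Logprob:
    """Stand-in for a per-token logprob container."""
    logprob: float


def clean_prompt_logprobs(
        prompt_logprobs: Optional[List[Optional[Dict[int, Logprob]]]]) -> None:
    """Clamp -inf logprobs in place so strict JSON encoding accepts them."""
    if prompt_logprobs:
        for logprob_dict in prompt_logprobs:
            if logprob_dict:
                for logprob_values in logprob_dict.values():
                    if logprob_values.logprob == float('-inf'):
                        logprob_values.logprob = -9999.0


# First prompt token has no logprob entry; a later token came back as -inf.
prompt_logprobs = [None, {42: Logprob(logprob=float('-inf'))}]

# Strict encoding, as used by starlette's JSONResponse, rejects -inf.
try:
    json.dumps(prompt_logprobs[1][42].logprob, allow_nan=False)
except ValueError as exc:
    print("before cleaning:", exc)

clean_prompt_logprobs(prompt_logprobs)
print("after cleaning:", json.dumps(prompt_logprobs[1][42].logprob, allow_nan=False))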