From bffb7996814b054c4bbfa3bef5814560ab104769 Mon Sep 17 00:00:00 2001
From: maxDavid40
Date: Mon, 17 Feb 2025 14:50:41 +0100
Subject: [PATCH 1/4] Fix docstring prompt_logprobs default value

Signed-off-by: maxDavid40
---
 vllm/sampling_params.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py
index 04ddcd73fa95..b7094f6253bd 100644
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -149,7 +149,7 @@ class SamplingParams(
             always return the log probability of the sampled token, so there
             may be up to `logprobs+1` elements in the response.
         prompt_logprobs: Number of log probabilities to return per prompt token.
-        detokenize: Whether to detokenize the output. Defaults to True.
+        detokenize: Whether to detokenize the output. Defaults to None.
         skip_special_tokens: Whether to skip special tokens in the output.
         spaces_between_special_tokens: Whether to add spaces between special
             tokens in the output. Defaults to True.

From ee09046bfe412bb29a5393e054ed2de2824639fd Mon Sep 17 00:00:00 2001
From: maxDavid40
Date: Mon, 17 Feb 2025 14:51:01 +0100
Subject: [PATCH 2/4] create static method to clean prompt_logprobs

Signed-off-by: maxDavid40
---
 vllm/entrypoints/openai/serving_engine.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 785117ca1d45..da53c4134ffe 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -499,6 +499,20 @@ async def _get_trace_headers(
 
         return None
 
+    @staticmethod
+    def _clean_prompt_logprobs(
+            prompt_logprobs: Union[None, List[Union[None, Dict]]]) -> None:
+        """
+        Preprocess prompt_logprobs result
+        to avoid starlette.response.JSONResponse's error
+        """
+        if prompt_logprobs:
+            for logprob_dict in prompt_logprobs:
+                if logprob_dict:
+                    for logprob_values in logprob_dict.values():
+                        if logprob_values.logprob == float('-inf'):
+                            logprob_values.logprob = -9999.0
+
     @staticmethod
     def _base_request_id(raw_request: Optional[Request],
                          default: Optional[str] = None) -> Optional[str]:

From ba195f564567e95d0733c88edfee7eb089c30ed2 Mon Sep 17 00:00:00 2001
From: maxDavid40
Date: Mon, 17 Feb 2025 14:51:16 +0100
Subject: [PATCH 3/4] use static method to clean prompt logprobs

Signed-off-by: maxDavid40
---
 vllm/entrypoints/openai/serving_chat.py       | 2 ++
 vllm/entrypoints/openai/serving_completion.py | 8 ++------
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 934bd2a95063..d8cf6b0c0810 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -841,6 +841,8 @@ async def chat_completion_full_generator(
 
         request_metadata.final_usage_info = usage
 
+        self._clean_prompt_logprobs(final_res.prompt_logprobs)
+
         response = ChatCompletionResponse(
             id=request_id,
             created=created_time,
diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py
index e7ad263e7fbe..bcbb83fdcd5a 100644
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -396,13 +396,9 @@ def request_output_to_completion_response(
         for final_res in final_res_batch:
             prompt_token_ids = final_res.prompt_token_ids
             assert prompt_token_ids is not None
+            self._clean_prompt_logprobs(final_res.prompt_logprobs)
             prompt_logprobs = final_res.prompt_logprobs
-            if prompt_logprobs:
-                for logprob_dict in prompt_logprobs:
-                    if logprob_dict:
-                        for logprob_values in logprob_dict.values():
-                            if logprob_values.logprob == float('-inf'):
-                                logprob_values.logprob = -9999.0
+
             prompt_text = final_res.prompt
 
             token_ids: GenericSequence[int]

From 334a6f188522535f82833da7d311c370699f20d4 Mon Sep 17 00:00:00 2001
From: "Max D." <35872787+maxDavid40@users.noreply.github.com>
Date: Tue, 18 Feb 2025 08:47:16 +0100
Subject: [PATCH 4/4] Fix docstring mistake

Signed-off-by: maxDavid40
---
 vllm/sampling_params.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py
index b7094f6253bd..04ddcd73fa95 100644
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -149,7 +149,7 @@ class SamplingParams(
             always return the log probability of the sampled token, so there
             may be up to `logprobs+1` elements in the response.
         prompt_logprobs: Number of log probabilities to return per prompt token.
-        detokenize: Whether to detokenize the output. Defaults to None.
+        detokenize: Whether to detokenize the output. Defaults to True.
         skip_special_tokens: Whether to skip special tokens in the output.
         spaces_between_special_tokens: Whether to add spaces between special
             tokens in the output. Defaults to True.
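
Note (not part of the patches above): the failure these commits guard against can be reproduced without running a server, because starlette's JSONResponse renders with json.dumps(..., allow_nan=False), which rejects inf/-inf/nan values such as the -inf entries that can appear in prompt_logprobs. The sketch below is a minimal standalone illustration; the Logprob dataclass and clean_prompt_logprobs function are stand-ins for illustration only, not vLLM's own types or API.

# Minimal sketch; "Logprob" here is a hypothetical stand-in dataclass.
import json
from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class Logprob:
    """Stand-in for a per-token logprob container."""
    logprob: float


def clean_prompt_logprobs(
        prompt_logprobs: Optional[List[Optional[Dict[int, Logprob]]]]) -> None:
    """Clamp -inf logprobs in place so strict JSON encoding accepts them."""
    if prompt_logprobs:
        for logprob_dict in prompt_logprobs:
            if logprob_dict:
                for logprob_values in logprob_dict.values():
                    if logprob_values.logprob == float('-inf'):
                        logprob_values.logprob = -9999.0


# First prompt token has no logprob entry; a later token came back as -inf.
prompt_logprobs = [None, {42: Logprob(logprob=float('-inf'))}]

# Strict encoding, as used by starlette's JSONResponse, rejects -inf.
try:
    json.dumps(prompt_logprobs[1][42].logprob, allow_nan=False)
except ValueError as exc:
    print("before cleaning:", exc)

clean_prompt_logprobs(prompt_logprobs)
print("after cleaning:", json.dumps(prompt_logprobs[1][42].logprob, allow_nan=False))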