From 62a06b6ed79fca477e87501bf0a4615d1b6cb6a0 Mon Sep 17 00:00:00 2001 From: DarkLight1337 Date: Sun, 5 Oct 2025 02:56:45 +0000 Subject: [PATCH] [Bugfix] Allow `--skip-tokenizer-init` with `echo` and `return_token_ids` Signed-off-by: DarkLight1337 --- tests/entrypoints/openai/test_token_in_token_out.py | 2 +- vllm/entrypoints/openai/serving_completion.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/entrypoints/openai/test_token_in_token_out.py b/tests/entrypoints/openai/test_token_in_token_out.py index f84605690c53..ed003939c44b 100644 --- a/tests/entrypoints/openai/test_token_in_token_out.py +++ b/tests/entrypoints/openai/test_token_in_token_out.py @@ -54,7 +54,7 @@ async def test_token_in_token_out_and_logprobs(server): prompt=token_ids, max_tokens=20, temperature=0, - echo=False, + echo=True, extra_body={ "return_token_ids": True, }, diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index 6e4113e6cf1e..d0756e42b796 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -691,5 +691,6 @@ def _build_render_config( truncate_prompt_tokens=request.truncate_prompt_tokens, add_special_tokens=request.add_special_tokens, cache_salt=request.cache_salt, - needs_detokenization=bool(request.echo), + needs_detokenization=bool(request.echo + and not request.return_token_ids),)