diff --git a/tests/entrypoints/openai/test_token_in_token_out.py b/tests/entrypoints/openai/test_token_in_token_out.py index f84605690c53..ed003939c44b 100644 --- a/tests/entrypoints/openai/test_token_in_token_out.py +++ b/tests/entrypoints/openai/test_token_in_token_out.py @@ -54,7 +54,7 @@ async def test_token_in_token_out_and_logprobs(server): prompt=token_ids, max_tokens=20, temperature=0, - echo=False, + echo=True, extra_body={ "return_token_ids": True, }, diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index 6e4113e6cf1e..d0756e42b796 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -691,5 +691,6 @@ def _build_render_config( truncate_prompt_tokens=request.truncate_prompt_tokens, add_special_tokens=request.add_special_tokens, cache_salt=request.cache_salt, - needs_detokenization=bool(request.echo), + needs_detokenization=bool(request.echo + and not request.return_token_ids), )