
Commit eb81fed

njhill authored and xuebwang-amd committed
[BugFix] Fix tokenize asyncio task leak (vllm-project#24677)
Signed-off-by: Nick Hill <nhill@redhat.com>
Signed-off-by: xuebwang-amd <xuebwang@amd.com>
1 parent 575a561 commit eb81fed

File tree

1 file changed: +30, -28 lines changed


vllm/entrypoints/renderer.py

Lines changed: 30 additions & 28 deletions
@@ -168,8 +168,8 @@ def _load_and_validate_embed(embed: bytes) -> EngineEmbedsPrompt:
 
         if isinstance(prompt_embeds, list):
             return [_load_and_validate_embed(embed) for embed in prompt_embeds]
-        else:
-            return [_load_and_validate_embed(prompt_embeds)]
+
+        return [_load_and_validate_embed(prompt_embeds)]
 
 
 class CompletionRenderer(BaseRenderer):
@@ -182,7 +182,7 @@ def __init__(
                                             AsyncMicrobatchTokenizer]] = None,
     ):
         super().__init__(model_config, tokenizer)
-        self.async_tokenizer_pool = async_tokenizer_pool or {}
+        self.async_tokenizer_pool = async_tokenizer_pool
         self.async_tokenizer: Optional[AsyncMicrobatchTokenizer] = None
 
     async def render_prompt(
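
One plausible reason the "or {}" had to go: with async_tokenizer_pool or {},
any falsy argument (None, or even a shared-but-still-empty dict) was silently
replaced by a fresh private dict, so renderers that were meant to share one
pool could each end up with their own. Keeping the argument unchanged lets
None explicitly mean "no shared pool", which the reworked
_get_async_tokenizer below branches on. A minimal sketch of the pitfall;
Renderer and its field names here are hypothetical, not the vLLM code:

# `pool or {}` swaps any falsy argument for a brand-new dict, so
# instances that were handed the same (empty) pool never share it.
class Renderer:
    def __init__(self, pool=None):
        self.pool = pool or {}            # the old behaviour removed above

shared: dict = {}                         # an empty shared pool is falsy too
a, b = Renderer(shared), Renderer(shared)
a.pool["tok"] = "cached"
assert "tok" not in b.pool                # b got its own private dict
assert "tok" not in shared                # the shared pool was never used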
@@ -208,23 +208,21 @@ async def render_prompt(
         for prompt_input in batch_inputs:
             if prompt_input["is_tokens"] is True:
                 # Token input
-                detokenize_task = asyncio.create_task(
-                    # Note: detokenization is needed when echo is enabled,
-                    # where the input token IDs are decoded back to text.
-                    self._maybe_detokenize(prompt_input["content"],
-                                           config.max_length,
-                                           truncate_prompt_tokens,
-                                           config.cache_salt,
-                                           config.needs_detokenization))
-                tasks.append(detokenize_task)
+                # Note: detokenization is needed when echo is enabled,
+                # where the input token IDs are decoded back to text.
+                task = self._maybe_detokenize(prompt_input["content"],
+                                              config.max_length,
+                                              truncate_prompt_tokens,
+                                              config.cache_salt,
+                                              config.needs_detokenization)
             else:
                 # Text input
-                tokenize_task = asyncio.create_task(
-                    self._tokenize(prompt_input["content"], config.max_length,
-                                   truncate_prompt_tokens,
-                                   config.add_special_tokens,
-                                   config.cache_salt))
-                tasks.append(tokenize_task)
+                task = self._tokenize(prompt_input["content"],
+                                      config.max_length,
+                                      truncate_prompt_tokens,
+                                      config.add_special_tokens,
+                                      config.cache_salt)
+            tasks.append(task)
 
         # Wait for all text tokenization to finish
         if tasks:
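
This hunk is the leak named in the commit title. Previously each coroutine
was wrapped in asyncio.create_task() the moment it was built, so it started
running on the event loop immediately; if anything raised partway through
the loop, or the request was cancelled before the tasks were awaited, the
already-created tasks were orphaned, with nothing awaiting or cancelling
them. Appending the bare coroutines and letting the downstream gather
schedule them ties their lifetime to the await. A self-contained sketch of
the hazard; work, leaky and fixed are illustrative names, not from the diff:

import asyncio

async def work(i: int) -> int:
    await asyncio.sleep(0.1)
    return i

async def leaky(items: list[int]) -> list[int]:
    tasks = []
    for i in items:
        # Eagerly scheduled: this task is already running.
        tasks.append(asyncio.create_task(work(i)))
        if i == 1:
            # Raising here orphans every task created so far: nothing
            # awaits or cancels them, which is the leak being fixed.
            raise ValueError("mid-loop failure")
    return await asyncio.gather(*tasks)

async def fixed(items: list[int]) -> list[int]:
    # Plain coroutine objects: nothing is scheduled yet, so an error
    # while building the list cannot leave running tasks behind.
    coros = [work(i) for i in items]
    # gather() wraps, awaits and cleans up the coroutines together.
    return await asyncio.gather(*coros)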
@@ -356,20 +354,24 @@ async def _maybe_detokenize(
 
     def _get_async_tokenizer(self) -> AsyncMicrobatchTokenizer:
         """Get or create async tokenizer using shared pool."""
-        if self.async_tokenizer is not None:
-            return self.async_tokenizer
+        async_tokenizer = self.async_tokenizer
+        if async_tokenizer is not None:
+            return async_tokenizer
+
+        tokenizer = self.tokenizer
         if self.tokenizer is None:
             raise ValueError(
                 "No tokenizer available for text input processing")
 
-        # Check shared pool first
-        if self.tokenizer in self.async_tokenizer_pool:
-            return self.async_tokenizer_pool[self.tokenizer]
-
-        # Create new async tokenizer and add to pool
-        self.async_tokenizer = AsyncMicrobatchTokenizer(self.tokenizer)
-        self.async_tokenizer_pool[self.tokenizer] = self.async_tokenizer
-        return self.async_tokenizer
+        if self.async_tokenizer_pool is None:
+            async_tokenizer = AsyncMicrobatchTokenizer(tokenizer)
+        else:
+            async_tokenizer = self.async_tokenizer_pool.get(tokenizer)
+            if async_tokenizer is None:
+                async_tokenizer = AsyncMicrobatchTokenizer(tokenizer)
+                self.async_tokenizer_pool[tokenizer] = async_tokenizer
+        self.async_tokenizer = async_tokenizer
+        return async_tokenizer
 
     def _create_tokens_prompt(
         self,
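
The rewritten lookup is a straightforward get-or-create: consult the
per-renderer cache, then the optional shared pool, and only construct a new
AsyncMicrobatchTokenizer on a miss, publishing it to the pool when one
exists. A condensed sketch of the same shape; Holder and AsyncWrapper are
hypothetical stand-ins for CompletionRenderer and AsyncMicrobatchTokenizer:

from typing import Optional

class AsyncWrapper:
    """Stand-in for AsyncMicrobatchTokenizer."""
    def __init__(self, inner):
        self.inner = inner

class Holder:
    """Stand-in for CompletionRenderer's tokenizer caching."""
    def __init__(self, inner, pool: Optional[dict] = None):
        self.inner = inner                    # like self.tokenizer
        self.pool = pool                      # None means no shared pool
        self.cached: Optional[AsyncWrapper] = None

    def get_wrapper(self) -> AsyncWrapper:
        if self.cached is not None:           # per-instance fast path
            return self.cached
        if self.pool is None:                 # sharing not configured
            wrapper = AsyncWrapper(self.inner)
        else:
            wrapper = self.pool.get(self.inner)
            if wrapper is None:               # pool miss: create, publish
                wrapper = AsyncWrapper(self.inner)
                self.pool[self.inner] = wrapper
        self.cached = wrapper
        return wrapper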
