@@ -168,8 +168,8 @@ def _load_and_validate_embed(embed: bytes) -> EngineEmbedsPrompt:
 
     if isinstance(prompt_embeds, list):
         return [_load_and_validate_embed(embed) for embed in prompt_embeds]
-    else:
-        return [_load_and_validate_embed(prompt_embeds)]
+
+    return [_load_and_validate_embed(prompt_embeds)]
 
 
 class CompletionRenderer(BaseRenderer):
@@ -182,7 +182,7 @@ def __init__(
                                   AsyncMicrobatchTokenizer]] = None,
     ):
         super().__init__(model_config, tokenizer)
-        self.async_tokenizer_pool = async_tokenizer_pool or {}
+        self.async_tokenizer_pool = async_tokenizer_pool
        self.async_tokenizer: Optional[AsyncMicrobatchTokenizer] = None
 
     async def render_prompt(
188188 async def render_prompt (
@@ -208,23 +208,21 @@ async def render_prompt(
         for prompt_input in batch_inputs:
             if prompt_input["is_tokens"] is True:
                 # Token input
-                detokenize_task = asyncio.create_task(
-                    # Note: detokenization is needed when echo is enabled,
-                    # where the input token IDs are decoded back to text.
-                    self._maybe_detokenize(prompt_input["content"],
-                                           config.max_length,
-                                           truncate_prompt_tokens,
-                                           config.cache_salt,
-                                           config.needs_detokenization))
-                tasks.append(detokenize_task)
+                # Note: detokenization is needed when echo is enabled,
+                # where the input token IDs are decoded back to text.
+                task = self._maybe_detokenize(prompt_input["content"],
+                                              config.max_length,
+                                              truncate_prompt_tokens,
+                                              config.cache_salt,
+                                              config.needs_detokenization)
             else:
                 # Text input
-                tokenize_task = asyncio.create_task(
-                    self._tokenize(prompt_input["content"], config.max_length,
-                                   truncate_prompt_tokens,
-                                   config.add_special_tokens,
-                                   config.cache_salt))
-                tasks.append(tokenize_task)
+                task = self._tokenize(prompt_input["content"],
+                                      config.max_length,
+                                      truncate_prompt_tokens,
+                                      config.add_special_tokens,
+                                      config.cache_salt)
+            tasks.append(task)
 
         # Wait for all text tokenization to finish
         if tasks:
@@ -356,20 +354,24 @@ async def _maybe_detokenize(
 
     def _get_async_tokenizer(self) -> AsyncMicrobatchTokenizer:
         """Get or create async tokenizer using shared pool."""
-        if self.async_tokenizer is not None:
-            return self.async_tokenizer
+        async_tokenizer = self.async_tokenizer
+        if async_tokenizer is not None:
+            return async_tokenizer
+
+        tokenizer = self.tokenizer
         if self.tokenizer is None:
             raise ValueError(
                 "No tokenizer available for text input processing")
 
-        # Check shared pool first
-        if self.tokenizer in self.async_tokenizer_pool:
-            return self.async_tokenizer_pool[self.tokenizer]
-
-        # Create new async tokenizer and add to pool
-        self.async_tokenizer = AsyncMicrobatchTokenizer(self.tokenizer)
-        self.async_tokenizer_pool[self.tokenizer] = self.async_tokenizer
-        return self.async_tokenizer
+        if self.async_tokenizer_pool is None:
+            async_tokenizer = AsyncMicrobatchTokenizer(tokenizer)
+        else:
+            async_tokenizer = self.async_tokenizer_pool.get(tokenizer)
+            if async_tokenizer is None:
+                async_tokenizer = AsyncMicrobatchTokenizer(tokenizer)
+                self.async_tokenizer_pool[tokenizer] = async_tokenizer
+        self.async_tokenizer = async_tokenizer
+        return async_tokenizer
 
     def _create_tokens_prompt(
         self,
0 commit comments