From 9990b6ba8da2effe6eb347b1ecc6d4f1d0219b67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A0=94=E7=A9=B6=E7=A4=BE=E4=BA=A4?= Date: Mon, 3 Feb 2025 19:25:28 +0800 Subject: [PATCH] Gracefully stop when token generated does not make sense. (#153) --- crates/ai00-core/src/run.rs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/crates/ai00-core/src/run.rs b/crates/ai00-core/src/run.rs index 372a103..a2a662c 100644 --- a/crates/ai00-core/src/run.rs +++ b/crates/ai00-core/src/run.rs @@ -32,7 +32,6 @@ use crate::{ Environment, FinishReason, GenerateKind, GenerateRequest, ReloadRequest, Token, TokenCounter, }; -const END_OF_LINE_TOKEN: u16 = 261; const MIN_PROMPT_CACHE_TOKENS: usize = 32; const MAX_CACHE_ITEMS: usize = 256; @@ -900,16 +899,20 @@ impl Runtime { ); } - // map token 0 output to "\n\n" - let token = match token { - 0 => END_OF_LINE_TOKEN, - _ => *token, - }; + let token = *token; + let mut stop_token = token == 0; assert_eq!(context.suffix.len(), 0); context.suffix.0.push(token); - let mut word = self.tokenizer.decode(&[token])?; + let mut word = match self.tokenizer.decode(&[token]) { + Ok(word) => word, + Err(err) => { + log::warn!("{err}"); + stop_token = true; + Default::default() + } + }; context.model_text.append(&mut word.clone()); context.buffer.append(&mut word); context.model_tokens.push(token); @@ -1031,7 +1034,7 @@ impl Runtime { } let _ = context.sender.send(Token::Choose(perplexities)); done = true; - } else if halt || stop_matched { + } else if halt || stop_matched || stop_token { let output = String::from_utf8_lossy(head); let _ = context.sender.send(Token::Content(output.into())); stop(FinishReason::Stop);