Fix two bugs in kv-cache backtrack loop (mlc-ai#856)
Fix two bugs in kv-cache pop loop

Bug 1: the old loop could stop early because output_ids_ was shortened in place while iterating, so the bound backoff < output_ids_.size() shrank with every pop and the loop could exit before the stop text was fully trimmed

Bug 2: off-by-one in the backoff count, because the break exited before the final ++backoff ran
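
For illustration, a minimal self-contained C++ sketch of both failure modes, using a plain std::vector<int> and a one-character-per-token Decode stub (the stub, the helper names BackoffOld/BackoffNew, and the token values are hypothetical stand-ins, not code from llm_chat.cc):

    #include <cassert>
    #include <string>
    #include <vector>

    // Hypothetical stand-in for tokenizer_->Decode(): one character per
    // token id, so message length equals the number of remaining tokens.
    static std::string Decode(const std::vector<int>& ids) {
      return std::string(ids.size(), 'x');
    }

    // Shape of the old loop: the bound shrinks as tokens are popped
    // (Bug 1), and the break fires before ++backoff runs (Bug 2).
    static size_t BackoffOld(std::vector<int> ids, size_t stop_pos) {
      std::string msg = Decode(ids);
      size_t backoff = 0;
      for (; backoff < ids.size(); ++backoff) {
        ids.pop_back();
        msg = Decode(ids);
        if (msg.length() <= stop_pos) break;
      }
      return backoff;
    }

    // Shape of the fixed loop: the exit test lives in the loop header,
    // so backoff is incremented exactly once per popped token.
    static size_t BackoffNew(std::vector<int> ids, size_t stop_pos) {
      std::string msg = Decode(ids);
      size_t backoff = 0;
      for (; (ids.size() > 0) && (msg.length() > stop_pos); ++backoff) {
        ids.pop_back();
        msg = Decode(ids);
      }
      return backoff;
    }

    int main() {
      const std::vector<int> ids = {1, 2, 3, 4, 5};

      // Bug 2: reaching length 2 requires popping 3 tokens, but the old
      // loop breaks before the final increment and reports only 2.
      assert(BackoffNew(ids, 2) == 3);
      assert(BackoffOld(ids, 2) == 2);

      // Bug 1: reaching length 0 requires popping all 5 tokens, but the
      // old loop's shrinking bound stops it after 3 pops.
      assert(BackoffNew(ids, 0) == 5);
      assert(BackoffOld(ids, 0) == 3);
      return 0;
    }

Moving the exit test into the loop header makes ++backoff run exactly once per pop, so the count can no longer drift from the number of tokens actually removed.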
shenberg authored Oct 8, 2023
1 parent 898db76 commit ad3a6b9
Showing 1 changed file with 1 addition and 2 deletions.
3 changes: 1 addition & 2 deletions cpp/llm_chat.cc
@@ -1107,10 +1107,9 @@ class LLMChat {
 // back tracking, find the first set of token that is smaller
 // than the length
 size_t backoff = 0;
-for (; backoff < output_ids_.size(); ++backoff) {
+for (; (output_ids_.size() > 0) && (output_message_.length() > stop_pos); ++backoff) {
   output_ids_.pop_back();
   output_message_ = tokenizer_->Decode(output_ids_);
-  if (output_message_.length() <= stop_pos) break;
 }
 // resize kv to remove the context
 ft_.fkvcache_array_popn_(kv_cache_, backoff);
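
With the fix, backoff matches the number of tokens actually removed from output_ids_, so ft_.fkvcache_array_popn_(kv_cache_, backoff) rolls the KV cache back in lockstep with the token stream; under the old loop the cache could be popped one entry short, or the backtrack could stop before the stop text was fully removed.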
