From 870158a10f930d84320bff81178daee2f0a4a661 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Thu, 26 Sep 2024 00:04:41 +0200
Subject: [PATCH] align HF tokenization with genai chat scenario in sample test

---
 .github/workflows/causal_lm_cpp.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 57fb8eb414..72cb11c663 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -665,7 +665,7 @@ jobs:
         output.write('question:\n')
         chat_history.append(gen_prompt(prompt))
         chat_prompt = tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True)
-        tokenized = tokenizer(chat_prompt, return_tensors='pt')
+        tokenized = tokenizer(chat_prompt, return_tensors='pt', add_special_tokens=True)
         answer = model.generate(**tokenized, max_length=1000, do_sample=False)
         answer_str = tokenizer.decode(answer[0, tokenized['input_ids'].numel():], skip_special_tokens=True)
         chat_history.append(gen_answer(answer_str))
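
For reference, a minimal self-contained sketch of the chat round-trip the
changed workflow step performs, assuming a placeholder Hugging Face model id
and assuming gen_prompt/gen_answer simply build role/content dicts (neither
of those assumptions is taken from the workflow itself):

    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'  # placeholder model id
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)

    chat_history = [{'role': 'user', 'content': 'Why is the Sun yellow?'}]

    # Render the history to a prompt string; the chat template may already
    # embed special tokens such as BOS into that string.
    chat_prompt = tokenizer.apply_chat_template(
        chat_history, tokenize=False, add_generation_prompt=True)

    # add_special_tokens=True is the tokenizer default; passing it explicitly,
    # as the patch does, pins down whether a BOS is prepended on top of
    # whatever the template rendered, rather than relying on the default.
    tokenized = tokenizer(chat_prompt, return_tensors='pt',
                          add_special_tokens=True)

    answer = model.generate(**tokenized, max_length=1000, do_sample=False)

    # Decode only the newly generated tokens, skipping the prompt part.
    prompt_len = tokenized['input_ids'].numel()  # batch size is 1 here
    answer_str = tokenizer.decode(answer[0, prompt_len:],
                                  skip_special_tokens=True)
    chat_history.append({'role': 'assistant', 'content': answer_str})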