Skip to content

Commit

Permalink
gguf conversion add_prefix_space=None for llama3 (#31937)
Browse files Browse the repository at this point in the history
* GGUF conversion forces add_prefix_space=False for llama3; this is not required and forces from_slow, which fails. Changing it to None and adding a test.

* typo

* clean test
  • Loading branch information
itazap authored Jul 23, 2024
1 parent 2e11342 commit a1844a3
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/transformers/integrations/ggml.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,7 +609,7 @@ def tokenizer(self, proto):
self.additional_kwargs["bos_token"] = eos_token

if self.is_llama_3_tokenizer:
- self.additional_kwargs["add_prefix_space"] = False
+ self.additional_kwargs["add_prefix_space"] = None
self.additional_kwargs["clean_up_tokenization_spaces"] = True

self.additional_kwargs["legacy"] = False
Expand Down
11 changes: 7 additions & 4 deletions tests/quantization/ggml/test_ggml.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,13 @@ def test_qwen2_q4_0(self):
self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)

def test_llama3_q4_0_tokenizer(self):
- tokenizer_gguf = AutoTokenizer.from_pretrained(self.llama3_model_id, gguf_file=self.q4_llama3_model_id)
- special_sentence = "สวัสดี"
- predicted_text = tokenizer_gguf.decode(tokenizer_gguf.encode(special_sentence, return_tensors="pt")[0])
- self.assertEqual(predicted_text, "<|begin_of_text|>" + special_sentence)
+ tokenizer = AutoTokenizer.from_pretrained(self.llama3_model_id, gguf_file=self.q4_llama3_model_id)
+ with tempfile.TemporaryDirectory() as tmpdirname:
+ tokenizer.save_pretrained(tmpdirname)
+ tokenizer = AutoTokenizer.from_pretrained(tmpdirname)
+ special_sentence = "สวัสดี"
+ predicted_text = tokenizer.decode(tokenizer.encode(special_sentence, return_tensors="pt")[0])
+ self.assertEqual(predicted_text, "<|begin_of_text|>" + special_sentence)

def test_llama3_q4_0(self):
tokenizer = AutoTokenizer.from_pretrained(self.llama3_model_id, gguf_file=self.q4_llama3_model_id)
Expand Down

0 comments on commit a1844a3

Please sign in to comment.