Skip to content

Commit

Permalink
Merge pull request #9 from jklj077/patch-7
Browse files Browse the repository at this point in the history
update tests
  • Loading branch information
JustinLin610 authored Jan 15, 2024
2 parents 7b670a8 + 8b144e1 commit ca4a8c5
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions tests/models/qwen2/test_tokenization_qwen2.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def test_nfc_normalization(self):
# per https://unicode.org/faq/normalization.html, there are three characters whose normalization forms
# under NFC, NFD, NFKC, and NFKD are all different
# using these, we can make sure only NFC is applied
- input_string = "\u038e\u03ab\u1e61" # the NFKC form
+ input_string = "\u03d2\u0301\u03d2\u0308\u017f\u0307" # the NFD form
output_string = "\u03d3\u03d4\u1e9b" # the NFC form

if self.test_slow_tokenizer:
Expand All @@ -166,8 +166,8 @@ def test_slow_tokenizer_decode_spaces_between_special_tokens_default(self):
# special tokens in `spaces_between_special_tokens` means spaces between `legacy_added_tokens`
# that would be `"<|im_start|>"` and `"<|im_end|>"` in Qwen/Qwen2 Models
token_ids = [259, 260, 268, 269, 26]
- sequence = " lower<|endoftext|><im_start>;"
- sequence_with_space = " lower<|endoftext|> <im_start> ;"
+ sequence = " lower<|endoftext|><|im_start|>;"
+ sequence_with_space = " lower<|endoftext|> <|im_start|> ;"

tokenizer = self.get_tokenizer()
# let's add a legacy_added_tokens
Expand Down

0 comments on commit ca4a8c5

Please sign in to comment.