Skip to content

Commit

Permalink
applying feedback, comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Ita Zaporozhets authored and ArthurZucker committed May 24, 2024
1 parent 80b4e77 commit f5bf109
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions tests/test_tokenization_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4167,7 +4167,8 @@ def test_clean_up_tokenization_spaces(self):
def test_split_special_tokens(self):
if not self.test_slow_tokenizer:
return

# Tests the expected appearance (or absence) of a special token in the encoded output;
# explicit values are not tested because tokenization is model dependent and can change
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
special_token = "<my_new_token>"
special_sentence = f"Hey this is a {special_token} token"
Expand Down Expand Up @@ -4206,10 +4207,10 @@ def test_split_special_tokens(self):
self.assertTrue(special_token_id not in py_tokens_output)
self.assertTrue(special_token_id not in rust_tokens_output)

# Round-trip check: after saving and reloading the tokenizer, the special
# token must still be split (i.e. not appear verbatim in the tokenized output).
# BUG FIX: tempfile.mkdtemp() returns a plain str path, which is not a
# context manager — `with tempfile.mkdtemp() as tmp_dir:` raises
# AttributeError at runtime. TemporaryDirectory() yields the path on enter
# and also removes the directory on exit, which mkdtemp never did.
with tempfile.TemporaryDirectory() as tmp_dir:
    tokenizer_py.save_pretrained(tmp_dir)
    fast_from_saved = self.tokenizer_class.from_pretrained(tmp_dir)

    output_tokens_reloaded_split = fast_from_saved.tokenize(special_sentence)
    self.assertTrue(special_token not in output_tokens_reloaded_split)

Expand Down

0 comments on commit f5bf109

Please sign in to comment.