
Commit

Added a few tests.
Narsil committed Sep 1, 2022
1 parent e5af91c commit 64d9eb8
Showing 1 changed file with 30 additions and 0 deletions.
30 changes: 30 additions & 0 deletions tests/models/gpt2/test_tokenization_gpt2.py
@@ -274,3 +274,33 @@ def test_serialize_deserialize_fast_opt(self):
             text,
         )
         self.assertEqual(tokens_ids, [2, 250, 1345, 9, 10, 4758])
+
+    def test_fast_slow_equivalence(self):
+        tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m", use_slow=True)
+        text = "A photo of a cat"
+
+        tokens_ids = tokenizer.encode(
+            text,
+        )
+        # Same as above
+        self.assertEqual(tokens_ids, [2, 250, 1345, 9, 10, 4758])
+
+    def test_users_can_modify_bos(self):
+        tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m", from_slow=True)
+
+        tokenizer.bos_token = "bos"
+        tokenizer.bos_token_id = tokenizer.get_vocab()["bos"]
+
+        text = "A photo of a cat"
+        tokens_ids = tokenizer.encode(
+            text,
+        )
+        # We changed the bos token
+        self.assertEqual(tokens_ids, [31957, 250, 1345, 9, 10, 4758])
+        tokenizer.save_pretrained("./tok")
+        tokenizer = AutoTokenizer.from_pretrained("./tok")
+        self.assertTrue(tokenizer.is_fast)
+        tokens_ids = tokenizer.encode(
+            text,
+        )
+        self.assertEqual(tokens_ids, [31957, 250, 1345, 9, 10, 4758])
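
For reference, a minimal standalone sketch of the two behaviours these tests exercise: fast/slow tokenizer equivalence, and a user-modified BOS token surviving a save_pretrained / from_pretrained round trip. It assumes a local transformers install with access to the facebook/opt-350m checkpoint; the variable names and the temporary directory are illustrative and not part of the commit, and the slow tokenizer is loaded here with use_fast=False rather than the test's use_slow flag.

# Minimal sketch (not part of the commit) of what the new tests check.
import tempfile

from transformers import AutoTokenizer

text = "A photo of a cat"

# Fast and slow tokenizers should produce the same ids for the same input.
fast_tok = AutoTokenizer.from_pretrained("facebook/opt-350m")
slow_tok = AutoTokenizer.from_pretrained("facebook/opt-350m", use_fast=False)
assert fast_tok.encode(text) == slow_tok.encode(text)

# Overriding the BOS token should change the encoded ids and survive a
# save_pretrained / from_pretrained round trip back to a fast tokenizer.
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m", from_slow=True)
tokenizer.bos_token = "bos"
tokenizer.bos_token_id = tokenizer.get_vocab()["bos"]
ids_with_new_bos = tokenizer.encode(text)

with tempfile.TemporaryDirectory() as tmp_dir:
    tokenizer.save_pretrained(tmp_dir)
    reloaded = AutoTokenizer.from_pretrained(tmp_dir)

assert reloaded.is_fast
assert reloaded.encode(text) == ids_with_new_bos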
