Skip to content

Commit

Permalink
tests: Add tests for adapt_tokenizer and convert_token_to_string
Browse files Browse the repository at this point in the history
  • Loading branch information
saattrupdan committed Oct 31, 2024
1 parent 9ef6968 commit 476f40a
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions tests/models/test_vllm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Tests for the `vllm` module."""

from outlines.models.vllm import adapt_tokenizer, convert_token_to_string
from transformers import AutoTokenizer, SPIECE_UNDERLINE
import pytest

# Model identifier passed to `AutoTokenizer.from_pretrained` by the tests in
# this module.
TEST_MODEL = "hf-internal-testing/tiny-random-GPTJForCausalLM"


def test_adapt_tokenizer():
    """Check the attributes exposed by the result of `adapt_tokenizer`.

    The adapted tokenizer must have `vocabulary` and `special_tokens`
    attributes, and its `convert_token_to_string` attribute must compare equal
    to the `convert_token_to_string` function imported from
    `outlines.models.vllm`.
    """
    base = AutoTokenizer.from_pretrained(TEST_MODEL, padding_side="left")
    adapted = adapt_tokenizer(tokenizer=base)

    # Same order of checks as before: `vocabulary` first, then
    # `special_tokens`.
    for attribute in ("vocabulary", "special_tokens"):
        assert hasattr(adapted, attribute)

    assert adapted.convert_token_to_string == convert_token_to_string


@pytest.mark.parametrize(
    "token, expected",
    [
        # A plain token is expected to come back unchanged.
        ("baz", "baz"),
        # These two tokens are expected to come back prefixed with a space.
        ("<0x20>", " <0x20>"),
        (SPIECE_UNDERLINE, f" {SPIECE_UNDERLINE}"),
    ],
)
def test_convert_token_to_string(token, expected):
    """Check that `convert_token_to_string` yields `expected` for `token`.

    Args:
        token: The token handed to `convert_token_to_string`.
        expected: The string the conversion must return.
    """
    hf_tokenizer = AutoTokenizer.from_pretrained(TEST_MODEL, padding_side="left")
    assert convert_token_to_string(token=token, tokenizer=hf_tokenizer) == expected

0 comments on commit 476f40a

Please sign in to comment.