# File: tests/test_tokenizer.py
"""Smoke tests for ``num_tokens_from_messages`` from ``autopr.utils.tokenizer``.

Each test only checks that the returned token count is positive; no exact
count is asserted.
"""

import unittest

from autopr.utils.tokenizer import num_tokens_from_messages


class TestTokenizer(unittest.TestCase):
    """Check that ``num_tokens_from_messages`` reports a positive count."""

    def test_num_tokens_from_messages_chat_completions(self):
        """A list of role/content message dicts yields a positive count."""
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "What is the capital of France?"},
            {"role": "assistant", "content": "The capital of France is Paris."},
        ]

        token_count = num_tokens_from_messages(messages)

        # assertGreater shows the offending value on failure, whereas
        # assertTrue(x > 0) would only report "False is not true".
        self.assertGreater(
            token_count, 0, "Token count should be greater than 0"
        )

    def test_num_tokens_from_messages_ordinary_completions(self):
        """A plain completion string yields a positive count."""
        completion_text = (
            "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
        )

        token_count = num_tokens_from_messages(completion_text)

        self.assertGreater(
            token_count, 0, "Token count should be greater than 0"
        )


if __name__ == "__main__":
    unittest.main()