|
@@ -34,7 +34,10 @@
     AutoProcessor,
     AutoTokenizer,
     BertTokenizer,
+    LlamaTokenizer,
+    LlavaProcessor,
     ProcessorMixin,
+    SiglipImageProcessor,
     Wav2Vec2Config,
     Wav2Vec2FeatureExtractor,
     Wav2Vec2Processor,
@@ -57,6 +60,7 @@
 
 
 SAMPLE_PROCESSOR_CONFIG = get_tests_dir("fixtures/dummy_feature_extractor_config.json")
+SAMPLE_VOCAB_LLAMA = get_tests_dir("fixtures/test_sentencepiece.model")
 SAMPLE_VOCAB = get_tests_dir("fixtures/vocab.json")
 SAMPLE_PROCESSOR_CONFIG_DIR = get_tests_dir("fixtures")
 
@@ -503,3 +507,43 @@ def test_push_to_hub_dynamic_processor(self):
         new_processor = AutoProcessor.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
         # Can't make an isinstance check because the new_processor is from the CustomProcessor class of a dynamic module
         self.assertEqual(new_processor.__class__.__name__, "CustomProcessor")
+
+    def test_push_to_hub_with_chat_templates(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            tokenizer = LlamaTokenizer(SAMPLE_VOCAB_LLAMA, keep_accents=True)
+            image_processor = SiglipImageProcessor()
+            chat_template = "default dummy template for testing purposes only"
+            processor = LlavaProcessor(
+                tokenizer=tokenizer, image_processor=image_processor, chat_template=chat_template
+            )
+            self.assertEqual(processor.chat_template, chat_template)
+
+            existing_tokenizer_template = getattr(processor.tokenizer, "chat_template", None)
+            with TemporaryHubRepo(token=self._token) as tmp_repo:
+                processor.save_pretrained(
+                    tmp_dir, repo_id=tmp_repo.repo_id, token=self._token, push_to_hub=True, save_jinja_files=False
+                )
+                reloaded_processor = LlavaProcessor.from_pretrained(tmp_repo.repo_id)
+                self.assertEqual(processor.chat_template, reloaded_processor.chat_template)
+                # When we don't use single-file chat template saving, processor and tokenizer chat templates
+                # should remain separate
+                self.assertEqual(
+                    getattr(reloaded_processor.tokenizer, "chat_template", None), existing_tokenizer_template
+                )
+
+            with TemporaryHubRepo(token=self._token) as tmp_repo:
+                processor.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, token=self._token, push_to_hub=True)
+                reloaded_processor = LlavaProcessor.from_pretrained(tmp_repo.repo_id)
+                self.assertEqual(processor.chat_template, reloaded_processor.chat_template)
+                # When we save as single files, tokenizers and processors share a chat template, which means
+                # the reloaded tokenizer should get the chat template as well
+                self.assertEqual(reloaded_processor.chat_template, reloaded_processor.tokenizer.chat_template)
+
+            with TemporaryHubRepo(token=self._token) as tmp_repo:
+                processor.chat_template = {"default": "a", "secondary": "b"}
+                processor.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, token=self._token, push_to_hub=True)
+                reloaded_processor = LlavaProcessor.from_pretrained(tmp_repo.repo_id)
+                self.assertEqual(processor.chat_template, reloaded_processor.chat_template)
+                # When we save as single files, tokenizers and processors share a chat template, which means
+                # the reloaded tokenizer should get the chat template as well
+                self.assertEqual(reloaded_processor.chat_template, reloaded_processor.tokenizer.chat_template)
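For context, here is a minimal sketch of the behaviour the new test asserts, run against a local directory instead of a pushed Hub repo. The classes, the `save_jinja_files` flag, and the dummy template string come from the diff above; the vocab path is a placeholder (the test resolves it via `get_tests_dir`), so treat this as an illustration rather than a copy of the test.

```python
import tempfile

from transformers import LlamaTokenizer, LlavaProcessor, SiglipImageProcessor

# Placeholder SentencePiece model path; the test uses the fixtures/test_sentencepiece.model fixture.
tokenizer = LlamaTokenizer("fixtures/test_sentencepiece.model", keep_accents=True)
processor = LlavaProcessor(
    tokenizer=tokenizer,
    image_processor=SiglipImageProcessor(),
    chat_template="default dummy template for testing purposes only",
)

with tempfile.TemporaryDirectory() as tmp_dir:
    # Legacy saving: the processor keeps its own chat template and the
    # tokenizer's template (if any) is left untouched.
    processor.save_pretrained(tmp_dir, save_jinja_files=False)
    reloaded = LlavaProcessor.from_pretrained(tmp_dir)
    assert reloaded.chat_template == processor.chat_template

with tempfile.TemporaryDirectory() as tmp_dir:
    # Single-file saving (the default): processor and tokenizer end up
    # sharing the same chat template after reloading.
    processor.save_pretrained(tmp_dir)
    reloaded = LlavaProcessor.from_pretrained(tmp_dir)
    assert reloaded.chat_template == reloaded.tokenizer.chat_template
```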