Skip to content

Commit 7b80473

Browse files
committed
Use save_jinja_files instead of save_raw_chat_template
1 parent 83cb7b6 commit 7b80473

File tree

5 files changed

+21
-21
lines changed

5 files changed

+21
-21
lines changed

src/transformers/processing_utils.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -642,7 +642,7 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs):
642642
processor_dict = self.to_dict()
643643
# Save `chat_template` in its own file. We can't get it from `processor_dict` as we popped it in `to_dict`
644644
# to avoid serializing chat template in json config file. So let's get it from `self` directly
645-
save_as_jinja = kwargs.get("save_raw_chat_template", False)
645+
save_as_jinja = kwargs.get("save_jinja_files", False)
646646
is_single_template = isinstance(self.chat_template, str)
647647

648648
if save_as_jinja and is_single_template:
@@ -677,7 +677,7 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs):
677677
# chat template dicts are saved to chat_template.json as lists of dicts with fixed key names.
678678
raise ValueError(
679679
"Multiple chat templates are not supported in the legacy format. Please save them as separate files "
680-
"using the `save_raw_chat_template` argument."
680+
"using the `save_jinja_files` argument."
681681
)
682682

683683
# For now, let's not save to `processor_config.json` if the processor doesn't have extra attributes and

src/transformers/tokenization_utils_base.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2487,15 +2487,15 @@ def save_pretrained(
24872487
tokenizer_config.update(self.extra_special_tokens)
24882488

24892489
saved_raw_chat_template_files = []
2490-
if kwargs.get("save_raw_chat_template", False) and isinstance(self.chat_template, str):
2490+
if kwargs.get("save_jinja_files", False) and isinstance(self.chat_template, str):
24912491
# New format for single templates is to save them as chat_template.jinja
24922492
with open(chat_template_file, "w", encoding="utf-8") as f:
24932493
f.write(self.chat_template)
24942494
logger.info(f"chat template saved in {chat_template_file}")
24952495
saved_raw_chat_template_files.append(chat_template_file)
24962496
if "chat_template" in tokenizer_config:
24972497
tokenizer_config.pop("chat_template") # To ensure it doesn't somehow end up in the config too
2498-
elif kwargs.get("save_raw_chat_template", False) and isinstance(self.chat_template, dict):
2498+
elif kwargs.get("save_jinja_files", False) and isinstance(self.chat_template, dict):
24992499
# New format for multiple templates is to save the default as chat_template.jinja
25002500
# and the other templates in the chat_templates/ directory
25012501
for template_name, template in self.chat_template.items():

src/transformers/utils/hub.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -962,9 +962,9 @@ def push_to_hub(
962962
"""
963963
use_auth_token = deprecated_kwargs.pop("use_auth_token", None)
964964
ignore_metadata_errors = deprecated_kwargs.pop("ignore_metadata_errors", False)
965-
save_raw_chat_template = deprecated_kwargs.pop(
966-
"save_raw_chat_template", None
967-
) # TODO: This is only used for testing and should be removed once save_raw_chat_template becomes the default
965+
save_jinja_files = deprecated_kwargs.pop(
966+
"save_jinja_files", None
967+
) # TODO: This is only used for testing and should be removed once save_jinja_files becomes the default
968968
if use_auth_token is not None:
969969
warnings.warn(
970970
"The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
@@ -1021,12 +1021,12 @@ def push_to_hub(
10211021
files_timestamps = self._get_files_timestamps(work_dir)
10221022

10231023
# Save all files.
1024-
if save_raw_chat_template:
1024+
if save_jinja_files:
10251025
self.save_pretrained(
10261026
work_dir,
10271027
max_shard_size=max_shard_size,
10281028
safe_serialization=safe_serialization,
1029-
save_raw_chat_template=True,
1029+
save_jinja_files=True,
10301030
)
10311031
else:
10321032
self.save_pretrained(work_dir, max_shard_size=max_shard_size, safe_serialization=safe_serialization)

tests/test_processing_common.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -748,7 +748,7 @@ def test_chat_template_save_loading(self):
748748
self.assertEqual(getattr(reloaded_processor.tokenizer, "chat_template", None), existing_tokenizer_template)
749749

750750
with tempfile.TemporaryDirectory() as tmpdirname:
751-
processor.save_pretrained(tmpdirname, save_raw_chat_template=True)
751+
processor.save_pretrained(tmpdirname, save_jinja_files=True)
752752
self.assertTrue(Path(tmpdirname, "chat_template.jinja").is_file())
753753
self.assertFalse(Path(tmpdirname, "chat_template.json").is_file())
754754
self.assertFalse(Path(tmpdirname, "additional_chat_templates").is_dir())
@@ -760,7 +760,7 @@ def test_chat_template_save_loading(self):
760760

761761
with tempfile.TemporaryDirectory() as tmpdirname:
762762
processor.chat_template = {"default": "a", "secondary": "b"}
763-
processor.save_pretrained(tmpdirname, save_raw_chat_template=True)
763+
processor.save_pretrained(tmpdirname, save_jinja_files=True)
764764
self.assertTrue(Path(tmpdirname, "chat_template.jinja").is_file())
765765
self.assertFalse(Path(tmpdirname, "chat_template.json").is_file())
766766
self.assertTrue(Path(tmpdirname, "additional_chat_templates").is_dir())
@@ -772,7 +772,7 @@ def test_chat_template_save_loading(self):
772772

773773
with tempfile.TemporaryDirectory() as tmpdirname:
774774
processor.chat_template = {"default": "a", "secondary": "b"}
775-
processor.save_pretrained(tmpdirname, save_raw_chat_template=False)
775+
processor.save_pretrained(tmpdirname, save_jinja_files=False)
776776
self.assertFalse(Path(tmpdirname, "chat_template.jinja").is_file())
777777
self.assertTrue(Path(tmpdirname, "chat_template.json").is_file())
778778
self.assertFalse(Path(tmpdirname, "additional_chat_templates").is_dir())

tests/test_tokenization_common.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,7 +1164,7 @@ def test_chat_template(self):
11641164
new_tokenizer.apply_chat_template(dummy_conversation, tokenize=True, return_dict=False)
11651165

11661166
with tempfile.TemporaryDirectory() as tmp_dir_name:
1167-
save_files = tokenizer.save_pretrained(tmp_dir_name, save_raw_chat_template=True)
1167+
save_files = tokenizer.save_pretrained(tmp_dir_name, save_jinja_files=True)
11681168
# Check we are saving a chat_template.jinja file
11691169
self.assertTrue(any(file.endswith("chat_template.jinja") for file in save_files))
11701170
chat_template_file = Path(tmp_dir_name) / "chat_template.jinja"
@@ -1190,7 +1190,7 @@ def test_chat_template_save_loading(self):
11901190
self.skipTest("tokenizer doesn't accept chat templates at input")
11911191
tokenizer.chat_template = "test template"
11921192
with tempfile.TemporaryDirectory() as tmpdirname:
1193-
tokenizer.save_pretrained(tmpdirname, save_raw_chat_template=True)
1193+
tokenizer.save_pretrained(tmpdirname, save_jinja_files=True)
11941194
self.assertTrue(Path(tmpdirname, "chat_template.jinja").is_file())
11951195
self.assertFalse(Path(tmpdirname, "chat_template.json").is_file())
11961196
self.assertFalse(Path(tmpdirname, "additional_chat_templates").is_dir())
@@ -1202,7 +1202,7 @@ def test_chat_template_save_loading(self):
12021202

12031203
with tempfile.TemporaryDirectory() as tmpdirname:
12041204
tokenizer.chat_template = {"default": "a", "secondary": "b"}
1205-
tokenizer.save_pretrained(tmpdirname, save_raw_chat_template=True)
1205+
tokenizer.save_pretrained(tmpdirname, save_jinja_files=True)
12061206
self.assertTrue(Path(tmpdirname, "chat_template.jinja").is_file())
12071207
self.assertFalse(Path(tmpdirname, "chat_template.json").is_file())
12081208
self.assertTrue(Path(tmpdirname, "additional_chat_templates").is_dir())
@@ -1214,7 +1214,7 @@ def test_chat_template_save_loading(self):
12141214

12151215
with tempfile.TemporaryDirectory() as tmpdirname:
12161216
tokenizer.chat_template = {"default": "a", "secondary": "b"}
1217-
tokenizer.save_pretrained(tmpdirname, save_raw_chat_template=False)
1217+
tokenizer.save_pretrained(tmpdirname, save_jinja_files=False)
12181218
self.assertFalse(Path(tmpdirname, "chat_template.jinja").is_file())
12191219
self.assertFalse(Path(tmpdirname, "chat_template.json").is_file())
12201220
self.assertFalse(Path(tmpdirname, "additional_chat_templates").is_dir())
@@ -1713,12 +1713,12 @@ def test_chat_template_dict_saving(self):
17131713
tokenizers = self.get_tokenizers()
17141714
for tokenizer in tokenizers:
17151715
with self.subTest(f"{tokenizer.__class__.__name__}"):
1716-
for save_raw_chat_template in (True, False):
1716+
for save_jinja_files in (True, False):
17171717
tokenizer.chat_template = {"default": dummy_template_1, "template2": dummy_template_2}
17181718
with tempfile.TemporaryDirectory() as tmp_dir_name:
1719-
# Test that save_raw_chat_template is ignored when there's a dict of multiple templates
1720-
tokenizer.save_pretrained(tmp_dir_name, save_raw_chat_template=save_raw_chat_template)
1721-
if save_raw_chat_template:
1719+
# Test that save_jinja_files is ignored when there's a dict of multiple templates
1720+
tokenizer.save_pretrained(tmp_dir_name, save_jinja_files=save_jinja_files)
1721+
if save_jinja_files:
17221722
config_dict = json.load(open(os.path.join(tmp_dir_name, "tokenizer_config.json")))
17231723
self.assertNotIn("chat_template", config_dict)
17241724
self.assertTrue(os.path.exists(os.path.join(tmp_dir_name, "chat_template.jinja")))
@@ -1749,7 +1749,7 @@ def test_chat_template_file_priority(self):
17491749
with self.subTest(f"{tokenizer.__class__.__name__}"):
17501750
with tempfile.TemporaryDirectory() as tmp_dir_name:
17511751
tokenizer.chat_template = dummy_template1
1752-
tokenizer.save_pretrained(tmp_dir_name, save_raw_chat_template=False)
1752+
tokenizer.save_pretrained(tmp_dir_name, save_jinja_files=False)
17531753
with Path(tmp_dir_name, "chat_template.jinja").open("w") as f:
17541754
f.write(dummy_template2)
17551755
new_tokenizer = tokenizer.from_pretrained(tmp_dir_name)

0 commit comments

Comments
 (0)