Skip to content

Commit 124f9e0

Browse files
committed
define the file/dir names in a single place
1 parent 7fa0a74 commit 124f9e0

File tree

3 files changed

+9
-5
lines changed

3 files changed

+9
-5
lines changed

src/transformers/processing_utils.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,8 @@
5252
TruncationStrategy,
5353
)
5454
from .utils import (
55+
CHAT_TEMPLATE_DIR,
56+
CHAT_TEMPLATE_FILE,
5557
PROCESSOR_NAME,
5658
PushToHubMixin,
5759
TensorType,
@@ -618,9 +620,9 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs):
618620
# If we save using the predefined names, we can load using `from_pretrained`
619621
# plus we save chat_template in its own file
620622
output_processor_file = os.path.join(save_directory, PROCESSOR_NAME)
621-
output_raw_chat_template_file = os.path.join(save_directory, "chat_template.jinja")
622-
output_chat_template_file = os.path.join(save_directory, "chat_template.json")
623-
chat_template_dir = os.path.join(save_directory, "additional_chat_templates")
623+
output_raw_chat_template_file = os.path.join(save_directory, CHAT_TEMPLATE_FILE)
624+
output_chat_template_file = os.path.join(save_directory, "chat_template.json") # Legacy filename
625+
chat_template_dir = os.path.join(save_directory, CHAT_TEMPLATE_DIR)
624626

625627
processor_dict = self.to_dict()
626628
# Save `chat_template` in its own file. We can't get it from `processor_dict` as we popped it in `to_dict`

src/transformers/tokenization_utils_base.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,8 @@
3939
from . import __version__
4040
from .dynamic_module_utils import custom_object_save
4141
from .utils import (
42+
CHAT_TEMPLATE_DIR,
43+
CHAT_TEMPLATE_FILE,
4244
ExplicitEnum,
4345
PaddingStrategy,
4446
PushToHubMixin,
@@ -148,8 +150,6 @@ class EncodingFast:
148150
SPECIAL_TOKENS_MAP_FILE = "special_tokens_map.json"
149151
ADDED_TOKENS_FILE = "added_tokens.json"
150152
TOKENIZER_CONFIG_FILE = "tokenizer_config.json"
151-
CHAT_TEMPLATE_FILE = "chat_template.jinja"
152-
CHAT_TEMPLATE_DIR = "additional_chat_templates"
153153

154154
# Fast tokenizers (provided by HuggingFace tokenizer's library) can be saved in a single file
155155
FULL_TOKENIZER_FILE = "tokenizer.json"

src/transformers/utils/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -269,6 +269,8 @@
269269
PROCESSOR_NAME = "processor_config.json"
270270
GENERATION_CONFIG_NAME = "generation_config.json"
271271
MODEL_CARD_NAME = "modelcard.json"
272+
CHAT_TEMPLATE_FILE = "chat_template.jinja"
273+
CHAT_TEMPLATE_DIR = "additional_chat_templates"
272274

273275
SENTENCEPIECE_UNDERLINE = "▁"
274276
SPIECE_UNDERLINE = SENTENCEPIECE_UNDERLINE # Kept for backward compatibility

0 commit comments

Comments
 (0)