Skip to content

Commit 1df676d

Browse files
committed
A new approach to template files!
1 parent 3f4559c commit 1df676d

File tree

1 file changed

+13
-0
lines changed

1 file changed

+13
-0
lines changed

src/transformers/tokenization_utils_base.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@
6868
)
6969
from .utils.chat_template_utils import _compile_jinja_template, _render_with_assistant_indices
7070
from .utils.import_utils import PROTOBUF_IMPORT_ERROR
71+
from huggingface_hub import list_repo_tree
7172

7273

7374
if TYPE_CHECKING:
@@ -1983,6 +1984,18 @@ def from_pretrained(
19831984
"tokenizer_file": FULL_TOKENIZER_FILE,
19841985
"chat_template_file": CHAT_TEMPLATE_FILE,
19851986
}
1987+
if is_local:
1988+
template_dir = Path(pretrained_model_name_or_path, "templates")
1989+
if template_dir.is_dir():
1990+
for template_file in template_dir.glob("*.jinja"):
1991+
template_name = template_file.name.removesuffix(".jinja")
1992+
additional_files_names[f"{template_name}_template"] = f"templates/{template_file.name}"
1993+
else:
1994+
for template_file in list_repo_tree(pretrained_model_name_or_path, path_in_repo="templates", recursive=False):
1995+
if not template_file.endswith(".jinja"):
1996+
continue
1997+
template_name = template_file.split('/')[-1].removesuffix(".jinja")
1998+
additional_files_names[f"{template_name}_template"] = template_file # This might be wrong!
19861999
vocab_files = {**cls.vocab_files_names, **additional_files_names}
19872000
if "tokenizer_file" in vocab_files:
19882001
# Try to get the tokenizer config to see if there are versioned tokenizer files.

0 commit comments

Comments
 (0)