Skip to content

Commit 199d69f

Browse files
committed
A new approach to template files!
1 parent bcaf117 commit 199d69f

File tree

1 file changed

+13
-0
lines changed

1 file changed

+13
-0
lines changed

src/transformers/tokenization_utils_base.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
)
6969
from .utils.chat_template_utils import _compile_jinja_template, _render_with_assistant_indices
7070
from .utils.import_utils import PROTOBUF_IMPORT_ERROR
71+
from huggingface_hub import list_repo_tree
7172

7273

7374
if TYPE_CHECKING:
@@ -1966,6 +1967,18 @@ def from_pretrained(
19661967
"tokenizer_file": FULL_TOKENIZER_FILE,
19671968
"chat_template_file": CHAT_TEMPLATE_FILE,
19681969
}
1970+
if is_local:
1971+
template_dir = Path(pretrained_model_name_or_path, "templates")
1972+
if template_dir.is_dir():
1973+
for template_file in template_dir.glob("*.jinja"):
1974+
template_name = template_file.name.removesuffix(".jinja")
1975+
additional_files_names[f"{template_name}_template"] = f"templates/{template_file.name}"
1976+
else:
1977+
for template_file in list_repo_tree(pretrained_model_name_or_path, path_in_repo="templates", recursive=False):
1978+
if not template_file.endswith(".jinja"):
1979+
continue
1980+
template_name = template_file.split('/')[-1].removesuffix(".jinja")
1981+
additional_files_names[f"{template_name}_template"] = template_file # This might be wrong!
19691982
vocab_files = {**cls.vocab_files_names, **additional_files_names}
19701983
if "tokenizer_file" in vocab_files:
19711984
# Try to get the tokenizer config to see if there are versioned tokenizer files.

0 commit comments

Comments
 (0)