Skip to content

Commit 7ab1be6

Browse files
Rocketknight1, Wauplin, julien-c
authored and committed
🚨 🚨 Allow saving and loading multiple "raw" chat template files (huggingface#36588)
* Add saving in the new format (but no loading yet!) * Add saving in the new format (but no loading yet!) * A new approach to template files! * make fixup * make fixup, set correct dir * Some progress but need to rework for cached_file * Rework loading handling again * Small fixes * Looks like it's working now! * make fixup * Working! * make fixup * make fixup * Add TODO so I don't miss it * Cleaner control flow with one less indent * Copy the new logic to processing_utils as well * Proper support for dicts of templates * make fixup * define the file/dir names in a single place * Update the processor chat template reload test as well * Add processor loading of multiple templates * Flatten correctly to match tokenizers * Better support when files are empty sometimes * Stop creating those empty templates * Revert changes now we don't have empty templates * Revert changes now we don't have empty templates * Don't support separate template files on the legacy path * Rework/simplify loading code * Make sure it's always a chat_template key in chat_template.json * Update processor handling of multiple templates * Add a full save-loading test to the tokenizer tests as well * Correct un-flattening * New test was incorrect * Correct error/offline handling * Better exception handling * More error handling cleanup * Add skips for test failing on main * Reorder to fix errors * make fixup * clarify legacy processor file docs and location * Update src/transformers/processing_utils.py Co-authored-by: Lucain <lucainp@gmail.com> * Update src/transformers/processing_utils.py Co-authored-by: Lucain <lucainp@gmail.com> * Update src/transformers/processing_utils.py Co-authored-by: Lucain <lucainp@gmail.com> * Update src/transformers/processing_utils.py Co-authored-by: Lucain <lucainp@gmail.com> * Rename to _jinja and _legacy * Stop saving multiple templates in the legacy format * Cleanup the processing code * Cleanup the processing code more * make fixup * make fixup * correct 
reformatting * Use correct dir name * Fix import location * Use save_jinja_files instead of save_raw_chat_template_files * Correct the test for saving multiple processor templates * Fix type hint * Update src/transformers/utils/hub.py Co-authored-by: Julien Chaumond <julien@huggingface.co> * Patch llava_onevision test * Update src/transformers/processing_utils.py Co-authored-by: Julien Chaumond <julien@huggingface.co> * Update src/transformers/tokenization_utils_base.py Co-authored-by: Julien Chaumond <julien@huggingface.co> * Refactor chat template saving out into a separate function * Update tests for the new default * Don't do chat template saving logic when chat template isn't there * Ensure save_jinja_files is propagated to tokenizer correctly * Trigger tests * Update more tests to new default * Trigger tests --------- Co-authored-by: Lucain <lucainp@gmail.com> Co-authored-by: Julien Chaumond <julien@huggingface.co>
1 parent 408ad40 commit 7ab1be6

File tree

9 files changed

+391
-82
lines changed

9 files changed

+391
-82
lines changed

src/transformers/models/llava_onevision/processing_llava_onevision.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -298,13 +298,14 @@ def save_pretrained(self, save_directory, **kwargs):
298298
self.video_processor.save_pretrained(video_processor_path)
299299

300300
video_processor_present = "video_processor" in self.attributes
301-
if video_processor_present:
302-
self.attributes.remove("video_processor")
303-
304-
outputs = super().save_pretrained(save_directory, **kwargs)
301+
try:
302+
if video_processor_present:
303+
self.attributes.remove("video_processor")
305304

306-
if video_processor_present:
307-
self.attributes += ["video_processor"]
305+
outputs = super().save_pretrained(save_directory, **kwargs)
306+
finally:
307+
if video_processor_present:
308+
self.attributes += ["video_processor"]
308309
return outputs
309310

310311
# override to load video-config from a separate config file

src/transformers/processing_utils.py

Lines changed: 132 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
import numpy as np
2929
import typing_extensions
30+
from huggingface_hub.errors import EntryNotFoundError
3031

3132
from .audio_utils import load_audio
3233
from .dynamic_module_utils import custom_object_save
@@ -52,6 +53,9 @@
5253
TruncationStrategy,
5354
)
5455
from .utils import (
56+
CHAT_TEMPLATE_DIR,
57+
CHAT_TEMPLATE_FILE,
58+
LEGACY_PROCESSOR_CHAT_TEMPLATE_FILE,
5559
PROCESSOR_NAME,
5660
PushToHubMixin,
5761
TensorType,
@@ -63,6 +67,7 @@
6367
download_url,
6468
is_offline_mode,
6569
is_remote_url,
70+
list_repo_templates,
6671
logging,
6772
)
6873

@@ -618,13 +623,19 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs):
618623
configs.append(self)
619624
custom_object_save(self, save_directory, config=configs)
620625

626+
save_jinja_files = kwargs.get("save_jinja_files", True)
627+
621628
for attribute_name in self.attributes:
622629
attribute = getattr(self, attribute_name)
623630
# Include the processor class in the attribute config so this processor can then be reloaded with the
624631
# `AutoProcessor` API.
625632
if hasattr(attribute, "_set_processor_class"):
626633
attribute._set_processor_class(self.__class__.__name__)
627-
attribute.save_pretrained(save_directory)
634+
if attribute_name == "tokenizer":
635+
# Propagate save_jinja_files to tokenizer to ensure we don't get conflicts
636+
attribute.save_pretrained(save_directory, save_jinja_files=save_jinja_files)
637+
else:
638+
attribute.save_pretrained(save_directory)
628639

629640
if self._auto_class is not None:
630641
# We added an attribute to the init_kwargs of the tokenizers, which needs to be cleaned up.
@@ -636,24 +647,52 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs):
636647
# If we save using the predefined names, we can load using `from_pretrained`
637648
# plus we save chat_template in its own file
638649
output_processor_file = os.path.join(save_directory, PROCESSOR_NAME)
639-
output_raw_chat_template_file = os.path.join(save_directory, "chat_template.jinja")
640-
output_chat_template_file = os.path.join(save_directory, "chat_template.json")
650+
output_chat_template_file_jinja = os.path.join(save_directory, CHAT_TEMPLATE_FILE)
651+
output_chat_template_file_legacy = os.path.join(
652+
save_directory, LEGACY_PROCESSOR_CHAT_TEMPLATE_FILE
653+
) # Legacy filename
654+
chat_template_dir = os.path.join(save_directory, CHAT_TEMPLATE_DIR)
641655

642656
processor_dict = self.to_dict()
643657
# Save `chat_template` in its own file. We can't get it from `processor_dict` as we popped it in `to_dict`
644658
# to avoid serializing chat template in json config file. So let's get it from `self` directly
645659
if self.chat_template is not None:
646-
if kwargs.get("save_raw_chat_template", False):
647-
with open(output_raw_chat_template_file, "w", encoding="utf-8") as writer:
648-
writer.write(self.chat_template)
649-
logger.info(f"chat template saved in {output_raw_chat_template_file}")
650-
else:
660+
save_jinja_files = kwargs.get("save_jinja_files", True)
661+
is_single_template = isinstance(self.chat_template, str)
662+
if save_jinja_files and is_single_template:
663+
# New format for single templates is to save them as chat_template.jinja
664+
with open(output_chat_template_file_jinja, "w", encoding="utf-8") as f:
665+
f.write(self.chat_template)
666+
logger.info(f"chat template saved in {output_chat_template_file_jinja}")
667+
elif save_jinja_files and not is_single_template:
668+
# New format for multiple templates is to save the default as chat_template.jinja
669+
# and the other templates in the chat_templates/ directory
670+
for template_name, template in self.chat_template.items():
671+
if template_name == "default":
672+
with open(output_chat_template_file_jinja, "w", encoding="utf-8") as f:
673+
f.write(self.chat_template["default"])
674+
logger.info(f"chat template saved in {output_chat_template_file_jinja}")
675+
else:
676+
os.makedirs(chat_template_dir, exist_ok=True)
677+
template_filepath = os.path.join(chat_template_dir, f"{template_name}.jinja")
678+
with open(template_filepath, "w", encoding="utf-8") as f:
679+
f.write(template)
680+
logger.info(f"chat template saved in {template_filepath}")
681+
elif is_single_template:
682+
# Legacy format for single templates: Put them in chat_template.json
651683
chat_template_json_string = (
652684
json.dumps({"chat_template": self.chat_template}, indent=2, sort_keys=True) + "\n"
653685
)
654-
with open(output_chat_template_file, "w", encoding="utf-8") as writer:
686+
with open(output_chat_template_file_legacy, "w", encoding="utf-8") as writer:
655687
writer.write(chat_template_json_string)
656-
logger.info(f"chat template saved in {output_chat_template_file}")
688+
logger.info(f"chat template saved in {output_chat_template_file_legacy}")
689+
elif self.chat_template is not None:
690+
# At this point we have multiple templates in the legacy format, which is not supported
691+
# chat template dicts are saved to chat_template.json as lists of dicts with fixed key names.
692+
raise ValueError(
693+
"Multiple chat templates are not supported in the legacy format. Please save them as "
694+
"separate files using the `save_jinja_files` argument."
695+
)
657696

658697
# For now, let's not save to `processor_config.json` if the processor doesn't have extra attributes and
659698
# `auto_map` is not specified.
@@ -717,6 +756,8 @@ def get_processor_dict(
717756
if os.path.isdir(pretrained_model_name_or_path):
718757
processor_file = os.path.join(pretrained_model_name_or_path, PROCESSOR_NAME)
719758

759+
additional_chat_template_files = {}
760+
resolved_additional_chat_template_files = {}
720761
if os.path.isfile(pretrained_model_name_or_path):
721762
resolved_processor_file = pretrained_model_name_or_path
722763
# cant't load chat-template when given a file as pretrained_model_name_or_path
@@ -730,9 +771,25 @@ def get_processor_dict(
730771
resolved_chat_template_file = None
731772
resolved_raw_chat_template_file = None
732773
else:
774+
if is_local:
775+
template_dir = Path(pretrained_model_name_or_path, CHAT_TEMPLATE_DIR)
776+
if template_dir.is_dir():
777+
for template_file in template_dir.glob("*.jinja"):
778+
template_name = template_file.stem
779+
additional_chat_template_files[template_name] = f"{CHAT_TEMPLATE_DIR}/{template_file.name}"
780+
else:
781+
try:
782+
for template in list_repo_templates(
783+
pretrained_model_name_or_path,
784+
local_files_only=local_files_only,
785+
revision=revision,
786+
cache_dir=cache_dir,
787+
):
788+
additional_chat_template_files[template] = f"{CHAT_TEMPLATE_DIR}/{template}.jinja"
789+
except EntryNotFoundError:
790+
pass # No template dir means no template files
733791
processor_file = PROCESSOR_NAME
734-
chat_template_file = "chat_template.json"
735-
raw_chat_template_file = "chat_template.jinja"
792+
736793
try:
737794
# Load from local folder or from cache or download from model Hub and cache
738795
resolved_processor_file = cached_file(
@@ -750,12 +807,11 @@ def get_processor_dict(
750807
_raise_exceptions_for_missing_entries=False,
751808
)
752809

753-
# Load chat template from a separate json if exists
754-
# because making it part of processor-config break BC.
755-
# Processors in older version do not accept any kwargs
810+
# chat_template.json is a legacy file used by the processor class
811+
# a raw chat_template.jinja is preferred in future
756812
resolved_chat_template_file = cached_file(
757813
pretrained_model_name_or_path,
758-
chat_template_file,
814+
LEGACY_PROCESSOR_CHAT_TEMPLATE_FILE,
759815
cache_dir=cache_dir,
760816
force_download=force_download,
761817
proxies=proxies,
@@ -770,7 +826,7 @@ def get_processor_dict(
770826

771827
resolved_raw_chat_template_file = cached_file(
772828
pretrained_model_name_or_path,
773-
raw_chat_template_file,
829+
CHAT_TEMPLATE_FILE,
774830
cache_dir=cache_dir,
775831
force_download=force_download,
776832
proxies=proxies,
@@ -782,6 +838,24 @@ def get_processor_dict(
782838
subfolder=subfolder,
783839
_raise_exceptions_for_missing_entries=False,
784840
)
841+
842+
resolved_additional_chat_template_files = {
843+
template_name: cached_file(
844+
pretrained_model_name_or_path,
845+
template_file,
846+
cache_dir=cache_dir,
847+
force_download=force_download,
848+
proxies=proxies,
849+
resume_download=resume_download,
850+
local_files_only=local_files_only,
851+
token=token,
852+
user_agent=user_agent,
853+
revision=revision,
854+
subfolder=subfolder,
855+
_raise_exceptions_for_missing_entries=False,
856+
)
857+
for template_name, template_file in additional_chat_template_files.items()
858+
}
785859
except OSError:
786860
# Raise any environment error raise by `cached_file`. It will have a helpful error message adapted to
787861
# the original exception.
@@ -796,15 +870,31 @@ def get_processor_dict(
796870
)
797871

798872
# Add chat template as kwarg before returning because most models don't have processor config
799-
if resolved_raw_chat_template_file is not None:
800-
with open(resolved_raw_chat_template_file, encoding="utf-8") as reader:
801-
chat_template = reader.read()
802-
kwargs["chat_template"] = chat_template
803-
elif resolved_chat_template_file is not None:
873+
if resolved_chat_template_file is not None:
874+
# This is the legacy path
804875
with open(resolved_chat_template_file, encoding="utf-8") as reader:
805-
text = reader.read()
806-
chat_template = json.loads(text)["chat_template"]
807-
kwargs["chat_template"] = chat_template
876+
chat_template_json = json.loads(reader.read())
877+
chat_templates = {"default": chat_template_json["chat_template"]}
878+
if resolved_additional_chat_template_files:
879+
raise ValueError(
880+
"Cannot load chat template due to conflicting files - this checkpoint combines "
881+
"a legacy chat_template.json file with separate template files, which is not "
882+
"supported. To resolve this error, replace the legacy chat_template.json file "
883+
"with a modern chat_template.jinja file."
884+
)
885+
else:
886+
chat_templates = {
887+
template_name: open(template_file, "r", encoding="utf-8").read()
888+
for template_name, template_file in resolved_additional_chat_template_files.items()
889+
}
890+
if resolved_raw_chat_template_file is not None:
891+
with open(resolved_raw_chat_template_file, "r", encoding="utf-8") as reader:
892+
chat_templates["default"] = reader.read()
893+
if isinstance(chat_templates, dict) and "default" in chat_templates and len(chat_templates) == 1:
894+
chat_templates = chat_templates["default"] # Flatten when we just have a single template/file
895+
896+
if chat_templates:
897+
kwargs["chat_template"] = chat_templates
808898

809899
# Existing processors on the Hub created before #27761 being merged don't have `processor_config.json` (if not
810900
# updated afterward), and we need to keep `from_pretrained` work. So here it fallbacks to the empty dict.
@@ -1313,14 +1403,27 @@ def apply_chat_template(
13131403
"""
13141404

13151405
if chat_template is None:
1316-
if self.chat_template is not None:
1406+
if isinstance(self.chat_template, dict) and "default" in self.chat_template:
1407+
chat_template = self.chat_template["default"]
1408+
elif isinstance(self.chat_template, dict):
1409+
raise ValueError(
1410+
'The processor has multiple chat templates but none of them are named "default". You need to specify'
1411+
" which one to use by passing the `chat_template` argument. Available templates are: "
1412+
f"{', '.join(self.chat_template.keys())}"
1413+
)
1414+
elif self.chat_template is not None:
13171415
chat_template = self.chat_template
13181416
else:
13191417
raise ValueError(
1320-
"No chat template is set for this processor. Please either set the `chat_template` attribute, "
1321-
"or provide a chat template as an argument. See "
1322-
"https://huggingface.co/docs/transformers/main/en/chat_templating for more information."
1418+
"Cannot use apply_chat_template because this processor does not have a chat template."
13231419
)
1420+
else:
1421+
if isinstance(self.chat_template, dict) and chat_template in self.chat_template:
1422+
# It's the name of a template, not a full template string
1423+
chat_template = self.chat_template[chat_template]
1424+
else:
1425+
# It's a template string, render it directly
1426+
chat_template = chat_template
13241427

13251428
# Fill sets of kwargs that should be used by different parts of template
13261429
processed_kwargs = {

0 commit comments

Comments
 (0)