
Commit

Add tools for auto filling traced models cache (#537)
* add tool

* improve sd hash

* fix

* remove json

* fix tests

* fix tests

* apply suggestions

* use task

* remove debug

* infer task and decouple a func in __main__

* restore assert
JingyaHuang authored Apr 3, 2024
1 parent 6253f12 commit 6856557
Showing 10 changed files with 325 additions and 122 deletions.
2 changes: 1 addition & 1 deletion docs/source/tutorials/stable_diffusion.mdx
@@ -316,7 +316,7 @@ image = base(
del base # To avoid neuron device OOM

refiner = NeuronStableDiffusionXLImg2ImgPipeline.from_pretrained("sd_neuron_xl_refiner/")
- image = image = refiner(
+ image = refiner(
prompt=prompt,
num_inference_steps=40,
denoising_start=0.8,
4 changes: 4 additions & 0 deletions optimum/exporters/neuron/__init__.py
@@ -22,6 +22,8 @@
"infer_stable_diffusion_shapes_from_diffusers",
"main_export",
"normalize_stable_diffusion_input_shapes",
"get_submodels_and_neuron_configs",
"load_models_and_neuron_configs",
],
"base": ["NeuronDefaultConfig"],
"convert": ["export", "export_models", "validate_model_outputs", "validate_models_outputs"],
@@ -35,7 +37,9 @@

if TYPE_CHECKING:
from .__main__ import (
+ get_submodels_and_neuron_configs,
infer_stable_diffusion_shapes_from_diffusers,
+ load_models_and_neuron_configs,
main_export,
normalize_stable_diffusion_input_shapes,
)
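With the `__init__.py` change above, both helpers become part of the package's public surface. A quick sketch (not part of this commit's diff) of the resulting imports:

```python
from optimum.exporters.neuron import (
    get_submodels_and_neuron_configs,
    load_models_and_neuron_configs,
)
```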
120 changes: 88 additions & 32 deletions optimum/exporters/neuron/__main__.py
@@ -43,7 +43,7 @@
from ...utils import is_diffusers_available, logging
from ..error_utils import AtolError, OutputMatchError, ShapeError
from ..tasks import TasksManager
- from .base import NeuronDecoderConfig
+ from .base import NeuronConfig, NeuronDecoderConfig
from .convert import export_models, validate_models_outputs
from .model_configs import * # noqa: F403
from .utils import (
@@ -112,7 +112,14 @@ def infer_task(task: str, model_name_or_path: str) -> str:

# This function is not applicable for diffusers / sentence transformers models
def get_input_shapes_and_config_class(task: str, args: argparse.Namespace) -> Dict[str, int]:
- config = AutoConfig.from_pretrained(args.model)
+ neuron_config_constructor = get_neuron_config_class(task, args.model)
+ input_args = neuron_config_constructor.func.get_input_args_for_task(task)
+ input_shapes = {name: getattr(args, name) for name in input_args}
+ return input_shapes, neuron_config_constructor.func
+
+
+ def get_neuron_config_class(task: str, model_id: str) -> NeuronConfig:
+ config = AutoConfig.from_pretrained(model_id)

model_type = config.model_type.replace("_", "-")
if config.is_encoder_decoder:
@@ -124,9 +131,7 @@ def get_input_shapes_and_config_class(task: str, args: argparse.Namespace) -> Dict[str, int]:
task=task,
library_name="transformers",
)
- input_args = neuron_config_constructor.func.get_input_args_for_task(task)
- input_shapes = {name: getattr(args, name) for name in input_args}
- return input_shapes, neuron_config_constructor.func
+ return neuron_config_constructor


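The decoupling above lets callers resolve a Neuron config class from just a task and a model id, without going through argparse. A minimal sketch of standalone use (the import path is the module shown in this diff; the checkpoint and task are placeholders):

```python
from optimum.exporters.neuron.__main__ import get_neuron_config_class

# Resolve the Neuron config constructor for a placeholder checkpoint and task.
constructor = get_neuron_config_class("text-classification", "bert-base-uncased")
# As get_input_shapes_and_config_class does internally, list the shape args this task needs.
print(constructor.func.get_input_args_for_task("text-classification"))
```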
def normalize_sentence_transformers_input_shapes(args: argparse.Namespace) -> Dict[str, int]:
Expand Down Expand Up @@ -235,7 +240,7 @@ def infer_stable_diffusion_shapes_from_diffusers(
return input_shapes


- def _get_submodels_and_neuron_configs(
+ def get_submodels_and_neuron_configs(
model: Union["PreTrainedModel", "DiffusionPipeline"],
input_shapes: Dict[str, int],
task: str,
@@ -418,6 +423,70 @@ def _get_submodels_and_neuron_configs_for_encoder_decoder(
return models_and_neuron_configs, output_model_names


+ def load_models_and_neuron_configs(
+ model_name_or_path: str,
+ output: Path,
+ model: Optional[Union["PreTrainedModel", "ModelMixin"]],
+ task: str,
+ dynamic_batch_size: bool,
+ cache_dir: Optional[str],
+ trust_remote_code: bool,
+ subfolder: str,
+ revision: str,
+ force_download: bool,
+ local_files_only: bool,
+ use_auth_token: Optional[Union[bool, str]],
+ submodels: Optional[Dict[str, Union[Path, str]]],
+ lora_model_ids: Optional[Union[str, List[str]]],
+ lora_weight_names: Optional[Union[str, List[str]]],
+ lora_adapter_names: Optional[Union[str, List[str]]],
+ lora_scales: Optional[Union[float, List[float]]],
+ output_attentions: bool = False,
+ output_hidden_states: bool = False,
+ library_name: Optional[str] = None,
+ **input_shapes,
+ ):
+ library_name = TasksManager.infer_library_from_model(
+ model_name_or_path, subfolder=subfolder, library_name=library_name
+ )
+
+ model_kwargs = {
+ "task": task,
+ "model_name_or_path": model_name_or_path,
+ "subfolder": subfolder,
+ "revision": revision,
+ "cache_dir": cache_dir,
+ "use_auth_token": use_auth_token,
+ "local_files_only": local_files_only,
+ "force_download": force_download,
+ "trust_remote_code": trust_remote_code,
+ "framework": "pt",
+ "library_name": library_name,
+ }
+ if model is None:
+ model = TasksManager.get_model_from_task(**model_kwargs)
+
+ models_and_neuron_configs, output_model_names = get_submodels_and_neuron_configs(
+ model=model,
+ input_shapes=input_shapes,
+ task=task,
+ library_name=library_name,
+ output=output,
+ subfolder=subfolder,
+ dynamic_batch_size=dynamic_batch_size,
+ model_name_or_path=model_name_or_path,
+ submodels=submodels,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ lora_model_ids=lora_model_ids,
+ lora_weight_names=lora_weight_names,
+ lora_adapter_names=lora_adapter_names,
+ lora_scales=lora_scales,
+ )
+
+ return models_and_neuron_configs, output_model_names


def main_export(
model_name_or_path: str,
output: Union[str, Path],
@@ -453,43 +522,29 @@ def main_export(
output.parent.mkdir(parents=True)

task = TasksManager.map_from_synonym(task)
- is_stable_diffusion = "stable-diffusion" in task
- library_name = TasksManager.infer_library_from_model(
- model_name_or_path, subfolder=subfolder, library_name=library_name
- )

- model_kwargs = {
- "task": task,
- "model_name_or_path": model_name_or_path,
- "subfolder": subfolder,
- "revision": revision,
- "cache_dir": cache_dir,
- "use_auth_token": use_auth_token,
- "local_files_only": local_files_only,
- "force_download": force_download,
- "trust_remote_code": trust_remote_code,
- "framework": "pt",
- "library_name": library_name,
- }
- if model is None:
- model = TasksManager.get_model_from_task(**model_kwargs)

- models_and_neuron_configs, output_model_names = _get_submodels_and_neuron_configs(
+ models_and_neuron_configs, output_model_names = load_models_and_neuron_configs(
+ model_name_or_path=model_name_or_path,
+ output=output,
model=model,
- input_shapes=input_shapes,
task=task,
- library_name=library_name,
- output=output,
- subfolder=subfolder,
dynamic_batch_size=dynamic_batch_size,
- model_name_or_path=model_name_or_path,
+ cache_dir=cache_dir,
+ trust_remote_code=trust_remote_code,
+ subfolder=subfolder,
+ revision=revision,
+ force_download=force_download,
+ local_files_only=local_files_only,
+ use_auth_token=use_auth_token,
submodels=submodels,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
+ library_name=library_name,
lora_model_ids=lora_model_ids,
lora_weight_names=lora_weight_names,
lora_adapter_names=lora_adapter_names,
lora_scales=lora_scales,
+ **input_shapes,
)

_, neuron_outputs = export_models(
@@ -506,6 +561,7 @@

# Validate compiled model
if do_validation is True:
+ is_stable_diffusion = "stable-diffusion" in task
if is_stable_diffusion:
# Do not validate vae encoder due to the sampling randomness
del neuron_outputs[-2] # -2 is the index of `vae_encoder`
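A hedged sketch of calling the new helper directly, mirroring the arguments main_export forwards; the model id, task, and static shapes below are placeholders, and most options just take pass-through defaults:

```python
from pathlib import Path

from optimum.exporters.neuron import load_models_and_neuron_configs

models_and_neuron_configs, output_model_names = load_models_and_neuron_configs(
    model_name_or_path="hf-internal-testing/tiny-stable-diffusion-torch",  # placeholder checkpoint
    output=Path("sd_neuron/"),
    model=None,  # let TasksManager load the pipeline
    task="stable-diffusion",
    dynamic_batch_size=False,
    cache_dir=None,
    trust_remote_code=False,
    subfolder="",
    revision=None,
    force_download=False,
    local_files_only=False,
    use_auth_token=None,
    submodels=None,
    lora_model_ids=None,
    lora_weight_names=None,
    lora_adapter_names=None,
    lora_scales=None,
    # Placeholder static shapes, consumed as **input_shapes.
    batch_size=1,
    height=64,
    width=64,
    num_images_per_prompt=1,
)
for name, (model, neuron_config) in models_and_neuron_configs.items():
    print(name, neuron_config.input_shapes)
```

The returned pair is exactly what `export_models` consumes downstream, which is how `main_export` now threads loading and compilation together.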
45 changes: 25 additions & 20 deletions optimum/neuron/modeling_diffusion.py
@@ -29,8 +29,7 @@
from transformers import CLIPFeatureExtractor, CLIPTokenizer, PretrainedConfig

from ..exporters.neuron import (
- get_submodels_for_export_stable_diffusion,
- infer_stable_diffusion_shapes_from_diffusers,
+ load_models_and_neuron_configs,
main_export,
normalize_stable_diffusion_input_shapes,
replace_stable_diffusion_submodels,
@@ -48,7 +47,6 @@
NEURON_FILE_NAME,
DiffusersPretrainedConfig,
check_if_weights_replacable,
- get_stable_diffusion_configs,
is_neuronx_available,
replace_weights,
store_compilation_config,
@@ -693,46 +691,54 @@ def _export(

# Check if the cache exists
if not inline_weights_to_neff and not disable_neuron_cache:
+ save_dir = TemporaryDirectory()
+ save_dir_path = Path(save_dir.name)
# 1. Fetch all model configs
- models_for_export = get_submodels_for_export_stable_diffusion(
- pipeline=pipe,
+ models_and_neuron_configs, _ = load_models_and_neuron_configs(
+ model_name_or_path=model_id,
+ output=save_dir_path,
+ model=pipe,
task=task,
+ dynamic_batch_size=dynamic_batch_size,
+ cache_dir=cache_dir,
+ trust_remote_code=trust_remote_code,
+ subfolder=subfolder,
+ revision=revision,
+ force_download=force_download,
+ local_files_only=local_files_only,
+ use_auth_token=use_auth_token,
+ submodels=submodels,
lora_model_ids=lora_model_ids,
lora_weight_names=lora_weight_names,
lora_adapter_names=lora_adapter_names,
lora_scales=lora_scales,
+ **input_shapes,
)
- input_shapes = infer_stable_diffusion_shapes_from_diffusers(input_shapes, pipe)
- model_configs = get_stable_diffusion_configs(models_for_export)

# 2. Build compilation config
compilation_configs = {}
- for name, model_config in model_configs.items():
+ for name, (model, neuron_config) in models_and_neuron_configs.items():
+ if "vae" in name: # vae configs are not cached.
+ continue
+ model_config = model.config
if isinstance(model_config, FrozenDict):
model_config = OrderedDict(model_config)
model_config = DiffusersPretrainedConfig.from_dict(model_config)

- model_type = (
- getattr(model_config, "model_type")
- if isinstance(model_config, Dict)
- else getattr(model_config, "model_type", None)
- )
compilation_config = store_compilation_config(
config=model_config,
- input_shapes=input_shapes[name],
+ input_shapes=neuron_config.input_shapes,
compiler_kwargs=compiler_kwargs,
- dynamic_batch_size=dynamic_batch_size,
+ input_names=neuron_config.inputs,
+ output_names=neuron_config.outputs,
+ dynamic_batch_size=neuron_config.dynamic_batch_size,
compiler_type=NEURON_COMPILER_TYPE,
compiler_version=NEURON_COMPILER_VERSION,
inline_weights_to_neff=inline_weights_to_neff,
optlevel=optlevel,
- model_type=model_type,
- task=task,
+ model_type=getattr(neuron_config, "MODEL_TYPE", None),
+ task=getattr(neuron_config, "task", None),
)
if getattr(compilation_config, "model_type", None) is not None:
compilation_config.model_type = compilation_config.model_type.replace("-", "_")
compilation_configs[name] = compilation_config

# 3. Lookup cached config
@@ -759,7 +765,6 @@ def _export(
model_name_or_path=model_id,
output=save_dir_path,
compiler_kwargs=compiler_kwargs,
- model=pipe,
task=task,
dynamic_batch_size=dynamic_batch_size,
cache_dir=cache_dir,
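A side note on the loop above: diffusers submodels expose `model.config` as a `FrozenDict`, hence the conversion before the config is stored. A minimal runnable sketch of that normalization, assuming `DiffusersPretrainedConfig` is importable from `optimum.neuron.utils` as in this file's imports (the config contents are placeholders):

```python
from collections import OrderedDict

from diffusers.configuration_utils import FrozenDict

from optimum.neuron.utils import DiffusersPretrainedConfig

model_config = FrozenDict({"model_type": "unet", "sample_size": 64})  # placeholder submodel config
if isinstance(model_config, FrozenDict):
    model_config = OrderedDict(model_config)
    model_config = DiffusersPretrainedConfig.from_dict(model_config)
print(model_config.model_type)  # -> unet
```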
@@ -15,7 +15,7 @@


import logging
- from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch
from diffusers import StableDiffusionXLInpaintPipeline
@@ -30,10 +30,6 @@
from .pipeline_utils import StableDiffusionXLPipelineMixin


- if TYPE_CHECKING:
- from diffusers.image_processor import PipelineImageInput
-
-
logger = logging.getLogger(__name__)


24 changes: 15 additions & 9 deletions optimum/neuron/utils/hub_neuronx_cache.py
@@ -184,15 +184,21 @@ def download_folder(self, folder_path: str, dst_path: str):
folder_exists = False

if folder_exists:
- # cached remotely
- for repo_content in folder_info:
- # TODO: this works for `RepoFile` but not `RepoFolder`
- local_path = self.api.hf_hub_download(self.repo_id, repo_content.path)
- filename = Path(local_path).name
- dst_path = Path(dst_path)
- dst_path.mkdir(parents=True, exist_ok=True)
- os.symlink(local_path, dst_path / filename)
- logger.info(f"Fetched cached {rel_folder_path} from {self.repo_id}")
+ try:
+ # cached remotely
+ for repo_content in folder_info:
+ # TODO: this works for `RepoFile` but not `RepoFolder`
+ local_path = self.api.hf_hub_download(self.repo_id, repo_content.path)
+ filename = Path(local_path).name
+ dst_path = Path(dst_path)
+ dst_path.mkdir(parents=True, exist_ok=True)
+ os.symlink(local_path, dst_path / filename)
+ logger.info(f"Fetched cached {rel_folder_path} from {self.repo_id}")
+ except Exception as e:
+ logger.warning(
+ f"Unable to download cached model in {self.repo_id}: {e} \nThe model will be recompiled."
+ )
+ folder_exists = False

return folder_exists

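The try/except added above turns a failed Hub fetch into a plain cache miss instead of a hard failure. A standalone sketch of the same fallback pattern; `hf_hub_download` is the real huggingface_hub API, while the repo id, file list, and function name here are placeholders:

```python
import logging
import os
from pathlib import Path
from typing import List

from huggingface_hub import hf_hub_download

logger = logging.getLogger(__name__)


def fetch_cached_files(repo_id: str, repo_paths: List[str], dst: str) -> bool:
    """Symlink each cached file into dst; return False so callers can recompile on any error."""
    try:
        dst_path = Path(dst)
        dst_path.mkdir(parents=True, exist_ok=True)
        for repo_path in repo_paths:
            local_path = hf_hub_download(repo_id, repo_path)
            os.symlink(local_path, dst_path / Path(local_path).name)
        return True
    except Exception as e:
        logger.warning(f"Unable to download cached model in {repo_id}: {e}\nThe model will be recompiled.")
        return False
```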
2 changes: 1 addition & 1 deletion tests/exporters/exporters_utils.py
@@ -27,7 +27,7 @@
"flaubert": "flaubert/flaubert_small_cased",
"mobilebert": "hf-internal-testing/tiny-random-MobileBertModel",
"mpnet": "hf-internal-testing/tiny-random-MPNetModel",
"phi": "hf-internal-testing/tiny-random-PhiModel",
"phi": "bumblebee-testing/tiny-random-PhiModel",
"roberta": "hf-internal-testing/tiny-random-RobertaModel",
"roformer": "hf-internal-testing/tiny-random-RoFormerModel",
"xlm": "hf-internal-testing/tiny-random-XLMModel",