
Commit

Add tools for auto filling traced models cache (#537)
* add tool

* improve sd hash

* fix

* remove json

* fix tests

* fix tests

* apply suggestions

* use task

* remove debug

* infer task and decouple a func in __main__

* restore assert
JingyaHuang authored Apr 3, 2024
1 parent 6253f12 commit 6856557
Showing 10 changed files with 325 additions and 122 deletions.
2 changes: 1 addition & 1 deletion docs/source/tutorials/stable_diffusion.mdx
@@ -316,7 +316,7 @@ image = base(
del base # To avoid neuron device OOM

refiner = NeuronStableDiffusionXLImg2ImgPipeline.from_pretrained("sd_neuron_xl_refiner/")
- image = image = refiner(
+ image = refiner(
prompt=prompt,
num_inference_steps=40,
denoising_start=0.8,
4 changes: 4 additions & 0 deletions optimum/exporters/neuron/__init__.py
@@ -22,6 +22,8 @@
"infer_stable_diffusion_shapes_from_diffusers",
"main_export",
"normalize_stable_diffusion_input_shapes",
"get_submodels_and_neuron_configs",
"load_models_and_neuron_configs",
],
"base": ["NeuronDefaultConfig"],
"convert": ["export", "export_models", "validate_model_outputs", "validate_models_outputs"],
@@ -35,7 +37,9 @@

if TYPE_CHECKING:
from .__main__ import (
+ get_submodels_and_neuron_configs,
infer_stable_diffusion_shapes_from_diffusers,
+ load_models_and_neuron_configs,
main_export,
normalize_stable_diffusion_input_shapes,
)
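With the `__init__.py` change above, both helpers become part of the package's public surface. A quick sketch (not part of this commit's diff) of the resulting imports:

```python
from optimum.exporters.neuron import (
    get_submodels_and_neuron_configs,
    load_models_and_neuron_configs,
)
```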
120 changes: 88 additions & 32 deletions optimum/exporters/neuron/__main__.py
@@ -43,7 +43,7 @@
from ...utils import is_diffusers_available, logging
from ..error_utils import AtolError, OutputMatchError, ShapeError
from ..tasks import TasksManager
- from .base import NeuronDecoderConfig
+ from .base import NeuronConfig, NeuronDecoderConfig
from .convert import export_models, validate_models_outputs
from .model_configs import * # noqa: F403
from .utils import (
@@ -112,7 +112,14 @@ def infer_task(task: str, model_name_or_path: str) -> str:

# This function is not applicable for diffusers / sentence transformers models
def get_input_shapes_and_config_class(task: str, args: argparse.Namespace) -> Dict[str, int]:
- config = AutoConfig.from_pretrained(args.model)
+ neuron_config_constructor = get_neuron_config_class(task, args.model)
+ input_args = neuron_config_constructor.func.get_input_args_for_task(task)
+ input_shapes = {name: getattr(args, name) for name in input_args}
+ return input_shapes, neuron_config_constructor.func
+
+
+ def get_neuron_config_class(task: str, model_id: str) -> NeuronConfig:
+ config = AutoConfig.from_pretrained(model_id)

model_type = config.model_type.replace("_", "-")
if config.is_encoder_decoder:
@@ -124,9 +131,7 @@ def get_input_shapes_and_config_class(task: str, args: argparse.Namespace) -> Dict[str, int]:
task=task,
library_name="transformers",
)
- input_args = neuron_config_constructor.func.get_input_args_for_task(task)
- input_shapes = {name: getattr(args, name) for name in input_args}
- return input_shapes, neuron_config_constructor.func
+ return neuron_config_constructor


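The decoupling above lets callers resolve a Neuron config class from just a task and a model id, without going through argparse. A minimal sketch of standalone use (the import path is the module shown in this diff; the checkpoint and task are placeholders):

```python
from optimum.exporters.neuron.__main__ import get_neuron_config_class

# Resolve the Neuron config constructor for a placeholder checkpoint and task.
constructor = get_neuron_config_class("text-classification", "bert-base-uncased")
# As get_input_shapes_and_config_class does internally, list the shape args this task needs.
print(constructor.func.get_input_args_for_task("text-classification"))
```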
def normalize_sentence_transformers_input_shapes(args: argparse.Namespace) -> Dict[str, int]:
Expand Down Expand Up @@ -235,7 +240,7 @@ def infer_stable_diffusion_shapes_from_diffusers(
return input_shapes


- def _get_submodels_and_neuron_configs(
+ def get_submodels_and_neuron_configs(
model: Union["PreTrainedModel", "DiffusionPipeline"],
input_shapes: Dict[str, int],
task: str,
@@ -418,6 +423,70 @@ def _get_submodels_and_neuron_configs_for_encoder_decoder(
return models_and_neuron_configs, output_model_names


+ def load_models_and_neuron_configs(
+ model_name_or_path: str,
+ output: Path,
+ model: Optional[Union["PreTrainedModel", "ModelMixin"]],
+ task: str,
+ dynamic_batch_size: bool,
+ cache_dir: Optional[str],
+ trust_remote_code: bool,
+ subfolder: str,
+ revision: str,
+ force_download: bool,
+ local_files_only: bool,
+ use_auth_token: Optional[Union[bool, str]],
+ submodels: Optional[Dict[str, Union[Path, str]]],
+ lora_model_ids: Optional[Union[str, List[str]]],
+ lora_weight_names: Optional[Union[str, List[str]]],
+ lora_adapter_names: Optional[Union[str, List[str]]],
+ lora_scales: Optional[Union[float, List[float]]],
+ output_attentions: bool = False,
+ output_hidden_states: bool = False,
+ library_name: Optional[str] = None,
+ **input_shapes,
+ ):
+ library_name = TasksManager.infer_library_from_model(
+ model_name_or_path, subfolder=subfolder, library_name=library_name
+ )
+
+ model_kwargs = {
+ "task": task,
+ "model_name_or_path": model_name_or_path,
+ "subfolder": subfolder,
+ "revision": revision,
+ "cache_dir": cache_dir,
+ "use_auth_token": use_auth_token,
+ "local_files_only": local_files_only,
+ "force_download": force_download,
+ "trust_remote_code": trust_remote_code,
+ "framework": "pt",
+ "library_name": library_name,
+ }
+ if model is None:
+ model = TasksManager.get_model_from_task(**model_kwargs)
+
+ models_and_neuron_configs, output_model_names = get_submodels_and_neuron_configs(
+ model=model,
+ input_shapes=input_shapes,
+ task=task,
+ library_name=library_name,
+ output=output,
+ subfolder=subfolder,
+ dynamic_batch_size=dynamic_batch_size,
+ model_name_or_path=model_name_or_path,
+ submodels=submodels,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ lora_model_ids=lora_model_ids,
+ lora_weight_names=lora_weight_names,
+ lora_adapter_names=lora_adapter_names,
+ lora_scales=lora_scales,
+ )
+
+ return models_and_neuron_configs, output_model_names


def main_export(
model_name_or_path: str,
output: Union[str, Path],
@@ -453,43 +522,29 @@ def main_export(
output.parent.mkdir(parents=True)

task = TasksManager.map_from_synonym(task)
- is_stable_diffusion = "stable-diffusion" in task
- library_name = TasksManager.infer_library_from_model(
- model_name_or_path, subfolder=subfolder, library_name=library_name
- )

- model_kwargs = {
- "task": task,
- "model_name_or_path": model_name_or_path,
- "subfolder": subfolder,
- "revision": revision,
- "cache_dir": cache_dir,
- "use_auth_token": use_auth_token,
- "local_files_only": local_files_only,
- "force_download": force_download,
- "trust_remote_code": trust_remote_code,
- "framework": "pt",
- "library_name": library_name,
- }
- if model is None:
- model = TasksManager.get_model_from_task(**model_kwargs)

- models_and_neuron_configs, output_model_names = _get_submodels_and_neuron_configs(
+ models_and_neuron_configs, output_model_names = load_models_and_neuron_configs(
+ model_name_or_path=model_name_or_path,
+ output=output,
model=model,
- input_shapes=input_shapes,
task=task,
- library_name=library_name,
- output=output,
- subfolder=subfolder,
dynamic_batch_size=dynamic_batch_size,
- model_name_or_path=model_name_or_path,
+ cache_dir=cache_dir,
+ trust_remote_code=trust_remote_code,
+ subfolder=subfolder,
+ revision=revision,
+ force_download=force_download,
+ local_files_only=local_files_only,
+ use_auth_token=use_auth_token,
submodels=submodels,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
+ library_name=library_name,
lora_model_ids=lora_model_ids,
lora_weight_names=lora_weight_names,
lora_adapter_names=lora_adapter_names,
lora_scales=lora_scales,
+ **input_shapes,
)

_, neuron_outputs = export_models(
@@ -506,6 +561,7 @@

# Validate compiled model
if do_validation is True:
+ is_stable_diffusion = "stable-diffusion" in task
if is_stable_diffusion:
# Do not validate vae encoder due to the sampling randomness
del neuron_outputs[-2] # -2 is the index of `vae_encoder`
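A hedged sketch of calling the new helper directly, mirroring the arguments main_export forwards; the model id, task, and static shapes below are placeholders, and most options just take pass-through defaults:

```python
from pathlib import Path

from optimum.exporters.neuron import load_models_and_neuron_configs

models_and_neuron_configs, output_model_names = load_models_and_neuron_configs(
    model_name_or_path="hf-internal-testing/tiny-stable-diffusion-torch",  # placeholder checkpoint
    output=Path("sd_neuron/"),
    model=None,  # let TasksManager load the pipeline
    task="stable-diffusion",
    dynamic_batch_size=False,
    cache_dir=None,
    trust_remote_code=False,
    subfolder="",
    revision=None,
    force_download=False,
    local_files_only=False,
    use_auth_token=None,
    submodels=None,
    lora_model_ids=None,
    lora_weight_names=None,
    lora_adapter_names=None,
    lora_scales=None,
    # Placeholder static shapes, consumed as **input_shapes.
    batch_size=1,
    height=64,
    width=64,
    num_images_per_prompt=1,
)
for name, (model, neuron_config) in models_and_neuron_configs.items():
    print(name, neuron_config.input_shapes)
```

The returned pair is exactly what `export_models` consumes downstream, which is how `main_export` now threads loading and compilation together.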
45 changes: 25 additions & 20 deletions optimum/neuron/modeling_diffusion.py
@@ -29,8 +29,7 @@
from transformers import CLIPFeatureExtractor, CLIPTokenizer, PretrainedConfig

from ..exporters.neuron import (
- get_submodels_for_export_stable_diffusion,
- infer_stable_diffusion_shapes_from_diffusers,
+ load_models_and_neuron_configs,
main_export,
normalize_stable_diffusion_input_shapes,
replace_stable_diffusion_submodels,
@@ -48,7 +47,6 @@
NEURON_FILE_NAME,
DiffusersPretrainedConfig,
check_if_weights_replacable,
- get_stable_diffusion_configs,
is_neuronx_available,
replace_weights,
store_compilation_config,
@@ -693,46 +691,54 @@ def _export(

# Check if the cache exists
if not inline_weights_to_neff and not disable_neuron_cache:
+ save_dir = TemporaryDirectory()
+ save_dir_path = Path(save_dir.name)
# 1. Fetch all model configs
- models_for_export = get_submodels_for_export_stable_diffusion(
- pipeline=pipe,
+ models_and_neuron_configs, _ = load_models_and_neuron_configs(
+ model_name_or_path=model_id,
+ output=save_dir_path,
+ model=pipe,
task=task,
+ dynamic_batch_size=dynamic_batch_size,
+ cache_dir=cache_dir,
+ trust_remote_code=trust_remote_code,
+ subfolder=subfolder,
+ revision=revision,
+ force_download=force_download,
+ local_files_only=local_files_only,
+ use_auth_token=use_auth_token,
+ submodels=submodels,
lora_model_ids=lora_model_ids,
lora_weight_names=lora_weight_names,
lora_adapter_names=lora_adapter_names,
lora_scales=lora_scales,
+ **input_shapes,
)
- input_shapes = infer_stable_diffusion_shapes_from_diffusers(input_shapes, pipe)
- model_configs = get_stable_diffusion_configs(models_for_export)

# 2. Build compilation config
compilation_configs = {}
- for name, model_config in model_configs.items():
+ for name, (model, neuron_config) in models_and_neuron_configs.items():
+ if "vae" in name: # vae configs are not cached.
+ continue
+ model_config = model.config
if isinstance(model_config, FrozenDict):
model_config = OrderedDict(model_config)
model_config = DiffusersPretrainedConfig.from_dict(model_config)

- model_type = (
- getattr(model_config, "model_type")
- if isinstance(model_config, Dict)
- else getattr(model_config, "model_type", None)
- )
compilation_config = store_compilation_config(
config=model_config,
- input_shapes=input_shapes[name],
+ input_shapes=neuron_config.input_shapes,
compiler_kwargs=compiler_kwargs,
- dynamic_batch_size=dynamic_batch_size,
+ input_names=neuron_config.inputs,
+ output_names=neuron_config.outputs,
+ dynamic_batch_size=neuron_config.dynamic_batch_size,
compiler_type=NEURON_COMPILER_TYPE,
compiler_version=NEURON_COMPILER_VERSION,
inline_weights_to_neff=inline_weights_to_neff,
optlevel=optlevel,
- model_type=model_type,
- task=task,
+ model_type=getattr(neuron_config, "MODEL_TYPE", None),
+ task=getattr(neuron_config, "task", None),
)
if getattr(compilation_config, "model_type", None) is not None:
compilation_config.model_type = compilation_config.model_type.replace("-", "_")
compilation_configs[name] = compilation_config

# 3. Lookup cached config
@@ -759,7 +765,6 @@ def _export(
model_name_or_path=model_id,
output=save_dir_path,
compiler_kwargs=compiler_kwargs,
- model=pipe,
task=task,
dynamic_batch_size=dynamic_batch_size,
cache_dir=cache_dir,
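A side note on the loop above: diffusers submodels expose `model.config` as a `FrozenDict`, hence the conversion before the config is stored. A minimal runnable sketch of that normalization, assuming `DiffusersPretrainedConfig` is importable from `optimum.neuron.utils` as in this file's imports (the config contents are placeholders):

```python
from collections import OrderedDict

from diffusers.configuration_utils import FrozenDict

from optimum.neuron.utils import DiffusersPretrainedConfig

model_config = FrozenDict({"model_type": "unet", "sample_size": 64})  # placeholder submodel config
if isinstance(model_config, FrozenDict):
    model_config = OrderedDict(model_config)
    model_config = DiffusersPretrainedConfig.from_dict(model_config)
print(model_config.model_type)  # -> unet
```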
@@ -15,7 +15,7 @@


import logging
- from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch
from diffusers import StableDiffusionXLInpaintPipeline
@@ -30,10 +30,6 @@
from .pipeline_utils import StableDiffusionXLPipelineMixin


- if TYPE_CHECKING:
- from diffusers.image_processor import PipelineImageInput
-
-
logger = logging.getLogger(__name__)


24 changes: 15 additions & 9 deletions optimum/neuron/utils/hub_neuronx_cache.py
@@ -184,15 +184,21 @@ def download_folder(self, folder_path: str, dst_path: str):
folder_exists = False

if folder_exists:
- # cached remotely
- for repo_content in folder_info:
- # TODO: this works for `RepoFile` but not `RepoFolder`
- local_path = self.api.hf_hub_download(self.repo_id, repo_content.path)
- filename = Path(local_path).name
- dst_path = Path(dst_path)
- dst_path.mkdir(parents=True, exist_ok=True)
- os.symlink(local_path, dst_path / filename)
- logger.info(f"Fetched cached {rel_folder_path} from {self.repo_id}")
+ try:
+ # cached remotely
+ for repo_content in folder_info:
+ # TODO: this works for `RepoFile` but not `RepoFolder`
+ local_path = self.api.hf_hub_download(self.repo_id, repo_content.path)
+ filename = Path(local_path).name
+ dst_path = Path(dst_path)
+ dst_path.mkdir(parents=True, exist_ok=True)
+ os.symlink(local_path, dst_path / filename)
+ logger.info(f"Fetched cached {rel_folder_path} from {self.repo_id}")
+ except Exception as e:
+ logger.warning(
+ f"Unable to download cached model in {self.repo_id}: {e} \nThe model will be recompiled."
+ )
+ folder_exists = False

return folder_exists

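The try/except added above turns a failed Hub fetch into a plain cache miss instead of a hard failure. A standalone sketch of the same fallback pattern; `hf_hub_download` is the real huggingface_hub API, while the repo id, file list, and function name here are placeholders:

```python
import logging
import os
from pathlib import Path
from typing import List

from huggingface_hub import hf_hub_download

logger = logging.getLogger(__name__)


def fetch_cached_files(repo_id: str, repo_paths: List[str], dst: str) -> bool:
    """Symlink each cached file into dst; return False so callers can recompile on any error."""
    try:
        dst_path = Path(dst)
        dst_path.mkdir(parents=True, exist_ok=True)
        for repo_path in repo_paths:
            local_path = hf_hub_download(repo_id, repo_path)
            os.symlink(local_path, dst_path / Path(local_path).name)
        return True
    except Exception as e:
        logger.warning(f"Unable to download cached model in {repo_id}: {e}\nThe model will be recompiled.")
        return False
```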
2 changes: 1 addition & 1 deletion tests/exporters/exporters_utils.py
@@ -27,7 +27,7 @@
"flaubert": "flaubert/flaubert_small_cased",
"mobilebert": "hf-internal-testing/tiny-random-MobileBertModel",
"mpnet": "hf-internal-testing/tiny-random-MPNetModel",
"phi": "hf-internal-testing/tiny-random-PhiModel",
"phi": "bumblebee-testing/tiny-random-PhiModel",
"roberta": "hf-internal-testing/tiny-random-RobertaModel",
"roformer": "hf-internal-testing/tiny-random-RoFormerModel",
"xlm": "hf-internal-testing/tiny-random-XLMModel",