From d9c478e1863778889352894938354bde0a546b25 Mon Sep 17 00:00:00 2001 From: strint Date: Thu, 16 May 2024 19:22:58 +0800 Subject: [PATCH 01/13] add nexfort and pixart alpha --- benchmarks/README.md | 13 +++++ benchmarks/text_to_image.py | 47 ++++++++++++++----- .../onediffx/__init__.py | 4 +- .../compilers/diffusion_pipeline_compiler.py | 40 +++++++++++++++- .../infer_compiler/backends/nexfort.py | 21 ++------- .../nexfort/deployable_module.py | 6 +-- src/onediff/infer_compiler/utils/options.py | 33 +------------ 7 files changed, 97 insertions(+), 67 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index b2fc652ed..4eee88dfb 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -22,3 +22,16 @@ docker run -it --rm --gpus all --shm-size 12g --ipc=host --security-opt seccomp= onediff:benchmark-community-default \ sh -c "cd /benchmark && sh run_all_benchmarks.sh -m models -o benchmark.md" ``` + +## Run Examples +### Run pixart alpha (with nexfort backend) +``` +# model_id_or_path_to_PixArt-XL-2-1024-MS: /data/hf_models/PixArt-XL-2-1024-MS/ +python3 text_to_image.py --model model_id_or_path_to_PixArt-XL-2-1024-MS --scheduler none --compiler nexfort +``` +Performance on NVIDIA A100-PCIE-40GB: +Iterations per second of progress bar: 11.7 +Inference time: 2.045s +Iterations per second: 10.517 +CUDA Mem after: 13.569GiB + diff --git a/benchmarks/text_to_image.py b/benchmarks/text_to_image.py index a8c97c510..ca8c5bb20 100644 --- a/benchmarks/text_to_image.py +++ b/benchmarks/text_to_image.py @@ -6,7 +6,7 @@ CONTROLNET = None STEPS = 30 PROMPT = "best quality, realistic, unreal engine, 4K, a beautiful girl" -NEGATIVE_PROMPT = None +NEGATIVE_PROMPT = "" SEED = None WARMUPS = 3 BATCH = 1 @@ -19,6 +19,8 @@ CACHE_INTERVAL = 3 CACHE_LAYER_ID = 0 CACHE_BLOCK_ID = 0 +COMPILER = "oneflow" +COMPILER_CONFIG = None import os import importlib @@ -30,7 +32,7 @@ from PIL import Image, ImageDraw from diffusers.utils import load_image -from onediffx import compile_pipe +from onediffx import compile_pipe, CompileOptions def parse_args(): @@ -60,9 +62,14 @@ def parse_args(): parser.add_argument( "--compiler", type=str, - default="oneflow", + default=COMPILER, choices=["none", "oneflow", "nexfort", "compile", "compile-max-autotune"], ) + parser.add_argument( + "--compiler-config", + type=str, + default=COMPILER_CONFIG, + ) return parser.parse_args() @@ -70,6 +77,8 @@ def load_pipe( pipeline_cls, model_name, variant=None, + dtype=torch.float16, + device="cuda", custom_pipeline=None, scheduler=None, lora=None, @@ -80,31 +89,34 @@ def load_pipe( extra_kwargs["custom_pipeline"] = custom_pipeline if variant is not None: extra_kwargs["variant"] = variant + if dtype is not None: + extra_kwargs["torch_dtype"] = dtype if controlnet is not None: from diffusers import ControlNetModel controlnet = ControlNetModel.from_pretrained( - controlnet, torch_dtype=torch.float16, + controlnet, torch_dtype=dtype, ) extra_kwargs["controlnet"] = controlnet if os.path.exists(os.path.join(model_name, "calibrate_info.txt")): from onediff.quantization import QuantPipeline pipe = QuantPipeline.from_quantized( - pipeline_cls, model_name, torch_dtype=torch.float16, **extra_kwargs + pipeline_cls, model_name, **extra_kwargs ) else: pipe = pipeline_cls.from_pretrained( - model_name, torch_dtype=torch.float16, **extra_kwargs + model_name, **extra_kwargs ) - if scheduler is not None: + if scheduler is not None and scheduler != "none": scheduler_cls = getattr(importlib.import_module("diffusers"), scheduler) pipe.scheduler = 
scheduler_cls.from_config(pipe.scheduler.config) if lora is not None: pipe.load_lora_weights(lora) pipe.fuse_lora() pipe.safety_checker = None - pipe.to(torch.device("cuda")) + if device is not None: + pipe.to(torch.device(device)) return pipe @@ -154,15 +166,25 @@ def main(): controlnet=args.controlnet, ) - height = args.height or pipe.unet.config.sample_size * pipe.vae_scale_factor - width = args.width or pipe.unet.config.sample_size * pipe.vae_scale_factor + core_net = None + if core_net is None: + core_net = getattr(pipe, "unet", None) + if core_net is None: + core_net = getattr(pipe, "transformer", None) + height = args.height or core_net.config.sample_size * pipe.vae_scale_factor + width = args.width or core_net.config.sample_size * pipe.vae_scale_factor if args.compiler == "none": pass elif args.compiler == "oneflow": pipe = compile_pipe(pipe) elif args.compiler == "nexfort": - pipe = compile_pipe(pipe, backend="nexfort") + options = CompileOptions() + if args.compiler_config is not None: + options.nexfort = json.load(args.compiler_config) + else: + options.nexfort = json.loads('{"mode": "max-autotune", "memory_format": "channels_last"}') + pipe = compile_pipe(pipe, backend="nexfort", options=options, fuse_qkv_projections=True) elif args.compiler in ("compile", "compile-max-autotune"): mode = "max-autotune" if args.compiler == "compile-max-autotune" else None pipe.unet = torch.compile(pipe.unet, mode=mode) @@ -199,7 +221,6 @@ def get_kwarg_inputs(): negative_prompt=args.negative_prompt, height=height, width=width, - num_inference_steps=args.steps, num_images_per_prompt=args.batch, generator=None if args.seed is None @@ -210,6 +231,8 @@ def get_kwarg_inputs(): else json.loads(args.extra_call_kwargs) ), ) + if args.steps is not None: + kwarg_inputs["num_inference_steps"] = args.steps if input_image is not None: kwarg_inputs["image"] = input_image if control_image is not None: diff --git a/onediff_diffusers_extensions/onediffx/__init__.py b/onediff_diffusers_extensions/onediffx/__init__.py index 2da48e8f8..f5f11e59d 100644 --- a/onediff_diffusers_extensions/onediffx/__init__.py +++ b/onediff_diffusers_extensions/onediffx/__init__.py @@ -1,5 +1,5 @@ __version__ = "1.1.0.dev1" -from onediff.infer_compiler import compile_options +from onediff.infer_compiler import compile_options, CompileOptions from .compilers.diffusion_pipeline_compiler import compile_pipe, save_pipe, load_pipe -__all__ = ["compile_pipe", "compile_options", "save_pipe", "load_pipe"] +__all__ = ["compile_pipe", "compile_options", "CompileOptions", "save_pipe", "load_pipe"] diff --git a/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py b/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py index 3307991e3..8cc31596e 100644 --- a/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py +++ b/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py @@ -29,11 +29,11 @@ def _recursive_setattr(obj, attr, value): "fast_unet", # for deepcache "prior", # for StableCascadePriorPipeline "decoder", # for StableCascadeDecoderPipeline + "transformer", # for Transformer-based DiffusionPipeline such as DiTPipeline and PixArtAlphaPipeline "vqgan.down_blocks", # for StableCascadeDecoderPipeline "vqgan.up_blocks", # for StableCascadeDecoderPipeline "vae.decoder", "vae.encoder", - "transformer", # for Transformer-based DiffusionPipeline such as DiTPipeline and PixArtAlphaPipeline ] @@ -52,8 +52,17 @@ def _filter_parts(ignores=()): def 
compile_pipe( - pipe, *, backend="oneflow", options=None, ignores=(), + pipe, *, backend="oneflow", options=None, ignores=(), fuse_qkv_projections=False, ): + if fuse_qkv_projections: + print("****** fuse qkv projections ******") + pipe = fuse_qkv_projections_in_pipe(pipe) + + if options.nexfort is not None and "memory_format" in options.nexfort: + memory_format = getattr(torch, options.nexfort["memory_format"]) + pipe = convert_pipe_to_memory_format(pipe, ignores=ignores, memory_format=memory_format) + del options.nexfort["memory_format"] + # To fix the bug of graph load of vae. Please refer to: https://github.com/siliconflow/onediff/issues/452 if ( hasattr(pipe, "upcast_vae") @@ -82,6 +91,33 @@ def compile_pipe( return pipe +def fuse_qkv_projections_in_pipe(pipe): + if hasattr(pipe, "fuse_qkv_projections"): + pipe.fuse_qkv_projections() + return pipe + + +def convert_pipe_to_memory_format(pipe, *, ignores=(), memory_format=torch.preserve_format): + from nexfort.utils.attributes import multi_recursive_apply + from nexfort.utils.memory_format import apply_memory_format + import functools + if memory_format == torch.preserve_format: + return pipe + + parts = [ + "unet", + "controlnet", + "fast_unet", # for deepcache + "prior", # for StableCascadePriorPipeline + "decoder", # for StableCascadeDecoderPipeline + "transformer", # for Transformer-based DiffusionPipeline such as DiTPipeline and PixArtAlphaPipeline + "vqgan", # for StableCascadeDecoderPipeline + "vae", + ] + multi_recursive_apply( + pipe, parts, functools.partial(apply_memory_format, memory_format=memory_format), ignores=ignores, verbose=True + ) + return pipe def save_pipe(pipe, dir="cached_pipe", *, ignores=(), overwrite=True): if not os.path.exists(dir): diff --git a/src/onediff/infer_compiler/backends/nexfort.py b/src/onediff/infer_compiler/backends/nexfort.py index 67cca8cbc..80e0df3b1 100644 --- a/src/onediff/infer_compiler/backends/nexfort.py +++ b/src/onediff/infer_compiler/backends/nexfort.py @@ -3,16 +3,6 @@ from .registry import register_backend -def make_inductor_options(options): - inductor_options = {} - if options is None: - return inductor_options - for filed in dataclasses.fields(options): - filed_name = filed.name - inductor_options[f"inductor.{filed_name}"] = getattr(options, filed_name) - return inductor_options - - @register_backend("nexfort") def compile(torch_module: torch.nn.Module, *, options=None): from nexfort.utils.memory_format import apply_memory_format @@ -22,11 +12,8 @@ def compile(torch_module: torch.nn.Module, *, options=None): options = options if options is not None else CompileOptions() nexfort_options = options.nexfort - if nexfort_options.memory_format != torch.preserve_format: - model = apply_memory_format( - torch_module, memory_format=nexfort_options.memory_format - ) - model = nexfort_compile( - model, options=make_inductor_options(nexfort_options.inductor) + compiled_model = nexfort_compile( + torch_module, **nexfort_options ) - return NexfortDeployableModule(model) + # return NexfortDeployableModule(compiled_model, torch_module) + return compiled_model diff --git a/src/onediff/infer_compiler/nexfort/deployable_module.py b/src/onediff/infer_compiler/nexfort/deployable_module.py index eb8a91be2..a565a8df4 100644 --- a/src/onediff/infer_compiler/nexfort/deployable_module.py +++ b/src/onediff/infer_compiler/nexfort/deployable_module.py @@ -3,10 +3,10 @@ class NexfortDeployableModule(DeployableModule): - def __init__(self, torch_module): + def __init__(self, compiled_module, torch_module): 
torch.nn.Module.__init__(self) - object.__setattr__(self, "_deployable_module_model", torch_module) - object.__setattr__(self, "_modules", torch_module._modules) + object.__setattr__(self, "_deployable_module_model", compiled_module) + object.__setattr__(self, "_modules", compiled_module._modules) object.__setattr__(self, "_torch_module", torch_module) def __call__(self, *args, **kwargs): diff --git a/src/onediff/infer_compiler/utils/options.py b/src/onediff/infer_compiler/utils/options.py index f96e83e37..beccd13db 100644 --- a/src/onediff/infer_compiler/utils/options.py +++ b/src/onediff/infer_compiler/utils/options.py @@ -42,35 +42,6 @@ class OneflowCompileOptions: kernel_glu_enable_y_gemm_impl: bool = None kernel_glu_quant_enable_dual_gemm_impl: bool = None - -@dataclasses.dataclass -class NexfortInductorCompileOptions: - disable: bool = False - mode: str = None - options: Dict = dataclasses.field(default_factory=dict) - - -@dataclasses.dataclass -class NexfortCompileOptions: - memory_format: torch.memory_format - fuse_qkv_projections: bool - inductor: NexfortInductorCompileOptions - - def __init__( - self, - memory_format=torch.channels_last, - fuse_qkv_projections=True, - inductor=None, - ): - if isinstance(memory_format, str): - memory_format = getattr(torch, memory_format) - self.memory_format = memory_format - self.fuse_qkv_projections = fuse_qkv_projections - self.inductor = ( - inductor if inductor is not None else NexfortInductorCompileOptions() - ) - - @dataclasses.dataclass class CompileOptions: # common options @@ -80,12 +51,12 @@ class CompileOptions: oneflow: OneflowCompileOptions # nexfort specific options - nexfort: NexfortCompileOptions + nexfort: Dict def __init__(self, dynamic=True, oneflow=None, nexfort=None): self.dynamic = dynamic self.oneflow = oneflow if oneflow is not None else OneflowCompileOptions() - self.nexfort = nexfort if nexfort is not None else NexfortCompileOptions() + self.nexfort = nexfort if nexfort is not None else dict() # a global default compile options From ec00bf992c4664e82e9b53880214ad05c84074f7 Mon Sep 17 00:00:00 2001 From: strint Date: Fri, 17 May 2024 23:49:50 +0800 Subject: [PATCH 02/13] refactor backends --- .../extras_nodes/nodes_oneflow_booster.py | 2 +- onediff_comfy_nodes/modules/oneflow/config.py | 2 +- .../modules/oneflow/utils/booster_utils.py | 2 +- .../oneflow/utils/quant_ksampler_tools.py | 3 +- .../examples/image_to_image_graph_load.py | 2 +- .../examples/text_to_image_sdxl_lora.py | 2 +- .../compilers/diffusion_pipeline_compiler.py | 6 +- .../onediffx/lora/lora.py | 2 +- .../onediffx/lora/text_encoder.py | 2 +- .../onediffx/lora/unet.py | 2 +- .../tests/profile_lora.py | 2 +- .../tests/profile_multi_lora.py | 2 +- .../scripts/onediff.py | 2 +- .../attention_processor_oflow.py | 2 +- src/onediff/infer_compiler/__init__.py | 8 +- .../infer_compiler/backends/__init__.py | 4 + .../compiler.py} | 3 +- .../{core => backends}/deployable_module.py | 1 - .../backends/nexfort/__init__.py | 1 + .../nexfort/deployable_module.py | 3 +- .../backends/{ => nexfort}/nexfort.py | 6 +- .../backends/oneflow/__init__.py | 3 + .../oneflow}/args_tree_util.py | 2 +- .../oneflow/deployable_module.py | 82 ++++++++-- .../{ => backends}/oneflow/dual_module.py | 26 ++- .../{utils => backends/oneflow}/env_var.py | 85 +++++----- .../{ => backends}/oneflow/graph.py | 8 +- .../oneflow}/graph_management_utils.py | 10 +- .../backends/{ => oneflow}/oneflow.py | 14 +- .../oneflow}/oneflow_exec_mode.py | 0 .../oneflow}/online_quantization_utils.py | 0 
.../oneflow}/param_utils.py | 2 +- .../oneflow/utils}/__init__.py | 0 .../{ => backends/oneflow}/utils/cost_util.py | 2 +- .../oneflow}/utils/version_util.py | 2 +- .../infer_compiler/backends/options.py | 13 ++ .../infer_compiler/backends/registry.py | 17 +- src/onediff/infer_compiler/core/__init__.py | 2 - .../import_tools/dyn_mock_mod.py | 4 +- .../infer_compiler/import_tools/importer.py | 2 +- .../patch_for_compiler.py | 0 .../infer_compiler/oneflow/__init__.py | 1 - src/onediff/infer_compiler/oneflow/config.py | 148 ------------------ src/onediff/infer_compiler/oneflow/utils.py | 83 ---------- .../transform/builtin_transform.py | 4 +- .../transform/custom_transform.py | 2 +- .../infer_compiler/transform/manager.py | 2 +- .../patch_for_diffusers.py | 2 +- src/onediff/infer_compiler/utils/__init__.py | 19 --- src/onediff/infer_compiler/utils/options.py | 63 -------- .../optimization/attention_processor.py | 2 +- src/onediff/optimization/quant_optimizer.py | 6 +- src/onediff/torch_utils/__init__.py | 1 + .../model_inplace_assign.py | 0 .../module_operations.py | 0 src/onediff/utils/__init__.py | 7 + src/onediff/utils/env_var.py | 31 ++++ .../{infer_compiler => }/utils/log_utils.py | 0 tests/test_quantize_custom_model.py | 2 +- 59 files changed, 257 insertions(+), 449 deletions(-) rename src/onediff/infer_compiler/{core/with_onediff_compile.py => backends/compiler.py} (90%) rename src/onediff/infer_compiler/{core => backends}/deployable_module.py (99%) create mode 100644 src/onediff/infer_compiler/backends/nexfort/__init__.py rename src/onediff/infer_compiler/{ => backends}/nexfort/deployable_module.py (91%) rename src/onediff/infer_compiler/backends/{ => nexfort}/nexfort.py (78%) create mode 100644 src/onediff/infer_compiler/backends/oneflow/__init__.py rename src/onediff/infer_compiler/{utils => backends/oneflow}/args_tree_util.py (98%) rename src/onediff/infer_compiler/{ => backends}/oneflow/deployable_module.py (72%) rename src/onediff/infer_compiler/{ => backends}/oneflow/dual_module.py (88%) rename src/onediff/infer_compiler/{utils => backends/oneflow}/env_var.py (76%) rename src/onediff/infer_compiler/{ => backends}/oneflow/graph.py (92%) rename src/onediff/infer_compiler/{utils => backends/oneflow}/graph_management_utils.py (95%) rename src/onediff/infer_compiler/backends/{ => oneflow}/oneflow.py (89%) rename src/onediff/infer_compiler/{utils => backends/oneflow}/oneflow_exec_mode.py (100%) rename src/onediff/infer_compiler/{utils => backends/oneflow}/online_quantization_utils.py (100%) rename src/onediff/infer_compiler/{utils => backends/oneflow}/param_utils.py (99%) rename src/onediff/infer_compiler/{nexfort => backends/oneflow/utils}/__init__.py (100%) rename src/onediff/infer_compiler/{ => backends/oneflow}/utils/cost_util.py (99%) rename src/onediff/infer_compiler/{ => backends/oneflow}/utils/version_util.py (96%) create mode 100644 src/onediff/infer_compiler/backends/options.py delete mode 100644 src/onediff/infer_compiler/core/__init__.py rename src/onediff/infer_compiler/{utils => import_tools}/patch_for_compiler.py (100%) delete mode 100644 src/onediff/infer_compiler/oneflow/__init__.py delete mode 100644 src/onediff/infer_compiler/oneflow/config.py delete mode 100644 src/onediff/infer_compiler/oneflow/utils.py rename src/onediff/infer_compiler/{utils => transform}/patch_for_diffusers.py (95%) delete mode 100644 src/onediff/infer_compiler/utils/__init__.py delete mode 100644 src/onediff/infer_compiler/utils/options.py create mode 100644 src/onediff/torch_utils/__init__.py 
rename src/onediff/{infer_compiler/utils => torch_utils}/model_inplace_assign.py (100%) rename src/onediff/{infer_compiler/utils => torch_utils}/module_operations.py (100%) create mode 100644 src/onediff/utils/__init__.py create mode 100644 src/onediff/utils/env_var.py rename src/onediff/{infer_compiler => }/utils/log_utils.py (100%) diff --git a/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py b/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py index ca22873ee..c8cfb6103 100644 --- a/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py +++ b/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py @@ -7,7 +7,7 @@ from comfy import model_management from comfy.cli_args import args -from onediff.infer_compiler.utils import is_community_version +from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version from ..modules.oneflow.config import ONEDIFF_QUANTIZED_OPTIMIZED_MODELS from ..modules.oneflow.hijack_animatediff import animatediff_hijacker diff --git a/onediff_comfy_nodes/modules/oneflow/config.py b/onediff_comfy_nodes/modules/oneflow/config.py index 353c4f024..8a6494e31 100644 --- a/onediff_comfy_nodes/modules/oneflow/config.py +++ b/onediff_comfy_nodes/modules/oneflow/config.py @@ -3,7 +3,7 @@ import sys from pathlib import Path -from onediff.infer_compiler.utils import is_community_version +from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version # Set up paths ONEDIFF_QUANTIZED_OPTIMIZED_MODELS = "onediff_quant" diff --git a/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py b/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py index 57d775794..e8d70ebd6 100644 --- a/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py +++ b/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py @@ -6,7 +6,7 @@ from comfy.model_patcher import ModelPatcher from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule -from onediff.infer_compiler.utils import set_boolean_env_var +from onediff.utils import set_boolean_env_var from ..patch_management import PatchType, create_patch_executor diff --git a/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py b/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py index 048a0312d..a14b15603 100644 --- a/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py +++ b/onediff_comfy_nodes/modules/oneflow/utils/quant_ksampler_tools.py @@ -10,8 +10,7 @@ from nodes import KSampler, VAEDecode from onediff.infer_compiler import oneflow_compile # onediff -from onediff.infer_compiler.utils.module_operations import (get_sub_module, - modify_sub_module) +from onediff.torch_utils.module_operations import (get_sub_module, modify_sub_module) from onediff_quant import Quantizer # onediff_quant from onediff_quant.utils import (find_quantizable_modules, get_quantize_module, diff --git a/onediff_diffusers_extensions/examples/image_to_image_graph_load.py b/onediff_diffusers_extensions/examples/image_to_image_graph_load.py index cbd7dc81f..ebdc5de2f 100644 --- a/onediff_diffusers_extensions/examples/image_to_image_graph_load.py +++ b/onediff_diffusers_extensions/examples/image_to_image_graph_load.py @@ -18,7 +18,7 @@ from diffusers import EulerDiscreteScheduler from diffusers import utils -from onediff.infer_compiler.utils.cost_util import cost_cnt +from onediff.infer_compiler.backends.oneflow.utils.cost_util import cost_cnt _MODEL_ID = "stabilityai/stable-diffusion-2" diff --git 
a/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py b/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py index aa5d86058..06d16c81f 100644 --- a/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py +++ b/onediff_diffusers_extensions/examples/text_to_image_sdxl_lora.py @@ -2,7 +2,7 @@ from pathlib import Path from diffusers import DiffusionPipeline from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.utils import TensorInplaceAssign +from onediff.torch_utils import TensorInplaceAssign try: from onediffx.lora import load_and_fuse_lora, unfuse_lora, update_graph_with_constant_folding_info diff --git a/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py b/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py index 8cc31596e..6ddfcfdca 100644 --- a/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py +++ b/onediff_diffusers_extensions/onediffx/compilers/diffusion_pipeline_compiler.py @@ -1,7 +1,7 @@ import os import torch -from onediff.infer_compiler import compile, DeployableModule -from onediff.infer_compiler.utils.log_utils import logger +from onediff.infer_compiler import compile, DeployableModule, CompileOptions +from onediff.utils import logger def _recursive_getattr(obj, attr, default=None): @@ -54,6 +54,8 @@ def _filter_parts(ignores=()): def compile_pipe( pipe, *, backend="oneflow", options=None, ignores=(), fuse_qkv_projections=False, ): + if options is None: + options = CompileOptions() if fuse_qkv_projections: print("****** fuse qkv projections ******") pipe = fuse_qkv_projections_in_pipe(pipe) diff --git a/onediff_diffusers_extensions/onediffx/lora/lora.py b/onediff_diffusers_extensions/onediffx/lora/lora.py index f5bb290b4..8e7896094 100644 --- a/onediff_diffusers_extensions/onediffx/lora/lora.py +++ b/onediff_diffusers_extensions/onediffx/lora/lora.py @@ -5,7 +5,7 @@ import torch -from onediff.infer_compiler.utils.log_utils import logger +from onediff.utils import logger import diffusers from diffusers.loaders import LoraLoaderMixin diff --git a/onediff_diffusers_extensions/onediffx/lora/text_encoder.py b/onediff_diffusers_extensions/onediffx/lora/text_encoder.py index a0bdf76d0..df8f17ebe 100644 --- a/onediff_diffusers_extensions/onediffx/lora/text_encoder.py +++ b/onediff_diffusers_extensions/onediffx/lora/text_encoder.py @@ -19,7 +19,7 @@ from diffusers.utils import is_accelerate_available from diffusers.models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT -from onediff.infer_compiler.utils.log_utils import logger +from onediff.utils import logger from .utils import fuse_lora, get_adapter_names diff --git a/onediff_diffusers_extensions/onediffx/lora/unet.py b/onediff_diffusers_extensions/onediffx/lora/unet.py index cca033aa1..98834eeaa 100644 --- a/onediff_diffusers_extensions/onediffx/lora/unet.py +++ b/onediff_diffusers_extensions/onediffx/lora/unet.py @@ -4,7 +4,7 @@ import torch from onediff.infer_compiler import DeployableModule -from onediff.infer_compiler.utils.log_utils import logger +from onediff.utils import logger from diffusers.models.lora import ( LoRACompatibleConv, LoRACompatibleLinear, diff --git a/onediff_diffusers_extensions/tests/profile_lora.py b/onediff_diffusers_extensions/tests/profile_lora.py index 1bf310aee..1ecdc3535 100644 --- a/onediff_diffusers_extensions/tests/profile_lora.py +++ b/onediff_diffusers_extensions/tests/profile_lora.py @@ -7,7 +7,7 @@ from diffusers import DiffusionPipeline from 
onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.utils import TensorInplaceAssign +from onediff.torch_utils import TensorInplaceAssign from onediffx.lora import load_and_fuse_lora, unfuse_lora _time = None diff --git a/onediff_diffusers_extensions/tests/profile_multi_lora.py b/onediff_diffusers_extensions/tests/profile_multi_lora.py index 88b3d7cde..e50b6a750 100644 --- a/onediff_diffusers_extensions/tests/profile_multi_lora.py +++ b/onediff_diffusers_extensions/tests/profile_multi_lora.py @@ -8,7 +8,7 @@ from diffusers.utils.constants import USE_PEFT_BACKEND from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.utils import TensorInplaceAssign +from onediff.torch_utils import TensorInplaceAssign from onediffx.lora import load_and_fuse_lora, unfuse_lora, set_and_fuse_adapters if not USE_PEFT_BACKEND: diff --git a/onediff_sd_webui_extensions/scripts/onediff.py b/onediff_sd_webui_extensions/scripts/onediff.py index 3c7e887cd..62119740c 100644 --- a/onediff_sd_webui_extensions/scripts/onediff.py +++ b/onediff_sd_webui_extensions/scripts/onediff.py @@ -18,7 +18,7 @@ from onediff_lora import HijackLoraActivate from onediff_hijack import do_hijack as onediff_do_hijack -from onediff.infer_compiler.utils.log_utils import logger +from onediff.utils import logger from onediff.optimization.quant_optimizer import ( quantize_model, varify_can_use_quantization, diff --git a/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py b/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py index 6406e01d5..68f3a1c2e 100644 --- a/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py @@ -21,7 +21,7 @@ import diffusers from diffusers.utils import deprecate, logging -from onediff.infer_compiler.utils import parse_boolean_from_env, set_boolean_env_var +from onediff.utils import parse_boolean_from_env, set_boolean_env_var def is_xformers_available(): diff --git a/src/onediff/infer_compiler/__init__.py b/src/onediff/infer_compiler/__init__.py index bff98d894..7110e897e 100644 --- a/src/onediff/infer_compiler/__init__.py +++ b/src/onediff/infer_compiler/__init__.py @@ -1,10 +1,4 @@ import os import torch -from .core import * -from .utils import set_default_env_vars -from .utils.options import CompileOptions -from .utils.options import _GLOBAL_compile_options as compile_options - - -set_default_env_vars() +from .backends import * diff --git a/src/onediff/infer_compiler/backends/__init__.py b/src/onediff/infer_compiler/backends/__init__.py index e69de29bb..e69d3ead6 100644 --- a/src/onediff/infer_compiler/backends/__init__.py +++ b/src/onediff/infer_compiler/backends/__init__.py @@ -0,0 +1,4 @@ +from .deployable_module import DeployableModule +from .compiler import compile, oneflow_compile +from .options import CompileOptions +from .options import _GLOBAL_compile_options as compile_options \ No newline at end of file diff --git a/src/onediff/infer_compiler/core/with_onediff_compile.py b/src/onediff/infer_compiler/backends/compiler.py similarity index 90% rename from src/onediff/infer_compiler/core/with_onediff_compile.py rename to src/onediff/infer_compiler/backends/compiler.py index 3ab038162..34a67df7a 100644 --- a/src/onediff/infer_compiler/core/with_onediff_compile.py +++ b/src/onediff/infer_compiler/backends/compiler.py @@ -1,4 +1,5 @@ import torch + from .deployable_module import DeployableModule _DEFAULT_BACKEND = 
"oneflow" @@ -6,7 +7,7 @@ def compile( torch_module: torch.nn.Module, *, backend=_DEFAULT_BACKEND, options=None ) -> DeployableModule: - from ..backends.registry import lookup_backend + from .registry import lookup_backend backend = lookup_backend(backend) model = backend(torch_module, options=options) diff --git a/src/onediff/infer_compiler/core/deployable_module.py b/src/onediff/infer_compiler/backends/deployable_module.py similarity index 99% rename from src/onediff/infer_compiler/core/deployable_module.py rename to src/onediff/infer_compiler/backends/deployable_module.py index 51464df63..0982926a7 100644 --- a/src/onediff/infer_compiler/core/deployable_module.py +++ b/src/onediff/infer_compiler/backends/deployable_module.py @@ -1,7 +1,6 @@ from typing import Any import torch - class DeployableModule(torch.nn.Module): def __init__(self): torch.nn.Module.__init__(self) diff --git a/src/onediff/infer_compiler/backends/nexfort/__init__.py b/src/onediff/infer_compiler/backends/nexfort/__init__.py new file mode 100644 index 000000000..e50a723c5 --- /dev/null +++ b/src/onediff/infer_compiler/backends/nexfort/__init__.py @@ -0,0 +1 @@ +from . import nexfort as _nexfort_backend \ No newline at end of file diff --git a/src/onediff/infer_compiler/nexfort/deployable_module.py b/src/onediff/infer_compiler/backends/nexfort/deployable_module.py similarity index 91% rename from src/onediff/infer_compiler/nexfort/deployable_module.py rename to src/onediff/infer_compiler/backends/nexfort/deployable_module.py index a565a8df4..a9e94977e 100644 --- a/src/onediff/infer_compiler/nexfort/deployable_module.py +++ b/src/onediff/infer_compiler/backends/nexfort/deployable_module.py @@ -1,5 +1,6 @@ import torch -from ..core.deployable_module import DeployableModule + +from ..deployable_module import DeployableModule class NexfortDeployableModule(DeployableModule): diff --git a/src/onediff/infer_compiler/backends/nexfort.py b/src/onediff/infer_compiler/backends/nexfort/nexfort.py similarity index 78% rename from src/onediff/infer_compiler/backends/nexfort.py rename to src/onediff/infer_compiler/backends/nexfort/nexfort.py index 80e0df3b1..3d49ee062 100644 --- a/src/onediff/infer_compiler/backends/nexfort.py +++ b/src/onediff/infer_compiler/backends/nexfort/nexfort.py @@ -1,14 +1,14 @@ import dataclasses import torch -from .registry import register_backend +from ..registry import register_backend @register_backend("nexfort") def compile(torch_module: torch.nn.Module, *, options=None): from nexfort.utils.memory_format import apply_memory_format from nexfort.compilers import nexfort_compile - from ..nexfort.deployable_module import NexfortDeployableModule - from ..utils import CompileOptions + from .deployable_module import NexfortDeployableModule + from ..options import CompileOptions options = options if options is not None else CompileOptions() nexfort_options = options.nexfort diff --git a/src/onediff/infer_compiler/backends/oneflow/__init__.py b/src/onediff/infer_compiler/backends/oneflow/__init__.py new file mode 100644 index 000000000..69c5c9b11 --- /dev/null +++ b/src/onediff/infer_compiler/backends/oneflow/__init__.py @@ -0,0 +1,3 @@ +from . 
import oneflow as _oneflow_backend +from .deployable_module import OneflowDeployableModule +from .env_var import OneflowCompileOptions diff --git a/src/onediff/infer_compiler/utils/args_tree_util.py b/src/onediff/infer_compiler/backends/oneflow/args_tree_util.py similarity index 98% rename from src/onediff/infer_compiler/utils/args_tree_util.py rename to src/onediff/infer_compiler/backends/oneflow/args_tree_util.py index 598b95828..fb253e800 100644 --- a/src/onediff/infer_compiler/utils/args_tree_util.py +++ b/src/onediff/infer_compiler/backends/oneflow/args_tree_util.py @@ -1,7 +1,7 @@ import torch import oneflow as flow from oneflow.framework.args_tree import ArgsTree -from .log_utils import logger +from onediff.utils import logger def input_output_processor(func): diff --git a/src/onediff/infer_compiler/oneflow/deployable_module.py b/src/onediff/infer_compiler/backends/oneflow/deployable_module.py similarity index 72% rename from src/onediff/infer_compiler/oneflow/deployable_module.py rename to src/onediff/infer_compiler/backends/oneflow/deployable_module.py index 71db38870..dd15fa94a 100644 --- a/src/onediff/infer_compiler/oneflow/deployable_module.py +++ b/src/onediff/infer_compiler/backends/oneflow/deployable_module.py @@ -1,20 +1,56 @@ import types import torch +from functools import wraps + import oneflow as flow -from ..core.deployable_module import DeployableModule -from ..transform.manager import transform_mgr -from ..utils.oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled -from ..utils.args_tree_util import input_output_processor -from ..utils.log_utils import logger -from ..utils.param_utils import parse_device, check_device, generate_constant_folding_info -from ..utils.graph_management_utils import graph_file_management -from ..utils.online_quantization_utils import quantize_and_deploy_wrapper -from ..utils.options import OneflowCompileOptions +from onediff.utils import logger + +from ..deployable_module import DeployableModule + +from ...transform.manager import transform_mgr +from ...transform.builtin_transform import torch2oflow + +from .dual_module import DualModule, get_mixed_dual_module +from .oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled +from .args_tree_util import input_output_processor +from .param_utils import parse_device, check_device, generate_constant_folding_info +from .graph_management_utils import graph_file_management +from .online_quantization_utils import quantize_and_deploy_wrapper +from .env_var import OneflowCompileOptions + + +@torch2oflow.register +def _(mod: DualModule, verbose=False): + return torch2oflow(mod._torch_module, verbose) + + +def handle_deployable_exception(func): + @wraps(func) + def wrapper(self, *args, **kwargs): + if transform_mgr.debug_mode: + return func(self, *args, **kwargs) + else: + try: + return func(self, *args, **kwargs) + except Exception as e: + logger.error(f"Exception in {func.__name__}: {e=}") + logger.warning("Recompile oneflow module ...") + del self._deployable_module_model.oneflow_module + self._deployable_module_dpl_graph = None + return func(self, *args, **kwargs) -from .utils import handle_deployable_exception, get_mixed_dual_module, get_oneflow_graph + return wrapper +def get_oneflow_graph(model, size=9, dynamic_graph=True): + from .graph import OneflowGraph + + g = OneflowGraph(model) + g._dynamic_input_graph_cache.set_cache_size(size) + g._dynamic_input_graph_cache.enable_shared(dynamic_graph) + return g + class OneflowDeployableModule(DeployableModule): def 
__init__( self, torch_module, oneflow_module, dynamic=True, options=None, @@ -199,3 +235,29 @@ def apply_online_quant(self, quant_config): >>> model.apply_online_quant(quant_config) """ self._deployable_module_quant_config = quant_config + +def get_mixed_deployable_module(module_cls): + + class MixedOneflowDeployableModule(OneflowDeployableModule, module_cls): + def __init__(self, torch_module, oneflow_module, dynamic=True, options=None): + OneflowDeployableModule.__init__( + self, torch_module, oneflow_module, dynamic, options + ) + self._is_raw_deployable_module = False + + @classmethod + def from_existing(cls, existing_module, dynamic=True, options=None): + torch_module = existing_module._deployable_module_model._torch_module + oneflow_module = existing_module._deployable_module_model._oneflow_module + instance = cls(torch_module, oneflow_module, dynamic, options) + instance._deployable_module_dpl_graph = None + if hasattr(existing_module, "_deployable_module_dpl_graph"): + instance._deployable_module_dpl_graph = ( + existing_module._deployable_module_dpl_graph + ) + return instance + + def _get_name(self): + return f"{self.__class__.__name__}(of {module_cls.__name__})" + + return MixedOneflowDeployableModule diff --git a/src/onediff/infer_compiler/oneflow/dual_module.py b/src/onediff/infer_compiler/backends/oneflow/dual_module.py similarity index 88% rename from src/onediff/infer_compiler/oneflow/dual_module.py rename to src/onediff/infer_compiler/backends/oneflow/dual_module.py index 11a59ca18..7f2d67bc2 100644 --- a/src/onediff/infer_compiler/oneflow/dual_module.py +++ b/src/onediff/infer_compiler/backends/oneflow/dual_module.py @@ -7,9 +7,9 @@ import oneflow as flow from oneflow.utils.tensor import to_torch -from ..transform.builtin_transform import torch2oflow -from ..utils.oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled -from ..utils.log_utils import logger +from onediff.utils import logger +from ...transform.builtin_transform import torch2oflow +from .oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled class DualModule(torch.nn.Module): @@ -91,8 +91,6 @@ def __getattr__(self, name): return DualModuleList(torch_attr, oneflow_attr) elif isinstance(torch_attr, torch.nn.Module): - from .utils import get_mixed_dual_module - return get_mixed_dual_module(torch_attr.__class__)(torch_attr, oneflow_attr) else: return oneflow_attr if oneflow_exec_mode_enabled() else torch_attr @@ -120,7 +118,6 @@ def __init__(self, torch_modules, oneflow_modules): assert len(torch_modules) == len(oneflow_modules) self._torch_modules = torch_modules self._oneflow_modules = oneflow_modules - from .utils import get_mixed_dual_module dual_modules = [] for torch_module, oneflow_module in zip( @@ -152,3 +149,20 @@ def __setattr__(self, key, value): value = torch2oflow(value) setattr(self._oneflow_modules, key, value) return object.__setattr__(self, key, value) + +def get_mixed_dual_module(module_cls): + if issubclass(module_cls, DualModule) and "MixedDualModule" in module_cls.__name__: + return module_cls + + class MixedDualModule(DualModule, module_cls): + def __init__(self, torch_module, oneflow_module): + while isinstance(torch_module, DualModule): + torch_module = torch_module._torch_module + DualModule.__init__(self, torch_module, oneflow_module) + + def _get_name(self) -> str: + return f"{self.__class__.__name__}(of {module_cls.__name__})" + + return MixedDualModule + + diff --git a/src/onediff/infer_compiler/utils/env_var.py 
b/src/onediff/infer_compiler/backends/oneflow/env_var.py similarity index 76% rename from src/onediff/infer_compiler/utils/env_var.py rename to src/onediff/infer_compiler/backends/oneflow/env_var.py index ce58d8f93..6109330e3 100644 --- a/src/onediff/infer_compiler/utils/env_var.py +++ b/src/onediff/infer_compiler/backends/oneflow/env_var.py @@ -1,36 +1,49 @@ import dataclasses import os +import torch from typing import Optional - -def parse_boolean_from_env(env_var, default_value=None): - env_var = os.getenv(env_var) - if env_var is None: - return default_value - env_var = env_var.lower() - return env_var in ("1", "true", "yes", "on", "y") - - -def set_boolean_env_var(env_var: str, val: Optional[bool]): - if val is None: - os.environ.pop(env_var, None) - else: - os.environ[env_var] = "1" if val else "0" - - -def parse_integer_from_env(env_var, default_value=None): - env_var = os.getenv(env_var) - if env_var is None: - return default_value - return int(env_var) - - -def set_integer_env_var(env_var: str, val: Optional[int]): - if val is None: - os.environ.pop(env_var, None) - else: - os.environ[env_var] = str(int(val)) - +from onediff.utils import set_boolean_env_var, set_integer_env_var + + +@dataclasses.dataclass +class OneflowCompileOptions: + use_graph: bool = True + debug_level: int = -1 + max_cached_graph_size: int = 9 + graph_file: str = None + graph_file_device: torch.device = None + + # Optimization related environment variables + run_graph_by_vm: bool = None + graph_delay_variable_op_execution: bool = None + + conv_allow_half_precision_accumulation: bool = None + matmul_allow_half_precision_accumulation: bool = None + attention_allow_half_precision_accumulation: bool = None + attention_allow_half_precision_score_accumulation_max_m: int = None + attention_allow_quantization: bool = None + + mlir_cse: bool = None + mlir_enable_inference_optimization: bool = None + mlir_enable_round_trip: bool = None + mlir_fuse_forward_ops: bool = None + mlir_fuse_ops_with_backward_impl: bool = None + mlir_group_matmul: bool = None + mlir_prefer_nhwc: bool = None + mlir_fuse_kernel_launch: bool = None + + kernel_enable_cuda_graph: bool = None + kernel_enable_fused_conv_bias: bool = None + kernel_enable_fused_linear: bool = None + kernel_conv_cutlass_impl_enable_tuning_warmup: bool = None + kernel_enable_conv2d_tuning_warmup: bool = None + kernel_gemm_cutlass_impl_enable_tuning_warmup: bool = None + kernel_conv_enable_cutlass_impl: bool = None + kernel_gemm_enable_cutlass_impl: bool = None + kernel_glu_enable_dual_gemm_impl: bool = None + kernel_glu_enable_y_gemm_impl: bool = None + kernel_glu_quant_enable_dual_gemm_impl: bool = None def _set_env_vars(field2env_var, options): for field in dataclasses.fields(options): @@ -117,17 +130,3 @@ def set_oneflow_default_env_vars(): # TODO: enable this will cause the failure of multi resolution warmup # os.environ.setdefault("ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH", "1") # os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_CUDA_GRAPH", "1") - - -def set_nexfort_env_vars(options): - field2env_var = {} - _set_env_vars(field2env_var, options) - - -def set_nexfort_default_env_vars(): - pass - - -def set_default_env_vars(): - set_oneflow_default_env_vars() - set_nexfort_default_env_vars() diff --git a/src/onediff/infer_compiler/oneflow/graph.py b/src/onediff/infer_compiler/backends/oneflow/graph.py similarity index 92% rename from src/onediff/infer_compiler/oneflow/graph.py rename to src/onediff/infer_compiler/backends/oneflow/graph.py index 34aef1663..823041ca0 100644 --- 
a/src/onediff/infer_compiler/oneflow/graph.py +++ b/src/onediff/infer_compiler/backends/oneflow/graph.py @@ -1,9 +1,9 @@ import oneflow as flow -from ..transform.manager import transform_mgr -from ..transform.builtin_transform import reverse_proxy_class -from ..utils.log_utils import logger -from ..utils.cost_util import cost_cnt +from onediff.utils import logger +from ...transform.manager import transform_mgr +from ...transform.builtin_transform import reverse_proxy_class +from .utils.cost_util import cost_cnt class OneflowGraph(flow.nn.Graph): diff --git a/src/onediff/infer_compiler/utils/graph_management_utils.py b/src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py similarity index 95% rename from src/onediff/infer_compiler/utils/graph_management_utils.py rename to src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py index 27d6b5391..7fd0a3bb5 100644 --- a/src/onediff/infer_compiler/utils/graph_management_utils.py +++ b/src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py @@ -7,11 +7,11 @@ from pathlib import Path from functools import wraps from oneflow.framework.args_tree import ArgsTree -from ..transform.builtin_transform import torch2oflow -from ..transform.manager import transform_mgr -from .log_utils import logger -from .cost_util import cost_time -from .options import OneflowCompileOptions +from ...transform.builtin_transform import torch2oflow +from ...transform.manager import transform_mgr +from .utils.cost_util import cost_time +from .env_var import OneflowCompileOptions +from onediff.utils import logger def calculate_model_hash(model): diff --git a/src/onediff/infer_compiler/backends/oneflow.py b/src/onediff/infer_compiler/backends/oneflow/oneflow.py similarity index 89% rename from src/onediff/infer_compiler/backends/oneflow.py rename to src/onediff/infer_compiler/backends/oneflow/oneflow.py index 71b010950..bd6d0101a 100644 --- a/src/onediff/infer_compiler/backends/oneflow.py +++ b/src/onediff/infer_compiler/backends/oneflow/oneflow.py @@ -1,5 +1,6 @@ import torch -from .registry import register_backend + +from ..registry import register_backend @register_backend("oneflow") @@ -19,17 +20,18 @@ def compile(torch_module: torch.nn.Module, *, options=None): - 'graph_file' (None) generates a compilation cache file. If the file exists, loading occurs; if not, the compilation result is saved after the first run. - 'graph_file_device' (None) sets the device for the graph file, default None. If set, the compilation result will be converted to the specified device. 
""" - from ..oneflow.deployable_module import OneflowDeployableModule - from ..oneflow.utils import get_mixed_deployable_module - from ..transform.custom_transform import set_default_registry - from ..utils import CompileOptions, set_oneflow_env_vars - from ..utils.param_utils import ( + from .deployable_module import OneflowDeployableModule, get_mixed_deployable_module + from .env_var import set_oneflow_default_env_vars, set_oneflow_env_vars + from ..options import CompileOptions + from .param_utils import ( state_update_hook, init_state_update_attr, forward_pre_check_and_update_state_hook, forward_generate_constant_folding_info_hook, ) + from ...transform.custom_transform import set_default_registry + set_oneflow_default_env_vars() set_default_registry() options = options if options is not None else CompileOptions() diff --git a/src/onediff/infer_compiler/utils/oneflow_exec_mode.py b/src/onediff/infer_compiler/backends/oneflow/oneflow_exec_mode.py similarity index 100% rename from src/onediff/infer_compiler/utils/oneflow_exec_mode.py rename to src/onediff/infer_compiler/backends/oneflow/oneflow_exec_mode.py diff --git a/src/onediff/infer_compiler/utils/online_quantization_utils.py b/src/onediff/infer_compiler/backends/oneflow/online_quantization_utils.py similarity index 100% rename from src/onediff/infer_compiler/utils/online_quantization_utils.py rename to src/onediff/infer_compiler/backends/oneflow/online_quantization_utils.py diff --git a/src/onediff/infer_compiler/utils/param_utils.py b/src/onediff/infer_compiler/backends/oneflow/param_utils.py similarity index 99% rename from src/onediff/infer_compiler/utils/param_utils.py rename to src/onediff/infer_compiler/backends/oneflow/param_utils.py index cbe71d003..3e08c11f3 100644 --- a/src/onediff/infer_compiler/utils/param_utils.py +++ b/src/onediff/infer_compiler/backends/oneflow/param_utils.py @@ -3,7 +3,7 @@ import oneflow as flow from typing import List, Dict, Any, Union -from .log_utils import logger +from onediff.utils import logger def parse_device(args: List[Any], kwargs: Dict[str, Any]): diff --git a/src/onediff/infer_compiler/nexfort/__init__.py b/src/onediff/infer_compiler/backends/oneflow/utils/__init__.py similarity index 100% rename from src/onediff/infer_compiler/nexfort/__init__.py rename to src/onediff/infer_compiler/backends/oneflow/utils/__init__.py diff --git a/src/onediff/infer_compiler/utils/cost_util.py b/src/onediff/infer_compiler/backends/oneflow/utils/cost_util.py similarity index 99% rename from src/onediff/infer_compiler/utils/cost_util.py rename to src/onediff/infer_compiler/backends/oneflow/utils/cost_util.py index 59a12a36a..4cb1575f5 100644 --- a/src/onediff/infer_compiler/utils/cost_util.py +++ b/src/onediff/infer_compiler/backends/oneflow/utils/cost_util.py @@ -2,7 +2,7 @@ import oneflow as flow import time import inspect -from .log_utils import logger +from onediff.utils import logger __all__ = ["cost_cnt", "cost_time"] diff --git a/src/onediff/infer_compiler/utils/version_util.py b/src/onediff/infer_compiler/backends/oneflow/utils/version_util.py similarity index 96% rename from src/onediff/infer_compiler/utils/version_util.py rename to src/onediff/infer_compiler/backends/oneflow/utils/version_util.py index 58dc6ab08..5e0d22a8e 100644 --- a/src/onediff/infer_compiler/utils/version_util.py +++ b/src/onediff/infer_compiler/backends/oneflow/utils/version_util.py @@ -1,5 +1,5 @@ from importlib_metadata import version -from .log_utils import logger +from onediff.utils import logger def 
get_support_message(): diff --git a/src/onediff/infer_compiler/backends/options.py b/src/onediff/infer_compiler/backends/options.py new file mode 100644 index 000000000..79696466c --- /dev/null +++ b/src/onediff/infer_compiler/backends/options.py @@ -0,0 +1,13 @@ +from typing import Dict + + +class CompileOptions: + def __init__(self, dynamic=True, oneflow=None, nexfort=None): + from .oneflow import OneflowCompileOptions + self.dynamic = dynamic + self.oneflow = oneflow if oneflow is not None else OneflowCompileOptions() + self.nexfort = nexfort if nexfort is not None else dict() + + +# a global default compile options +_GLOBAL_compile_options = CompileOptions() diff --git a/src/onediff/infer_compiler/backends/registry.py b/src/onediff/infer_compiler/backends/registry.py index 46c1234cd..88c08a724 100644 --- a/src/onediff/infer_compiler/backends/registry.py +++ b/src/onediff/infer_compiler/backends/registry.py @@ -28,23 +28,14 @@ def lookup_backend(compiler_fn): """Expand backend strings to functions""" if isinstance(compiler_fn, str): if compiler_fn not in _BACKENDS: - _lazy_import() + _lazy_import(compiler_fn) if compiler_fn not in _BACKENDS: raise RuntimeError(f"invalid backend {compiler_fn}") compiler_fn = _BACKENDS[compiler_fn] return compiler_fn -@functools.lru_cache(None) -def _lazy_import(): +def _lazy_import(backend_name): from .. import backends - - def import_submodule(mod: types.ModuleType): - """ - Ensure all the files in a given submodule are imported - """ - for filename in sorted(os.listdir(os.path.dirname(cast(str, mod.__file__)))): - if filename.endswith(".py") and filename[0] != "_": - importlib.import_module(f"{mod.__name__}.{filename[:-3]}") - - import_submodule(backends) + backend_path = f"{backends.__name__}.{backend_name}" + importlib.import_module(backend_path) diff --git a/src/onediff/infer_compiler/core/__init__.py b/src/onediff/infer_compiler/core/__init__.py deleted file mode 100644 index 2c2324087..000000000 --- a/src/onediff/infer_compiler/core/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .deployable_module import DeployableModule -from .with_onediff_compile import compile, oneflow_compile diff --git a/src/onediff/infer_compiler/import_tools/dyn_mock_mod.py b/src/onediff/infer_compiler/import_tools/dyn_mock_mod.py index 4cb5fa6fc..8ac3ae0c9 100644 --- a/src/onediff/infer_compiler/import_tools/dyn_mock_mod.py +++ b/src/onediff/infer_compiler/import_tools/dyn_mock_mod.py @@ -10,8 +10,8 @@ from oneflow.mock_torch import enable from oneflow.mock_torch.mock_importer import _importer from .import_module_utils import import_module_from_path -from ..utils.log_utils import logger -from ..utils.patch_for_compiler import * +from onediff.utils import logger +from .patch_for_compiler import * __all__ = ["DynamicMockModule"] diff --git a/src/onediff/infer_compiler/import_tools/importer.py b/src/onediff/infer_compiler/import_tools/importer.py index 0ac9ac4ba..854a7577b 100644 --- a/src/onediff/infer_compiler/import_tools/importer.py +++ b/src/onediff/infer_compiler/import_tools/importer.py @@ -9,7 +9,7 @@ from importlib.metadata import requires from .format_utils import MockEntityNameFormatter from .dyn_mock_mod import DynamicMockModule -from ..utils.log_utils import logger +from onediff.utils import logger __all__ = ["LazyMocker", "is_need_mock"] diff --git a/src/onediff/infer_compiler/utils/patch_for_compiler.py b/src/onediff/infer_compiler/import_tools/patch_for_compiler.py similarity index 100% rename from src/onediff/infer_compiler/utils/patch_for_compiler.py rename 
to src/onediff/infer_compiler/import_tools/patch_for_compiler.py diff --git a/src/onediff/infer_compiler/oneflow/__init__.py b/src/onediff/infer_compiler/oneflow/__init__.py deleted file mode 100644 index 6066ae13e..000000000 --- a/src/onediff/infer_compiler/oneflow/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .deployable_module import OneflowDeployableModule diff --git a/src/onediff/infer_compiler/oneflow/config.py b/src/onediff/infer_compiler/oneflow/config.py deleted file mode 100644 index 0e1d2f543..000000000 --- a/src/onediff/infer_compiler/oneflow/config.py +++ /dev/null @@ -1,148 +0,0 @@ -import os -from typing import Optional -import dataclasses -from ..utils import ( - parse_boolean_from_env, - set_boolean_env_var, - parse_integer_from_env, - set_integer_env_var, -) - - -def init_default_env(): - # ONEFLOW_RUN_GRAPH_BY_VM must set here to enable nn.Graph init with vm run - os.environ.setdefault("ONEFLOW_RUN_GRAPH_BY_VM", "1") - os.environ.setdefault("ONEFLOW_GRAPH_DELAY_VARIABLE_OP_EXECUTION", "1") - - os.environ.setdefault("ONEFLOW_MLIR_CSE", "1") - os.environ.setdefault("ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION", "1") - os.environ.setdefault("ONEFLOW_MLIR_ENABLE_ROUND_TRIP", "1") - os.environ.setdefault("ONEFLOW_MLIR_FUSE_FORWARD_OPS", "1") - os.environ.setdefault("ONEFLOW_MLIR_FUSE_OPS_WITH_BACKWARD_IMPL", "1") - os.environ.setdefault("ONEFLOW_MLIR_GROUP_MATMUL", "1") - os.environ.setdefault("ONEFLOW_MLIR_PREFER_NHWC", "1") - - os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_FUSED_CONV_BIAS", "1") - os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_FUSED_LINEAR", "1") - os.environ.setdefault("ONEFLOW_KERNEL_CONV_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", "1") - os.environ.setdefault("ONEFLOW_KERNEL_GEMM_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", "1") - os.environ.setdefault("ONEFLOW_KERNEL_CONV_ENABLE_CUTLASS_IMPL", "1") - os.environ.setdefault("ONEFLOW_KERNEL_GEMM_ENABLE_CUTLASS_IMPL", "1") - os.environ.setdefault("ONEFLOW_CONVOLUTION_BIAS_ADD_ACT_FUSION", "1") - # os.environ.setdefault("ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL", "0") - # os.environ.setdefault("ONEFLOW_KERNEL_GLU_ENABLE_Y_GEMM_IMPL", "0") - # os.environ.setdefault("ONEFLOW_KERNEL_GLU_QUANT_ENABLE_DUAL_GEMM_IMPL", "0") - - os.environ.setdefault("ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION", "1") - os.environ.setdefault("ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION", "1") - os.environ.setdefault("ONEFLOW_LINEAR_EMBEDDING_SKIP_INIT", "1") - # os.environ.setdefault("ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_ACCUMULATION", "1") - # os.environ.setdefault("ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_SCORE_ACCUMULATION_MAX_M", "-1") - # os.environ.setdefault("ONEFLOW_ATTENTION_ALLOW_QUANTIZATION", "1") - - os.environ.setdefault("ONEFLOW_MLIR_GROUP_MATMUL_QUANT", "1") - os.environ.setdefault("ONEFLOW_CONV2D_KERNEL_ENABLE_TUNING_WARMUP", "1") - # TODO: enable this will cause the failure of multi resolution warmup - # os.environ.setdefault("ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH", "1") - # os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_CUDA_GRAPH", "1") - - -@dataclasses.dataclass -class OneFlowCompilerConfig: - run_graph_by_vm: Optional[bool] = None - graph_delay_variable_op_execution: Optional[bool] = None - - mlir_cse: Optional[bool] = None - mlir_enable_inference_optimization: Optional[bool] = None - mlir_enable_round_trip: Optional[bool] = None - mlir_fuse_forward_ops: Optional[bool] = None - mlir_fuse_ops_with_backward_impl: Optional[bool] = None - mlir_group_matmul: Optional[bool] = None - mlir_prefer_nhwc: Optional[bool] = None - 
mlir_fuse_kernel_launch: Optional[bool] = None - - kernel_enable_cuda_graph: Optional[bool] = None - kernel_enable_fused_conv_bias: Optional[bool] = None - kernel_enable_fused_linear: Optional[bool] = None - kernel_conv_cutlass_impl_enable_tuning_warmup: Optional[bool] = None - kernel_gemm_cutlass_impl_enable_tuning_warmup: Optional[bool] = None - kernel_conv_enable_cutlass_impl: Optional[bool] = None - kernel_gemm_enable_cutlass_impl: Optional[bool] = None - kernel_glu_enable_dual_gemm_impl: Optional[bool] = None - kernel_glu_enable_y_gemm_impl: Optional[bool] = None - kernel_glu_quant_enable_dual_gemm_impl: Optional[bool] = None - - conv_allow_half_precision_accumulation: Optional[bool] = None - matmul_allow_half_precision_accumulation: Optional[bool] = None - linear_embedding_skip_init: Optional[bool] = None - attention_allow_half_precision_accumulation: Optional[bool] = None - attention_allow_half_precision_score_accumulation_max_m: Optional[int] = None - attention_allow_quantization: Optional[bool] = None - conv2d_kernel_enable_tuning_warmup: Optional[bool] = None - - attr2env_var = { - "run_graph_by_vm": "ONEFLOW_RUN_GRAPH_BY_VM", - "graph_delay_variable_op_execution": "ONEFLOW_GRAPH_DELAY_VARIABLE_OP_EXECUTION", - "mlir_cse": "ONEFLOW_MLIR_CSE", - "mlir_enable_inference_optimization": "ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION", - "mlir_enable_round_trip": "ONEFLOW_MLIR_ENABLE_ROUND_TRIP", - "mlir_fuse_forward_ops": "ONEFLOW_MLIR_FUSE_FORWARD_OPS", - "mlir_fuse_ops_with_backward_impl": "ONEFLOW_MLIR_FUSE_OPS_WITH_BACKWARD_IMPL", - "mlir_group_matmul": "ONEFLOW_MLIR_GROUP_MATMUL", - "mlir_prefer_nhwc": "ONEFLOW_MLIR_PREFER_NHWC", - "mlir_fuse_kernel_launch": "ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH", - "kernel_enable_cuda_graph": "ONEFLOW_KERNEL_ENABLE_CUDA_GRAPH", - "kernel_enable_fused_conv_bias": "ONEFLOW_KERNEL_ENABLE_FUSED_CONV_BIAS", - "kernel_enable_fused_linear": "ONEFLOW_KERNEL_ENABLE_FUSED_LINEAR", - "kernel_conv_cutlass_impl_enable_tuning_warmup": "ONEFLOW_KERNEL_CONV_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", - "kernel_gemm_cutlass_impl_enable_tuning_warmup": "ONEFLOW_KERNEL_GEMM_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", - "kernel_conv_enable_cutlass_impl": "ONEFLOW_KERNEL_CONV_ENABLE_CUTLASS_IMPL", - "kernel_gemm_enable_cutlass_impl": "ONEFLOW_KERNEL_GEMM_ENABLE_CUTLASS_IMPL", - "kernel_glu_enable_dual_gemm_impl": "ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL", - "kernel_glu_enable_y_gemm_impl": "ONEFLOW_KERNEL_GLU_ENABLE_Y_GEMM_IMPL", - "kernel_glu_quant_enable_dual_gemm_impl": "ONEFLOW_KERNEL_GLU_QUANT_ENABLE_DUAL_GEMM_IMPL", - "conv_allow_half_precision_accumulation": "ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION", - "matmul_allow_half_precision_accumulation": "ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION", - "linear_embedding_skip_init": "ONEFLOW_LINEAR_EMBEDDING_SKIP_INIT", - "attention_allow_half_precision_accumulation": "ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_ACCUMULATION", - "attention_allow_half_precision_score_accumulation_max_m": "ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_SCORE_ACCUMULATION_MAX_M", - "conv2d_kernel_enable_tuning_warmup":'ONEFLOW_CONV2D_KERNEL_ENABLE_TUNING_WARMUP', - } - - def __post_init__(self): - fields = dataclasses.fields(self) - fields = {field.name: field for field in fields} - for name in self.attr2env_var: - if fields[name].type in (bool, Optional[bool]): - super().__setattr__( - name, parse_boolean_from_env(self.attr2env_var[name]) - ) - elif fields[name].type in (int, Optional[int]): - super().__setattr__( - name, 
parse_integer_from_env(self.attr2env_var[name]) - ) - else: - raise ValueError( - f"Unsupported type {dataclasses.fields(self)[name].type}" - ) - - super().__setattr__("_initialized", True) - - def __setattr__(self, name, value): - super().__setattr__(name, value) - if getattr(self, "_initialized", False) and name in self.attr2env_var: - fields = dataclasses.fields(self) - fields = dataclasses.fields(self) - fields = {field.name: field for field in fields} - if fields[name].type in (bool, Optional[bool]): - set_boolean_env_var(self.attr2env_var[name], value) - elif fields[name].type in (int, Optional[int]): - set_integer_env_var(self.attr2env_var[name], value) - else: - raise ValueError( - f"Unsupported type {dataclasses.fields(self)[name].type}" - ) - - -init_default_env() -oneflow_compiler_config = OneFlowCompilerConfig() diff --git a/src/onediff/infer_compiler/oneflow/utils.py b/src/onediff/infer_compiler/oneflow/utils.py deleted file mode 100644 index 4a5e899aa..000000000 --- a/src/onediff/infer_compiler/oneflow/utils.py +++ /dev/null @@ -1,83 +0,0 @@ -from functools import wraps - -from ..transform.builtin_transform import torch2oflow -from ..transform.manager import transform_mgr -from ..utils.log_utils import logger -from .dual_module import DualModule - - -@torch2oflow.register -def _(mod: DualModule, verbose=False): - return torch2oflow(mod._torch_module, verbose) - - -def handle_deployable_exception(func): - @wraps(func) - def wrapper(self, *args, **kwargs): - if transform_mgr.debug_mode: - return func(self, *args, **kwargs) - else: - try: - return func(self, *args, **kwargs) - except Exception as e: - logger.error(f"Exception in {func.__name__}: {e=}") - logger.warning("Recompile oneflow module ...") - del self._deployable_module_model.oneflow_module - self._deployable_module_dpl_graph = None - return func(self, *args, **kwargs) - - return wrapper - - -def get_mixed_dual_module(module_cls): - if issubclass(module_cls, DualModule) and "MixedDualModule" in module_cls.__name__: - return module_cls - - class MixedDualModule(DualModule, module_cls): - def __init__(self, torch_module, oneflow_module): - while isinstance(torch_module, DualModule): - torch_module = torch_module._torch_module - DualModule.__init__(self, torch_module, oneflow_module) - - def _get_name(self) -> str: - return f"{self.__class__.__name__}(of {module_cls.__name__})" - - return MixedDualModule - - -# Return a OneflowDeployableModule that using module_cls as it's parent class. 
-def get_mixed_deployable_module(module_cls): - from .deployable_module import OneflowDeployableModule - - class MixedOneflowDeployableModule(OneflowDeployableModule, module_cls): - def __init__(self, torch_module, oneflow_module, dynamic=True, options=None): - OneflowDeployableModule.__init__( - self, torch_module, oneflow_module, dynamic, options - ) - self._is_raw_deployable_module = False - - @classmethod - def from_existing(cls, existing_module, dynamic=True, options=None): - torch_module = existing_module._deployable_module_model._torch_module - oneflow_module = existing_module._deployable_module_model._oneflow_module - instance = cls(torch_module, oneflow_module, dynamic, options) - instance._deployable_module_dpl_graph = None - if hasattr(existing_module, "_deployable_module_dpl_graph"): - instance._deployable_module_dpl_graph = ( - existing_module._deployable_module_dpl_graph - ) - return instance - - def _get_name(self): - return f"{self.__class__.__name__}(of {module_cls.__name__})" - - return MixedOneflowDeployableModule - - -def get_oneflow_graph(model, size=9, dynamic_graph=True): - from .graph import OneflowGraph - - g = OneflowGraph(model) - g._dynamic_input_graph_cache.set_cache_size(size) - g._dynamic_input_graph_cache.enable_shared(dynamic_graph) - return g diff --git a/src/onediff/infer_compiler/transform/builtin_transform.py b/src/onediff/infer_compiler/transform/builtin_transform.py index d0121dd3b..04f49dca6 100644 --- a/src/onediff/infer_compiler/transform/builtin_transform.py +++ b/src/onediff/infer_compiler/transform/builtin_transform.py @@ -12,8 +12,8 @@ import oneflow as flow from .manager import transform_mgr -from ..utils.log_utils import logger -from ..utils.patch_for_diffusers import diffusers_checker +from onediff.utils import logger +from .patch_for_diffusers import diffusers_checker from ..import_tools.importer import is_need_mock from .patch_for_comfy import PatchForComfy diff --git a/src/onediff/infer_compiler/transform/custom_transform.py b/src/onediff/infer_compiler/transform/custom_transform.py index 0d0e71f59..45d46ed8d 100644 --- a/src/onediff/infer_compiler/transform/custom_transform.py +++ b/src/onediff/infer_compiler/transform/custom_transform.py @@ -6,7 +6,7 @@ from ..import_tools import import_module_from_path from .manager import transform_mgr from .builtin_transform import torch2oflow -from ..utils.log_utils import logger +from onediff.utils import logger __all__ = ["register"] diff --git a/src/onediff/infer_compiler/transform/manager.py b/src/onediff/infer_compiler/transform/manager.py index 63a95b5db..daaede7c1 100644 --- a/src/onediff/infer_compiler/transform/manager.py +++ b/src/onediff/infer_compiler/transform/manager.py @@ -5,7 +5,7 @@ import logging from typing import Dict, List, Union from pathlib import Path -from ..utils.log_utils import logger +from onediff.utils import logger from ..import_tools.importer import LazyMocker __all__ = ["transform_mgr"] diff --git a/src/onediff/infer_compiler/utils/patch_for_diffusers.py b/src/onediff/infer_compiler/transform/patch_for_diffusers.py similarity index 95% rename from src/onediff/infer_compiler/utils/patch_for_diffusers.py rename to src/onediff/infer_compiler/transform/patch_for_diffusers.py index 1de90c151..e5cb43cbf 100644 --- a/src/onediff/infer_compiler/utils/patch_for_diffusers.py +++ b/src/onediff/infer_compiler/transform/patch_for_diffusers.py @@ -1,6 +1,6 @@ # TODO: remove this file to diffusers/src/infer_compiler_registry/register_diffusers from abc import ABC, abstractmethod 
-from .log_utils import logger +from onediff.utils import logger try: import diffusers diff --git a/src/onediff/infer_compiler/utils/__init__.py b/src/onediff/infer_compiler/utils/__init__.py deleted file mode 100644 index 076b41bcd..000000000 --- a/src/onediff/infer_compiler/utils/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from .oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled -from .env_var import ( - parse_boolean_from_env, - set_boolean_env_var, - parse_integer_from_env, - set_integer_env_var, - set_oneflow_env_vars, - set_oneflow_default_env_vars, - set_nexfort_env_vars, - set_nexfort_default_env_vars, - set_default_env_vars, -) -from .model_inplace_assign import TensorInplaceAssign -from .version_util import ( - get_support_message, - is_quantization_enabled, - is_community_version, -) -from .options import * diff --git a/src/onediff/infer_compiler/utils/options.py b/src/onediff/infer_compiler/utils/options.py deleted file mode 100644 index beccd13db..000000000 --- a/src/onediff/infer_compiler/utils/options.py +++ /dev/null @@ -1,63 +0,0 @@ -import dataclasses -from typing import Dict -import torch - - -@dataclasses.dataclass -class OneflowCompileOptions: - use_graph: bool = True - debug_level: int = -1 - max_cached_graph_size: int = 9 - graph_file: str = None - graph_file_device: torch.device = None - - # Optimization related environment variables - run_graph_by_vm: bool = None - graph_delay_variable_op_execution: bool = None - - conv_allow_half_precision_accumulation: bool = None - matmul_allow_half_precision_accumulation: bool = None - attention_allow_half_precision_accumulation: bool = None - attention_allow_half_precision_score_accumulation_max_m: int = None - attention_allow_quantization: bool = None - - mlir_cse: bool = None - mlir_enable_inference_optimization: bool = None - mlir_enable_round_trip: bool = None - mlir_fuse_forward_ops: bool = None - mlir_fuse_ops_with_backward_impl: bool = None - mlir_group_matmul: bool = None - mlir_prefer_nhwc: bool = None - mlir_fuse_kernel_launch: bool = None - - kernel_enable_cuda_graph: bool = None - kernel_enable_fused_conv_bias: bool = None - kernel_enable_fused_linear: bool = None - kernel_conv_cutlass_impl_enable_tuning_warmup: bool = None - kernel_enable_conv2d_tuning_warmup: bool = None - kernel_gemm_cutlass_impl_enable_tuning_warmup: bool = None - kernel_conv_enable_cutlass_impl: bool = None - kernel_gemm_enable_cutlass_impl: bool = None - kernel_glu_enable_dual_gemm_impl: bool = None - kernel_glu_enable_y_gemm_impl: bool = None - kernel_glu_quant_enable_dual_gemm_impl: bool = None - -@dataclasses.dataclass -class CompileOptions: - # common options - dynamic: bool - - # oneflow specific options - oneflow: OneflowCompileOptions - - # nexfort specific options - nexfort: Dict - - def __init__(self, dynamic=True, oneflow=None, nexfort=None): - self.dynamic = dynamic - self.oneflow = oneflow if oneflow is not None else OneflowCompileOptions() - self.nexfort = nexfort if nexfort is not None else dict() - - -# a global default compile options -_GLOBAL_compile_options = CompileOptions() diff --git a/src/onediff/optimization/attention_processor.py b/src/onediff/optimization/attention_processor.py index 22650ab62..4ee76321d 100644 --- a/src/onediff/optimization/attention_processor.py +++ b/src/onediff/optimization/attention_processor.py @@ -84,7 +84,7 @@ def __call__( hidden_states = flow.bmm(attention_probs, value) hidden_states = attn.batch_to_head_dim(hidden_states) else: - from ..infer_compiler.utils import ( + 
from onediff.utils import ( parse_boolean_from_env, set_boolean_env_var, ) diff --git a/src/onediff/optimization/quant_optimizer.py b/src/onediff/optimization/quant_optimizer.py index 24a104dfc..02f379ef8 100644 --- a/src/onediff/optimization/quant_optimizer.py +++ b/src/onediff/optimization/quant_optimizer.py @@ -2,9 +2,9 @@ import torch import torch.nn as nn from copy import deepcopy -from ..infer_compiler.utils.log_utils import logger -from ..infer_compiler.utils.version_util import is_quantization_enabled -from ..infer_compiler.utils.cost_util import cost_cnt +from onediff.utils import logger +from ..infer_compiler.backends.oneflow.utils.version_util import is_quantization_enabled +from ..infer_compiler.backends.oneflow.utils.cost_util import cost_cnt from ..infer_compiler.utils.module_operations import modify_sub_module from ..infer_compiler.transform.manager import transform_mgr diff --git a/src/onediff/torch_utils/__init__.py b/src/onediff/torch_utils/__init__.py new file mode 100644 index 000000000..37be053e3 --- /dev/null +++ b/src/onediff/torch_utils/__init__.py @@ -0,0 +1 @@ +from .model_inplace_assign import TensorInplaceAssign \ No newline at end of file diff --git a/src/onediff/infer_compiler/utils/model_inplace_assign.py b/src/onediff/torch_utils/model_inplace_assign.py similarity index 100% rename from src/onediff/infer_compiler/utils/model_inplace_assign.py rename to src/onediff/torch_utils/model_inplace_assign.py diff --git a/src/onediff/infer_compiler/utils/module_operations.py b/src/onediff/torch_utils/module_operations.py similarity index 100% rename from src/onediff/infer_compiler/utils/module_operations.py rename to src/onediff/torch_utils/module_operations.py diff --git a/src/onediff/utils/__init__.py b/src/onediff/utils/__init__.py new file mode 100644 index 000000000..5ebea9cb4 --- /dev/null +++ b/src/onediff/utils/__init__.py @@ -0,0 +1,7 @@ +from .log_utils import logger +from .env_var import ( + parse_boolean_from_env, + set_boolean_env_var, + parse_integer_from_env, + set_integer_env_var, +) \ No newline at end of file diff --git a/src/onediff/utils/env_var.py b/src/onediff/utils/env_var.py new file mode 100644 index 000000000..23b6e749b --- /dev/null +++ b/src/onediff/utils/env_var.py @@ -0,0 +1,31 @@ +import os +from typing import Optional + + +def parse_boolean_from_env(env_var, default_value=None): + env_var = os.getenv(env_var) + if env_var is None: + return default_value + env_var = env_var.lower() + return env_var in ("1", "true", "yes", "on", "y") + + +def set_boolean_env_var(env_var: str, val: Optional[bool]): + if val is None: + os.environ.pop(env_var, None) + else: + os.environ[env_var] = "1" if val else "0" + + +def parse_integer_from_env(env_var, default_value=None): + env_var = os.getenv(env_var) + if env_var is None: + return default_value + return int(env_var) + + +def set_integer_env_var(env_var: str, val: Optional[int]): + if val is None: + os.environ.pop(env_var, None) + else: + os.environ[env_var] = str(int(val)) diff --git a/src/onediff/infer_compiler/utils/log_utils.py b/src/onediff/utils/log_utils.py similarity index 100% rename from src/onediff/infer_compiler/utils/log_utils.py rename to src/onediff/utils/log_utils.py diff --git a/tests/test_quantize_custom_model.py b/tests/test_quantize_custom_model.py index 36d92f9fc..8583cbb4e 100644 --- a/tests/test_quantize_custom_model.py +++ b/tests/test_quantize_custom_model.py @@ -8,7 +8,7 @@ from onediff.infer_compiler import oneflow_compile from onediff.infer_compiler.transform import 
register -from onediff.infer_compiler.utils import is_community_version +from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version is_community = is_community_version() onediff_quant_spec = importlib.util.find_spec("onediff_quant") From c6041c41cea4efc5afd30c72fc646d3b01616834 Mon Sep 17 00:00:00 2001 From: strint Date: Fri, 17 May 2024 23:55:32 +0800 Subject: [PATCH 03/13] add readme for nexfort --- benchmarks/README.md | 15 +------------ .../infer_compiler/backends/nexfort/README.md | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+), 14 deletions(-) create mode 100644 src/onediff/infer_compiler/backends/nexfort/README.md diff --git a/benchmarks/README.md b/benchmarks/README.md index 4eee88dfb..5a4a7a045 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -21,17 +21,4 @@ docker run -it --rm --gpus all --shm-size 12g --ipc=host --security-opt seccomp= -v `pwd`:/benchmark \ onediff:benchmark-community-default \ sh -c "cd /benchmark && sh run_all_benchmarks.sh -m models -o benchmark.md" -``` - -## Run Examples -### Run pixart alpha (with nexfort backend) -``` -# model_id_or_path_to_PixArt-XL-2-1024-MS: /data/hf_models/PixArt-XL-2-1024-MS/ -python3 text_to_image.py --model model_id_or_path_to_PixArt-XL-2-1024-MS --scheduler none --compiler nexfort -``` -Performance on NVIDIA A100-PCIE-40GB: -Iterations per second of progress bar: 11.7 -Inference time: 2.045s -Iterations per second: 10.517 -CUDA Mem after: 13.569GiB - +``` \ No newline at end of file diff --git a/src/onediff/infer_compiler/backends/nexfort/README.md b/src/onediff/infer_compiler/backends/nexfort/README.md new file mode 100644 index 000000000..20a9e9f41 --- /dev/null +++ b/src/onediff/infer_compiler/backends/nexfort/README.md @@ -0,0 +1,21 @@ +## nexfort backend for compile in onediff +### Install nexfort +``` +wget https://oneflow-static.oss-cn-beijing.aliyuncs.com/sd/nexfort-0.1-cb3133ca2dae4265bc1d86068fc3aa1d.zip +unzip nexfort-0.1-cb3133ca2dae4265bc1d86068fc3aa1d.zip +cd nexfort-0.1-cb3133ca2dae4265bc1d86068fc3aa1d +pip3 install nexfort-0.1.dev195+torch230cu121-cp310-cp310-manylinux2014_x86_64.whl +``` + +### Run pixart alpha (with nexfort backend) + +``` +cd benchmarks +# model_id_or_path_to_PixArt-XL-2-1024-MS: /data/hf_models/PixArt-XL-2-1024-MS/ +python3 text_to_image.py --model model_id_or_path_to_PixArt-XL-2-1024-MS --scheduler none --compiler nexfort +``` +Performance on NVIDIA A100-PCIE-40GB: +Iterations per second of progress bar: 11.7 +Inference time: 2.045s +Iterations per second: 10.517 +CUDA Mem after: 13.569GiB \ No newline at end of file From 29f7973eb983174e77ac7d58b3596eef951c85c5 Mon Sep 17 00:00:00 2001 From: strint Date: Fri, 17 May 2024 23:56:37 +0800 Subject: [PATCH 04/13] format --- benchmarks/README.md | 2 +- src/onediff/infer_compiler/backends/nexfort/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 5a4a7a045..b2fc652ed 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -21,4 +21,4 @@ docker run -it --rm --gpus all --shm-size 12g --ipc=host --security-opt seccomp= -v `pwd`:/benchmark \ onediff:benchmark-community-default \ sh -c "cd /benchmark && sh run_all_benchmarks.sh -m models -o benchmark.md" -``` \ No newline at end of file +``` diff --git a/src/onediff/infer_compiler/backends/nexfort/README.md b/src/onediff/infer_compiler/backends/nexfort/README.md index 20a9e9f41..0ea06bb4f 100644 --- a/src/onediff/infer_compiler/backends/nexfort/README.md +++ 
b/src/onediff/infer_compiler/backends/nexfort/README.md @@ -18,4 +18,4 @@ Performance on NVIDIA A100-PCIE-40GB: Iterations per second of progress bar: 11.7 Inference time: 2.045s Iterations per second: 10.517 -CUDA Mem after: 13.569GiB \ No newline at end of file +CUDA Mem after: 13.569GiB From 460d6bf4aebd4710926dbbb0cdb98ee04e49963d Mon Sep 17 00:00:00 2001 From: strint Date: Fri, 17 May 2024 23:59:15 +0800 Subject: [PATCH 05/13] format with black --- .../register_diffusers/__init__.py | 22 ++++++++++--- .../transformer_2d_oflow.py | 4 ++- .../unet_2d_blocks_oflow.py | 8 +++-- .../infer_compiler/backends/__init__.py | 2 +- .../infer_compiler/backends/compiler.py | 1 + .../backends/deployable_module.py | 1 + .../backends/nexfort/__init__.py | 2 +- .../backends/nexfort/nexfort.py | 4 +-- .../backends/oneflow/deployable_module.py | 3 +- .../backends/oneflow/dual_module.py | 3 +- .../backends/oneflow/env_var.py | 1 + .../oneflow/online_quantization_utils.py | 31 ++++++++++++------- .../backends/oneflow/param_utils.py | 7 +++-- .../infer_compiler/backends/options.py | 1 + .../infer_compiler/backends/registry.py | 1 + .../import_tools/patch_for_compiler.py | 10 ++++-- .../transform/builtin_transform.py | 8 +++-- .../infer_compiler/transform/manager.py | 3 +- src/onediff/optimization/quant_optimizer.py | 1 - .../quantization/load_quantized_model.py | 18 ++++++++--- .../quantization/quant_pipeline_test.py | 27 +++++++++++----- src/onediff/quantization/quantize_utils.py | 2 +- src/onediff/torch_utils/__init__.py | 2 +- src/onediff/torch_utils/module_operations.py | 2 +- src/onediff/utils/__init__.py | 2 +- 25 files changed, 110 insertions(+), 56 deletions(-) diff --git a/src/infer_compiler_registry/register_diffusers/__init__.py b/src/infer_compiler_registry/register_diffusers/__init__.py index e8e4f59d0..4e1af1e62 100644 --- a/src/infer_compiler_registry/register_diffusers/__init__.py +++ b/src/infer_compiler_registry/register_diffusers/__init__.py @@ -15,11 +15,19 @@ if diffusers_version < version.parse("0.26.00"): from diffusers.models.unet_2d_condition import UNet2DConditionModel - from diffusers.models.unet_2d_blocks import AttnUpBlock2D, CrossAttnUpBlock2D, UpBlock2D + from diffusers.models.unet_2d_blocks import ( + AttnUpBlock2D, + CrossAttnUpBlock2D, + UpBlock2D, + ) from diffusers.models.transformer_2d import Transformer2DModel else: from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel - from diffusers.models.unets.unet_2d_blocks import AttnUpBlock2D, CrossAttnUpBlock2D, UpBlock2D + from diffusers.models.unets.unet_2d_blocks import ( + AttnUpBlock2D, + CrossAttnUpBlock2D, + UpBlock2D, + ) from diffusers.models.transformers.transformer_2d import Transformer2DModel if diffusers_version >= version.parse("0.25.00"): @@ -34,7 +42,9 @@ from diffusers.models.unets.unet_spatio_temporal_condition import ( UNetSpatioTemporalConditionModel, ) - from diffusers.models.transformers.transformer_temporal import TransformerSpatioTemporalModel + from diffusers.models.transformers.transformer_temporal import ( + TransformerSpatioTemporalModel, + ) else: from diffusers.models.transformer_temporal import TransformerSpatioTemporalModel from diffusers.models.unet_spatio_temporal_condition import ( @@ -47,8 +57,10 @@ ) else: from diffusers.models.autoencoder_kl_temporal_decoder import TemporalDecoder - - from .spatio_temporal_oflow import SpatioTemporalResBlock as SpatioTemporalResBlockOflow + + from .spatio_temporal_oflow import ( + SpatioTemporalResBlock as SpatioTemporalResBlockOflow, + 
) from .spatio_temporal_oflow import TemporalDecoder as TemporalDecoderOflow from .spatio_temporal_oflow import ( TransformerSpatioTemporalModel as TransformerSpatioTemporalModelOflow, diff --git a/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py b/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py index f2a524413..3eff67c53 100644 --- a/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py @@ -968,7 +968,9 @@ def forward( if diffusers_version >= diffusers_0270_v: if cross_attention_kwargs is not None: if cross_attention_kwargs.get("scale", None) is not None: - logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.") + logger.warning( + "Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored." + ) # ensure attention_mask is a bias, and give it a singleton query_tokens dimension. # we may have done this conversion already, e.g. if we came here via UNet2DConditionModel#forward. # we can tell by counting dims; if ndim == 2: it's a mask rather than a bias. diff --git a/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py b/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py index 86234c5c7..2930273ac 100644 --- a/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py @@ -70,7 +70,9 @@ def custom_forward(*inputs): ckpt_kwargs: Dict[str, Any] = { "use_reentrant": False - } if transformed_diffusers.utils.is_torch_version(">=", "1.11.0") else {} + } if transformed_diffusers.utils.is_torch_version( + ">=", "1.11.0" + ) else {} hidden_states = torch.utils.checkpoint.checkpoint( create_custom_forward(resnet), hidden_states, @@ -236,7 +238,9 @@ def custom_forward(*inputs): ckpt_kwargs: Dict[str, Any] = { "use_reentrant": False - } if transformed_diffusers.utils.is_torch_version(">=", "1.11.0") else {} + } if transformed_diffusers.utils.is_torch_version( + ">=", "1.11.0" + ) else {} hidden_states = torch.utils.checkpoint.checkpoint( create_custom_forward(resnet), hidden_states, diff --git a/src/onediff/infer_compiler/backends/__init__.py b/src/onediff/infer_compiler/backends/__init__.py index e69d3ead6..2f2b32ab3 100644 --- a/src/onediff/infer_compiler/backends/__init__.py +++ b/src/onediff/infer_compiler/backends/__init__.py @@ -1,4 +1,4 @@ from .deployable_module import DeployableModule from .compiler import compile, oneflow_compile from .options import CompileOptions -from .options import _GLOBAL_compile_options as compile_options \ No newline at end of file +from .options import _GLOBAL_compile_options as compile_options diff --git a/src/onediff/infer_compiler/backends/compiler.py b/src/onediff/infer_compiler/backends/compiler.py index 34a67df7a..4bf91bb83 100644 --- a/src/onediff/infer_compiler/backends/compiler.py +++ b/src/onediff/infer_compiler/backends/compiler.py @@ -4,6 +4,7 @@ _DEFAULT_BACKEND = "oneflow" + def compile( torch_module: torch.nn.Module, *, backend=_DEFAULT_BACKEND, options=None ) -> DeployableModule: diff --git a/src/onediff/infer_compiler/backends/deployable_module.py b/src/onediff/infer_compiler/backends/deployable_module.py index 0982926a7..51464df63 100644 --- a/src/onediff/infer_compiler/backends/deployable_module.py +++ b/src/onediff/infer_compiler/backends/deployable_module.py @@ -1,6 +1,7 @@ from typing import Any import torch + class 
DeployableModule(torch.nn.Module): def __init__(self): torch.nn.Module.__init__(self) diff --git a/src/onediff/infer_compiler/backends/nexfort/__init__.py b/src/onediff/infer_compiler/backends/nexfort/__init__.py index e50a723c5..1ea5f954e 100644 --- a/src/onediff/infer_compiler/backends/nexfort/__init__.py +++ b/src/onediff/infer_compiler/backends/nexfort/__init__.py @@ -1 +1 @@ -from . import nexfort as _nexfort_backend \ No newline at end of file +from . import nexfort as _nexfort_backend diff --git a/src/onediff/infer_compiler/backends/nexfort/nexfort.py b/src/onediff/infer_compiler/backends/nexfort/nexfort.py index 3d49ee062..1897ba00d 100644 --- a/src/onediff/infer_compiler/backends/nexfort/nexfort.py +++ b/src/onediff/infer_compiler/backends/nexfort/nexfort.py @@ -12,8 +12,6 @@ def compile(torch_module: torch.nn.Module, *, options=None): options = options if options is not None else CompileOptions() nexfort_options = options.nexfort - compiled_model = nexfort_compile( - torch_module, **nexfort_options - ) + compiled_model = nexfort_compile(torch_module, **nexfort_options) # return NexfortDeployableModule(compiled_model, torch_module) return compiled_model diff --git a/src/onediff/infer_compiler/backends/oneflow/deployable_module.py b/src/onediff/infer_compiler/backends/oneflow/deployable_module.py index dd15fa94a..51b2f1086 100644 --- a/src/onediff/infer_compiler/backends/oneflow/deployable_module.py +++ b/src/onediff/infer_compiler/backends/oneflow/deployable_module.py @@ -51,6 +51,7 @@ def get_oneflow_graph(model, size=9, dynamic_graph=True): g._dynamic_input_graph_cache.enable_shared(dynamic_graph) return g + class OneflowDeployableModule(DeployableModule): def __init__( self, torch_module, oneflow_module, dynamic=True, options=None, @@ -236,8 +237,8 @@ def apply_online_quant(self, quant_config): """ self._deployable_module_quant_config = quant_config -def get_mixed_deployable_module(module_cls): +def get_mixed_deployable_module(module_cls): class MixedOneflowDeployableModule(OneflowDeployableModule, module_cls): def __init__(self, torch_module, oneflow_module, dynamic=True, options=None): OneflowDeployableModule.__init__( diff --git a/src/onediff/infer_compiler/backends/oneflow/dual_module.py b/src/onediff/infer_compiler/backends/oneflow/dual_module.py index 7f2d67bc2..5483678c0 100644 --- a/src/onediff/infer_compiler/backends/oneflow/dual_module.py +++ b/src/onediff/infer_compiler/backends/oneflow/dual_module.py @@ -150,6 +150,7 @@ def __setattr__(self, key, value): setattr(self._oneflow_modules, key, value) return object.__setattr__(self, key, value) + def get_mixed_dual_module(module_cls): if issubclass(module_cls, DualModule) and "MixedDualModule" in module_cls.__name__: return module_cls @@ -164,5 +165,3 @@ def _get_name(self) -> str: return f"{self.__class__.__name__}(of {module_cls.__name__})" return MixedDualModule - - diff --git a/src/onediff/infer_compiler/backends/oneflow/env_var.py b/src/onediff/infer_compiler/backends/oneflow/env_var.py index 6109330e3..f4e82ba2f 100644 --- a/src/onediff/infer_compiler/backends/oneflow/env_var.py +++ b/src/onediff/infer_compiler/backends/oneflow/env_var.py @@ -45,6 +45,7 @@ class OneflowCompileOptions: kernel_glu_enable_y_gemm_impl: bool = None kernel_glu_quant_enable_dual_gemm_impl: bool = None + def _set_env_vars(field2env_var, options): for field in dataclasses.fields(options): field_name = field.name diff --git a/src/onediff/infer_compiler/backends/oneflow/online_quantization_utils.py 
b/src/onediff/infer_compiler/backends/oneflow/online_quantization_utils.py index a8fe99fd0..472c3d280 100644 --- a/src/onediff/infer_compiler/backends/oneflow/online_quantization_utils.py +++ b/src/onediff/infer_compiler/backends/oneflow/online_quantization_utils.py @@ -1,11 +1,15 @@ def patch_input_adapter(in_args, in_kwargs): return in_args, in_kwargs + def online_quantize_model( - model, input_args, input_kwargs, - seed=1, inplace=True, + model, + input_args, + input_kwargs, + seed=1, + inplace=True, module_selector=lambda x: x, - quant_config = None, + quant_config=None, calibration_info=None, ): """Optimize the quantization pipeline. @@ -19,18 +23,20 @@ def online_quantize_model( OnlineQuantModule, create_quantization_calculator, ) + if getattr(quant_config, "quantization_calculator", None): calculator = quant_config.quantization_calculator else: calculator = create_quantization_calculator( - model, quant_config, module_selector, seed, + model, + quant_config, + module_selector, + seed, calibration_info=calibration_info, ) module = OnlineQuantModule(calculator, False, inplace=inplace) - in_args , in_kwargs = patch_input_adapter(input_args, input_kwargs) - quantized_model, info = module.quantize_with_calibration( - *in_args, **in_kwargs - ) + in_args, in_kwargs = patch_input_adapter(input_args, input_kwargs) + quantized_model, info = module.quantize_with_calibration(*in_args, **in_kwargs) status = module.collect_quantization_status(model, info) return quantized_model, status @@ -42,14 +48,15 @@ def wrapper(self: "DeployableModule", *args, **kwargs): quant_config = self._deployable_module_quant_config if quant_config: torch_model, _ = online_quantize_model( - torch_model, args, kwargs, + torch_model, + args, + kwargs, module_selector=lambda x: x, quant_config=quant_config, inplace=True, ) - self._deployable_module_quant_config = None + self._deployable_module_quant_config = None output = func(self, *args, **kwargs) return output - return wrapper - \ No newline at end of file + return wrapper diff --git a/src/onediff/infer_compiler/backends/oneflow/param_utils.py b/src/onediff/infer_compiler/backends/oneflow/param_utils.py index 3e08c11f3..c5f53440f 100644 --- a/src/onediff/infer_compiler/backends/oneflow/param_utils.py +++ b/src/onediff/infer_compiler/backends/oneflow/param_utils.py @@ -80,8 +80,8 @@ def set_constant_folded_conv_attr( def generate_constant_folding_info( deployable_module, torch_module: torch.nn.Module = None ) -> Dict[str, flow.Tensor]: - removeprefix = lambda ss, prefix: ss[len(prefix):] if ss.startswith(prefix) else ss - + removeprefix = lambda ss, prefix: ss[len(prefix) :] if ss.startswith(prefix) else ss + # convert str like 'variable_transpose_model.input_blocks.10.0.in_layers.2.weight_239' # to 'input_blocks.10.0.in_layers.2.weight' def convert_var_name(s: str, prefix="variable_transpose_"): @@ -186,8 +186,9 @@ def forward_pre_check_and_update_state_hook(module, args): update_graph_with_constant_folding_info(module, constant_folding_info) setattr(module._torch_module, STATE_UPDATED_ATTR, False) + def removesuffix(s: str, suffix: str) -> str: if s.endswith(suffix): - return s[:len(s) - len(suffix)] + return s[: len(s) - len(suffix)] else: return s diff --git a/src/onediff/infer_compiler/backends/options.py b/src/onediff/infer_compiler/backends/options.py index 79696466c..6e6f06867 100644 --- a/src/onediff/infer_compiler/backends/options.py +++ b/src/onediff/infer_compiler/backends/options.py @@ -4,6 +4,7 @@ class CompileOptions: def __init__(self, dynamic=True, 
oneflow=None, nexfort=None): from .oneflow import OneflowCompileOptions + self.dynamic = dynamic self.oneflow = oneflow if oneflow is not None else OneflowCompileOptions() self.nexfort = nexfort if nexfort is not None else dict() diff --git a/src/onediff/infer_compiler/backends/registry.py b/src/onediff/infer_compiler/backends/registry.py index 88c08a724..bbf0e24bf 100644 --- a/src/onediff/infer_compiler/backends/registry.py +++ b/src/onediff/infer_compiler/backends/registry.py @@ -37,5 +37,6 @@ def lookup_backend(compiler_fn): def _lazy_import(backend_name): from .. import backends + backend_path = f"{backends.__name__}.{backend_name}" importlib.import_module(backend_path) diff --git a/src/onediff/infer_compiler/import_tools/patch_for_compiler.py b/src/onediff/infer_compiler/import_tools/patch_for_compiler.py index 501411d2c..8e7f7e40b 100644 --- a/src/onediff/infer_compiler/import_tools/patch_for_compiler.py +++ b/src/onediff/infer_compiler/import_tools/patch_for_compiler.py @@ -109,13 +109,17 @@ def scaled_dot_product_attention( from oneflow import Tensor + def oneflow_rfloordiv(): - original_rfloordiv = Tensor.__rfloordiv__ + original_rfloordiv = Tensor.__rfloordiv__ + def rfloordiv(self, other): if isinstance(other, int): other = flow.tensor(other) - + return original_rfloordiv(self, other) + return rfloordiv -Tensor.__rfloordiv__ = oneflow_rfloordiv() \ No newline at end of file + +Tensor.__rfloordiv__ = oneflow_rfloordiv() diff --git a/src/onediff/infer_compiler/transform/builtin_transform.py b/src/onediff/infer_compiler/transform/builtin_transform.py index 04f49dca6..e829baf16 100644 --- a/src/onediff/infer_compiler/transform/builtin_transform.py +++ b/src/onediff/infer_compiler/transform/builtin_transform.py @@ -17,6 +17,7 @@ from ..import_tools.importer import is_need_mock from .patch_for_comfy import PatchForComfy + __all__ = [ "proxy_class", "ProxySubmodule", @@ -26,6 +27,7 @@ "default_converter", ] + def singledispatch_proxy(func): dispatcher = singledispatch(func) _warning_set = set() @@ -57,10 +59,12 @@ def wrapper(first_param, *args, **kwargs): def proxy_class(cls: type): try: out = transform_mgr.transform_cls(cls) - return out + return out except Exception as e: # If an exception occurs during transformation, print traceback for debugging - raise RuntimeError(f"An exception occurred during class transformation:\n{traceback.format_exc()}\nException: {e}") + raise RuntimeError( + f"An exception occurred during class transformation:\n{traceback.format_exc()}\nException: {e}" + ) def reverse_proxy_class(cls: type): diff --git a/src/onediff/infer_compiler/transform/manager.py b/src/onediff/infer_compiler/transform/manager.py index daaede7c1..bc24f9ee6 100644 --- a/src/onediff/infer_compiler/transform/manager.py +++ b/src/onediff/infer_compiler/transform/manager.py @@ -115,6 +115,7 @@ def transform_package(self, package_name): if importlib.util.find_spec("pydantic") is not None: import pydantic + if pydantic.VERSION < "2.5.2": logger.warning( f"Pydantic version {pydantic.VERSION} is too low, please upgrade to 2.5.2 or higher." 
@@ -124,5 +125,3 @@ def transform_package(self, package_name): MockEnableDisableMixin.hazard_list.append( "huggingface_hub.inference._text_generation" ) - - diff --git a/src/onediff/optimization/quant_optimizer.py b/src/onediff/optimization/quant_optimizer.py index 02f379ef8..556db039f 100644 --- a/src/onediff/optimization/quant_optimizer.py +++ b/src/onediff/optimization/quant_optimizer.py @@ -107,4 +107,3 @@ def apply_quantization_to_modules(quantizable_modules): ) return model - diff --git a/src/onediff/quantization/load_quantized_model.py b/src/onediff/quantization/load_quantized_model.py index 9500aa314..913466137 100644 --- a/src/onediff/quantization/load_quantized_model.py +++ b/src/onediff/quantization/load_quantized_model.py @@ -1,22 +1,30 @@ from diffusers import AutoPipelineForText2Image from onediff.quantization.quantize_pipeline import QuantPipeline -import argparse +import argparse import torch from onediff.infer_compiler import oneflow_compile + def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument("--prompt", default="a photo of an astronaut riding a horse on mars") - parser.add_argument("--height", type= int,default=1024) - parser.add_argument("--width", type= int, default=1024) + parser.add_argument( + "--prompt", default="a photo of an astronaut riding a horse on mars" + ) + parser.add_argument("--height", type=int, default=1024) + parser.add_argument("--width", type=int, default=1024) parser.add_argument("--num_inference_steps", type=int, default=30) parser.add_argument("--quantized_model", type=str, required=True) return parser.parse_args() + args = parse_args() pipe = QuantPipeline.from_quantized( - AutoPipelineForText2Image, args.quantized_model, torch_dtype=torch.float16, variant="fp16", use_safetensors=True + AutoPipelineForText2Image, + args.quantized_model, + torch_dtype=torch.float16, + variant="fp16", + use_safetensors=True, ) pipe = pipe.to("cuda") diff --git a/src/onediff/quantization/quant_pipeline_test.py b/src/onediff/quantization/quant_pipeline_test.py index c68589fbc..a23efd134 100644 --- a/src/onediff/quantization/quant_pipeline_test.py +++ b/src/onediff/quantization/quant_pipeline_test.py @@ -1,15 +1,19 @@ from diffusers import AutoPipelineForText2Image from onediff.quantization.quantize_pipeline import QuantPipeline import torch -import argparse +import argparse def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument("--floatting_model_path", default="runwayml/stable-diffusion-v1-5") - parser.add_argument("--prompt", default="a photo of an astronaut riding a horse on mars") - parser.add_argument("--height",type=int, default=1024) - parser.add_argument("--width", type=int,default=1024) + parser.add_argument( + "--floatting_model_path", default="runwayml/stable-diffusion-v1-5" + ) + parser.add_argument( + "--prompt", default="a photo of an astronaut riding a horse on mars" + ) + parser.add_argument("--height", type=int, default=1024) + parser.add_argument("--width", type=int, default=1024) parser.add_argument("--num_inference_steps", type=int, default=30) parser.add_argument("--conv_compute_density_threshold", type=int, default=900) parser.add_argument("--linear_compute_density_threshold", type=int, default=300) @@ -20,10 +24,15 @@ def parse_args(): parser.add_argument("--quantized_model", default="./quantized_model") return parser.parse_args() + args = parse_args() pipe = QuantPipeline.from_pretrained( - AutoPipelineForText2Image, args.floatting_model_path, torch_dtype=torch.float16, variant="fp16", 
use_safetensors=True + AutoPipelineForText2Image, + args.floatting_model_path, + torch_dtype=torch.float16, + variant="fp16", + use_safetensors=True, ) pipe.to("cuda") @@ -34,13 +43,15 @@ def parse_args(): num_inference_steps=args.num_inference_steps, ) -pipe.quantize(**pipe_kwargs, +pipe.quantize( + **pipe_kwargs, conv_compute_density_threshold=args.conv_compute_density_threshold, linear_compute_density_threshold=args.linear_compute_density_threshold, conv_ssim_threshold=args.conv_ssim_threshold, linear_ssim_threshold=args.linear_ssim_threshold, save_as_float=args.save_as_float, plot_calibrate_info=False, - cache_dir=args.cache_dir) + cache_dir=args.cache_dir +) pipe.save_quantized(args.quantized_model, safe_serialization=True) diff --git a/src/onediff/quantization/quantize_utils.py b/src/onediff/quantization/quantize_utils.py index 678787586..9b0b58022 100644 --- a/src/onediff/quantization/quantize_utils.py +++ b/src/onediff/quantization/quantize_utils.py @@ -17,7 +17,7 @@ def load_calibration_and_quantize_pipeline(calibration_path, pipe): store = CalibrationStorage() calibrate_info = store.load_from_file(file_path=calibration_path) - + for sub_module_name, sub_calibrate_info in calibrate_info.items(): replace_sub_module_with_quantizable_module( pipe.unet, diff --git a/src/onediff/torch_utils/__init__.py b/src/onediff/torch_utils/__init__.py index 37be053e3..5a82505fa 100644 --- a/src/onediff/torch_utils/__init__.py +++ b/src/onediff/torch_utils/__init__.py @@ -1 +1 @@ -from .model_inplace_assign import TensorInplaceAssign \ No newline at end of file +from .model_inplace_assign import TensorInplaceAssign diff --git a/src/onediff/torch_utils/module_operations.py b/src/onediff/torch_utils/module_operations.py index c31856227..04cac3e58 100644 --- a/src/onediff/torch_utils/module_operations.py +++ b/src/onediff/torch_utils/module_operations.py @@ -16,7 +16,7 @@ def get_sub_module(module, sub_module_name) -> nn.Module: """ if sub_module_name == "": return module - + parts = sub_module_name.split(".") current_module = module diff --git a/src/onediff/utils/__init__.py b/src/onediff/utils/__init__.py index 5ebea9cb4..631812a59 100644 --- a/src/onediff/utils/__init__.py +++ b/src/onediff/utils/__init__.py @@ -4,4 +4,4 @@ set_boolean_env_var, parse_integer_from_env, set_integer_env_var, -) \ No newline at end of file +) From 21162c2e71607e42dcba6b6510675dc1b0fa398e Mon Sep 17 00:00:00 2001 From: strint Date: Sat, 18 May 2024 00:04:14 +0800 Subject: [PATCH 06/13] add depend --- src/onediff/infer_compiler/backends/nexfort/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/onediff/infer_compiler/backends/nexfort/README.md b/src/onediff/infer_compiler/backends/nexfort/README.md index 0ea06bb4f..9d61d5b61 100644 --- a/src/onediff/infer_compiler/backends/nexfort/README.md +++ b/src/onediff/infer_compiler/backends/nexfort/README.md @@ -1,4 +1,9 @@ ## nexfort backend for compile in onediff +### Dependency +``` +pip3 install -U torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 torchao==0.1 +``` + ### Install nexfort ``` wget https://oneflow-static.oss-cn-beijing.aliyuncs.com/sd/nexfort-0.1-cb3133ca2dae4265bc1d86068fc3aa1d.zip From f8b2f96a69b05b63b2fe6ee47d3e9865abc3377a Mon Sep 17 00:00:00 2001 From: strint Date: Sat, 18 May 2024 00:06:41 +0800 Subject: [PATCH 07/13] add depend --- src/onediff/infer_compiler/backends/nexfort/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/onediff/infer_compiler/backends/nexfort/README.md 
b/src/onediff/infer_compiler/backends/nexfort/README.md index 9d61d5b61..de12d9d12 100644 --- a/src/onediff/infer_compiler/backends/nexfort/README.md +++ b/src/onediff/infer_compiler/backends/nexfort/README.md @@ -1,4 +1,4 @@ -## nexfort backend for compile in onediff +## nexfort backend for compiler in onediff ### Dependency ``` pip3 install -U torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 torchao==0.1 From ab7e11e2dfd127ef2fb2d199f2d9994d05dd122f Mon Sep 17 00:00:00 2001 From: strint Date: Tue, 21 May 2024 10:47:45 +0800 Subject: [PATCH 08/13] add steps arg --- src/onediff/infer_compiler/backends/nexfort/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/onediff/infer_compiler/backends/nexfort/README.md b/src/onediff/infer_compiler/backends/nexfort/README.md index de12d9d12..6ee89ef75 100644 --- a/src/onediff/infer_compiler/backends/nexfort/README.md +++ b/src/onediff/infer_compiler/backends/nexfort/README.md @@ -17,7 +17,7 @@ pip3 install nexfort-0.1.dev195+torch230cu121-cp310-cp310-manylinux2014_x86_64.w ``` cd benchmarks # model_id_or_path_to_PixArt-XL-2-1024-MS: /data/hf_models/PixArt-XL-2-1024-MS/ -python3 text_to_image.py --model model_id_or_path_to_PixArt-XL-2-1024-MS --scheduler none --compiler nexfort +python3 text_to_image.py --model model_id_or_path_to_PixArt-XL-2-1024-MS --scheduler none --steps 20 --compiler nexfort ``` Performance on NVIDIA A100-PCIE-40GB: Iterations per second of progress bar: 11.7 From 8a08d72112364ff8d637a8c1ab014e48a5a45032 Mon Sep 17 00:00:00 2001 From: strint Date: Tue, 21 May 2024 12:29:05 +0800 Subject: [PATCH 09/13] mv transform and import --- benchmarks/patch_stable_cascade_of.py | 4 ++-- onediff_comfy_nodes/extras_nodes/nodes_compare.py | 2 +- .../modules/oneflow/hijack_animatediff/_config.py | 4 ++-- .../modules/oneflow/hijack_animatediff/motion_module_ad.py | 4 ++-- .../modules/oneflow/hijack_animatediff/sampling.py | 2 +- .../modules/oneflow/hijack_animatediff/utils_motion.py | 2 +- .../modules/oneflow/hijack_comfyui_instantid/_config.py | 4 ++-- .../modules/oneflow/hijack_ipadapter_plus/_config.py | 4 ++-- .../oneflow/hijack_ipadapter_plus/set_model_patch_replace.py | 2 +- .../infer_compiler_registry/register_comfy/__init__.py | 2 +- .../infer_compiler_registry/register_comfy/attention.py | 2 +- .../comfy_ldm_modules_diffusionmodules_model.py | 2 +- .../oneflow/infer_compiler_registry/register_comfy/linear.py | 2 +- .../infer_compiler_registry/register_comfy/openaimodel.py | 2 +- .../infer_compiler_registry/register_comfy/vae_patch.py | 2 +- .../oneflow/infer_compiler_registry/register_onediff_quant.py | 2 +- .../modules/oneflow/utils/loader_sample_tools.py | 2 +- onediff_sd_webui_extensions/compile_ldm.py | 2 +- onediff_sd_webui_extensions/compile_sgm.py | 2 +- onediff_sd_webui_extensions/compile_vae.py | 2 +- src/infer_compiler_registry/register_diffusers/__init__.py | 2 +- .../register_diffusers/resnet_oflow.py | 2 +- .../register_diffusers/spatio_temporal_oflow.py | 2 +- .../register_diffusers/transformer_2d_oflow.py | 2 +- .../register_diffusers/unet_2d_blocks_oflow.py | 2 +- .../register_diffusers/unet_2d_condition_oflow.py | 2 +- .../register_diffusers_enterprise_lite/__init__.py | 2 +- .../register_onediff_quant/__init__.py | 2 +- src/onediff/infer_compiler/backends/nexfort/README.md | 3 +-- .../{ => backends/oneflow}/import_tools/__init__.py | 0 .../{ => backends/oneflow}/import_tools/dyn_mock_mod.py | 0 .../{ => backends/oneflow}/import_tools/format_utils.py | 0 
.../oneflow}/import_tools/import_module_utils.py | 0 .../{ => backends/oneflow}/import_tools/importer.py | 0 .../{ => backends/oneflow}/import_tools/patch_for_compiler.py | 0 .../{ => backends/oneflow}/transform/__init__.py | 0 .../{ => backends/oneflow}/transform/builtin_transform.py | 0 .../{ => backends/oneflow}/transform/custom_transform.py | 0 .../{ => backends/oneflow}/transform/manager.py | 0 .../{ => backends/oneflow}/transform/patch_for_comfy.py | 0 .../{ => backends/oneflow}/transform/patch_for_diffusers.py | 0 src/onediff/optimization/attention_processor.py | 4 ++-- src/onediff/optimization/quant_optimizer.py | 2 +- tests/convert_torch_to_of/test_torch2of_demo.py | 2 +- tests/test_dual_module_list.py | 2 +- tests/test_quantize_custom_model.py | 2 +- 46 files changed, 40 insertions(+), 41 deletions(-) rename src/onediff/infer_compiler/{ => backends/oneflow}/import_tools/__init__.py (100%) rename src/onediff/infer_compiler/{ => backends/oneflow}/import_tools/dyn_mock_mod.py (100%) rename src/onediff/infer_compiler/{ => backends/oneflow}/import_tools/format_utils.py (100%) rename src/onediff/infer_compiler/{ => backends/oneflow}/import_tools/import_module_utils.py (100%) rename src/onediff/infer_compiler/{ => backends/oneflow}/import_tools/importer.py (100%) rename src/onediff/infer_compiler/{ => backends/oneflow}/import_tools/patch_for_compiler.py (100%) rename src/onediff/infer_compiler/{ => backends/oneflow}/transform/__init__.py (100%) rename src/onediff/infer_compiler/{ => backends/oneflow}/transform/builtin_transform.py (100%) rename src/onediff/infer_compiler/{ => backends/oneflow}/transform/custom_transform.py (100%) rename src/onediff/infer_compiler/{ => backends/oneflow}/transform/manager.py (100%) rename src/onediff/infer_compiler/{ => backends/oneflow}/transform/patch_for_comfy.py (100%) rename src/onediff/infer_compiler/{ => backends/oneflow}/transform/patch_for_diffusers.py (100%) diff --git a/benchmarks/patch_stable_cascade_of.py b/benchmarks/patch_stable_cascade_of.py index 8f388111a..454a17344 100644 --- a/benchmarks/patch_stable_cascade_of.py +++ b/benchmarks/patch_stable_cascade_of.py @@ -5,7 +5,7 @@ from packaging import version import importlib.metadata -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr diffusers_of = transform_mgr.transform_package("diffusers") StableCascadeUnet_OF_CLS = ( @@ -120,7 +120,7 @@ def forward( ) # torch2oflow_class_map.update({StableCascadeUnet: StableCascadeUnetOflow}) -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from contextlib import contextmanager diff --git a/onediff_comfy_nodes/extras_nodes/nodes_compare.py b/onediff_comfy_nodes/extras_nodes/nodes_compare.py index 4f4461d9b..f5a7f9707 100644 --- a/onediff_comfy_nodes/extras_nodes/nodes_compare.py +++ b/onediff_comfy_nodes/extras_nodes/nodes_compare.py @@ -5,7 +5,7 @@ import folder_paths import numpy as np import oneflow as flow -from onediff.infer_compiler.transform.builtin_transform import torch2oflow +from onediff.infer_compiler.backends.oneflow.transform.builtin_transform import torch2oflow from PIL import Image try: diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py index 167789792..d6340640f 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py +++ 
b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/_config.py @@ -4,8 +4,8 @@ """ import os -from onediff.infer_compiler.import_tools import DynamicModuleLoader -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.import_tools import DynamicModuleLoader +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr from ...sd_hijack_utils import Hijacker diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py index 74f8dd9c4..3bbc579dc 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/motion_module_ad.py @@ -1,7 +1,7 @@ # ComfyUI/custom_nodes/ComfyUI-AnimateDiff-Evolved/animatediff/motion_module_ad.py import oneflow as torch from einops import repeat -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from ._config import animatediff_of, animatediff_pt @@ -124,7 +124,7 @@ def forward( ) # import torch as torch_pt -# from onediff.infer_compiler.transform import torch2oflow +# from onediff.infer_compiler.backends.oneflow.transform import torch2oflow # @torch2oflow.register(TemporalTransformer3DModel_PT_CLS) # def _(mod, verbose=False): diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py index 720c5ab2a..ea201069b 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py @@ -2,7 +2,7 @@ import oneflow as flow from einops import rearrange from onediff.infer_compiler import DeployableModule -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from oneflow.nn.functional import group_norm from ._config import animatediff_hijacker, animatediff_of, animatediff_pt, comfy_of diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py index d1b4f3885..1fafec133 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/utils_motion.py @@ -1,6 +1,6 @@ # ComfyUI/custom_nodes/ComfyUI-AnimateDiff-Evolved/animatediff/utils_motion.py import oneflow as torch -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from ._config import animatediff_of, animatediff_pt diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py b/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py index ec2a1903e..d18438434 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_comfyui_instantid/_config.py @@ -2,8 +2,8 @@ import traceback COMFYUI_ROOT = os.getenv("COMFYUI_ROOT") -from onediff.infer_compiler.import_tools import DynamicModuleLoader -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.import_tools import DynamicModuleLoader +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr from ...sd_hijack_utils import Hijacker diff --git 
a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py index 37d11f083..b9da6376b 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/_config.py @@ -2,8 +2,8 @@ import traceback COMFYUI_ROOT = os.getenv("COMFYUI_ROOT") -from onediff.infer_compiler.import_tools import DynamicModuleLoader -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.import_tools import DynamicModuleLoader +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr from ...sd_hijack_utils import Hijacker diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py index 9be19105d..e21915ea3 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py @@ -1,6 +1,6 @@ from register_comfy.CrossAttentionPatch import Attn2Replace, ipadapter_attention -from onediff.infer_compiler.transform import torch2oflow +from onediff.infer_compiler.backends.oneflow.transform import torch2oflow from ..utils.booster_utils import clear_deployable_module_cache_and_unbind from ..patch_management import PatchType, create_patch_executor diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py index e1c91b7ba..eafbcf192 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py @@ -7,7 +7,7 @@ from nodes import * # must imported before import comfy -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from onediff.infer_compiler.utils import is_community_version from .attention import CrossAttention as CrossAttention1f diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py index 3eb09d9fb..27bf9165a 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/attention.py @@ -8,7 +8,7 @@ import oneflow as torch import oneflow.nn as nn from einops import rearrange, repeat -from onediff.infer_compiler.transform import proxy_class, transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import proxy_class, transform_mgr onediff_comfy = transform_mgr.transform_package("comfy") diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py index 854eb9f85..e320170c0 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/comfy_ldm_modules_diffusionmodules_model.py @@ -4,7 +4,7 @@ import oneflow as torch import 
oneflow.nn as nn import oneflow.nn.functional as F -from onediff.infer_compiler.transform import proxy_class +from onediff.infer_compiler.backends.oneflow.transform import proxy_class def Normalize(in_channels, num_groups=32): diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py index cf6e54553..638b4b3cd 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/linear.py @@ -1,5 +1,5 @@ import oneflow as torch -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr transformed_comfy = transform_mgr.transform_package("comfy") proxy_ops = transformed_comfy.ops diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py index 88cc98469..b8469004b 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/openaimodel.py @@ -4,7 +4,7 @@ import oneflow as th # 'th' is the way ComfyUI name the torch import oneflow.nn.functional as F from einops import rearrange -from onediff.infer_compiler.transform import proxy_class, transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import proxy_class, transform_mgr onediff_comfy = transform_mgr.transform_package("comfy") diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py index 86e822739..14f1a26d8 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/vae_patch.py @@ -1,7 +1,7 @@ # ComfyUI/comfy/ldm/modules/diffusionmodules/model.py import oneflow as torch -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr onediff_comfy = transform_mgr.transform_package("comfy") diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py index d05e8acb5..48f0de2a6 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py @@ -1,6 +1,6 @@ import onediff_quant import oneflow as flow -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register torch2oflow_class_map = { onediff_quant.FakeQuantModule: onediff_quant.OneFlowFakeQuantModule, diff --git a/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py b/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py index 96844fb2e..40678fabf 100644 --- a/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py +++ b/onediff_comfy_nodes/modules/oneflow/utils/loader_sample_tools.py @@ -5,7 +5,7 @@ from folder_paths import get_input_directory # onediff from onediff.infer_compiler import CompileOptions, oneflow_compile -from 
onediff.infer_compiler.transform import torch2oflow +from onediff.infer_compiler.backends.oneflow.transform import torch2oflow from onediff.optimization.quant_optimizer import quantize_model # onediff_comfy_nodes diff --git a/onediff_sd_webui_extensions/compile_ldm.py b/onediff_sd_webui_extensions/compile_ldm.py index e6a3aec06..43fbf381f 100644 --- a/onediff_sd_webui_extensions/compile_ldm.py +++ b/onediff_sd_webui_extensions/compile_ldm.py @@ -1,7 +1,7 @@ import os import oneflow as flow from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import proxy_class, register +from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register from ldm.modules.attention import ( BasicTransformerBlock, diff --git a/onediff_sd_webui_extensions/compile_sgm.py b/onediff_sd_webui_extensions/compile_sgm.py index 12398a737..35d048d4b 100644 --- a/onediff_sd_webui_extensions/compile_sgm.py +++ b/onediff_sd_webui_extensions/compile_sgm.py @@ -1,6 +1,6 @@ import oneflow as flow from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import proxy_class, register +from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register from sd_webui_onediff_utils import ( CrossAttentionOflow, GroupNorm32Oflow, diff --git a/onediff_sd_webui_extensions/compile_vae.py b/onediff_sd_webui_extensions/compile_vae.py index d5c9c7f26..ac95af44e 100644 --- a/onediff_sd_webui_extensions/compile_vae.py +++ b/onediff_sd_webui_extensions/compile_vae.py @@ -2,7 +2,7 @@ from modules.sd_vae_approx import model as get_vae_model, sd_vae_approx_models from modules.sd_vae_approx import VAEApprox from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import proxy_class, register +from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register __all__ = ["VaeCompileCtx"] diff --git a/src/infer_compiler_registry/register_diffusers/__init__.py b/src/infer_compiler_registry/register_diffusers/__init__.py index 4e1af1e62..292ffcdaf 100644 --- a/src/infer_compiler_registry/register_diffusers/__init__.py +++ b/src/infer_compiler_registry/register_diffusers/__init__.py @@ -1,4 +1,4 @@ -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from packaging import version import importlib.metadata diff --git a/src/infer_compiler_registry/register_diffusers/resnet_oflow.py b/src/infer_compiler_registry/register_diffusers/resnet_oflow.py index 5e33c9970..3133cabab 100644 --- a/src/infer_compiler_registry/register_diffusers/resnet_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/resnet_oflow.py @@ -5,7 +5,7 @@ from packaging import version import importlib.metadata -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr transformed_diffusers = transform_mgr.transform_package("diffusers") diff --git a/src/infer_compiler_registry/register_diffusers/spatio_temporal_oflow.py b/src/infer_compiler_registry/register_diffusers/spatio_temporal_oflow.py index 12dbb49d2..fd4aacb54 100644 --- a/src/infer_compiler_registry/register_diffusers/spatio_temporal_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/spatio_temporal_oflow.py @@ -30,7 +30,7 @@ if diffusers_version >= diffusers_0240_v: - from onediff.infer_compiler.transform import transform_mgr + from onediff.infer_compiler.backends.oneflow.transform import transform_mgr 
transformed_diffusers = transform_mgr.transform_package("diffusers") diff --git a/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py b/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py index 3eff67c53..2c3b2298f 100644 --- a/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/transformer_2d_oflow.py @@ -6,7 +6,7 @@ import oneflow as torch import oneflow.nn.functional as F from oneflow import nn -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr transformed_diffusers = transform_mgr.transform_package("diffusers") diff --git a/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py b/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py index 2930273ac..54ae20ae3 100644 --- a/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/unet_2d_blocks_oflow.py @@ -2,7 +2,7 @@ from packaging import version import importlib.metadata import oneflow as torch -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr diffusers_0210_v = version.parse("0.21.0") diffusers_version = version.parse(importlib.metadata.version("diffusers")) diff --git a/src/infer_compiler_registry/register_diffusers/unet_2d_condition_oflow.py b/src/infer_compiler_registry/register_diffusers/unet_2d_condition_oflow.py index 7769092c4..5fb16e84e 100644 --- a/src/infer_compiler_registry/register_diffusers/unet_2d_condition_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/unet_2d_condition_oflow.py @@ -2,7 +2,7 @@ from packaging import version import importlib.metadata import oneflow as torch -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr diffusers_0210_v = version.parse("0.21.0") diffusers_version = version.parse(importlib.metadata.version("diffusers")) diff --git a/src/infer_compiler_registry/register_diffusers_enterprise_lite/__init__.py b/src/infer_compiler_registry/register_diffusers_enterprise_lite/__init__.py index d8bf735f9..fb2028b40 100644 --- a/src/infer_compiler_registry/register_diffusers_enterprise_lite/__init__.py +++ b/src/infer_compiler_registry/register_diffusers_enterprise_lite/__init__.py @@ -1,4 +1,4 @@ -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register import oneflow as flow import diffusers_enterprise_lite diff --git a/src/infer_compiler_registry/register_onediff_quant/__init__.py b/src/infer_compiler_registry/register_onediff_quant/__init__.py index e9ab3afd8..dd5a37a26 100644 --- a/src/infer_compiler_registry/register_onediff_quant/__init__.py +++ b/src/infer_compiler_registry/register_onediff_quant/__init__.py @@ -1,4 +1,4 @@ -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register import oneflow as flow import onediff_quant diff --git a/src/onediff/infer_compiler/backends/nexfort/README.md b/src/onediff/infer_compiler/backends/nexfort/README.md index 6ee89ef75..cd76747b7 100644 --- a/src/onediff/infer_compiler/backends/nexfort/README.md +++ b/src/onediff/infer_compiler/backends/nexfort/README.md @@ -15,9 +15,8 @@ pip3 install 
nexfort-0.1.dev195+torch230cu121-cp310-cp310-manylinux2014_x86_64.w ### Run pixart alpha (with nexfort backend) ``` -cd benchmarks # model_id_or_path_to_PixArt-XL-2-1024-MS: /data/hf_models/PixArt-XL-2-1024-MS/ -python3 text_to_image.py --model model_id_or_path_to_PixArt-XL-2-1024-MS --scheduler none --steps 20 --compiler nexfort +python3 ./benchmarks/text_to_image.py --model model_id_or_path_to_PixArt-XL-2-1024-MS --scheduler none --steps 20 --compiler nexfort ``` Performance on NVIDIA A100-PCIE-40GB: Iterations per second of progress bar: 11.7 diff --git a/src/onediff/infer_compiler/import_tools/__init__.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/__init__.py similarity index 100% rename from src/onediff/infer_compiler/import_tools/__init__.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/__init__.py diff --git a/src/onediff/infer_compiler/import_tools/dyn_mock_mod.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/dyn_mock_mod.py similarity index 100% rename from src/onediff/infer_compiler/import_tools/dyn_mock_mod.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/dyn_mock_mod.py diff --git a/src/onediff/infer_compiler/import_tools/format_utils.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/format_utils.py similarity index 100% rename from src/onediff/infer_compiler/import_tools/format_utils.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/format_utils.py diff --git a/src/onediff/infer_compiler/import_tools/import_module_utils.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/import_module_utils.py similarity index 100% rename from src/onediff/infer_compiler/import_tools/import_module_utils.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/import_module_utils.py diff --git a/src/onediff/infer_compiler/import_tools/importer.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/importer.py similarity index 100% rename from src/onediff/infer_compiler/import_tools/importer.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/importer.py diff --git a/src/onediff/infer_compiler/import_tools/patch_for_compiler.py b/src/onediff/infer_compiler/backends/oneflow/import_tools/patch_for_compiler.py similarity index 100% rename from src/onediff/infer_compiler/import_tools/patch_for_compiler.py rename to src/onediff/infer_compiler/backends/oneflow/import_tools/patch_for_compiler.py diff --git a/src/onediff/infer_compiler/transform/__init__.py b/src/onediff/infer_compiler/backends/oneflow/transform/__init__.py similarity index 100% rename from src/onediff/infer_compiler/transform/__init__.py rename to src/onediff/infer_compiler/backends/oneflow/transform/__init__.py diff --git a/src/onediff/infer_compiler/transform/builtin_transform.py b/src/onediff/infer_compiler/backends/oneflow/transform/builtin_transform.py similarity index 100% rename from src/onediff/infer_compiler/transform/builtin_transform.py rename to src/onediff/infer_compiler/backends/oneflow/transform/builtin_transform.py diff --git a/src/onediff/infer_compiler/transform/custom_transform.py b/src/onediff/infer_compiler/backends/oneflow/transform/custom_transform.py similarity index 100% rename from src/onediff/infer_compiler/transform/custom_transform.py rename to src/onediff/infer_compiler/backends/oneflow/transform/custom_transform.py diff --git a/src/onediff/infer_compiler/transform/manager.py b/src/onediff/infer_compiler/backends/oneflow/transform/manager.py similarity index 
100% rename from src/onediff/infer_compiler/transform/manager.py rename to src/onediff/infer_compiler/backends/oneflow/transform/manager.py diff --git a/src/onediff/infer_compiler/transform/patch_for_comfy.py b/src/onediff/infer_compiler/backends/oneflow/transform/patch_for_comfy.py similarity index 100% rename from src/onediff/infer_compiler/transform/patch_for_comfy.py rename to src/onediff/infer_compiler/backends/oneflow/transform/patch_for_comfy.py diff --git a/src/onediff/infer_compiler/transform/patch_for_diffusers.py b/src/onediff/infer_compiler/backends/oneflow/transform/patch_for_diffusers.py similarity index 100% rename from src/onediff/infer_compiler/transform/patch_for_diffusers.py rename to src/onediff/infer_compiler/backends/oneflow/transform/patch_for_diffusers.py diff --git a/src/onediff/optimization/attention_processor.py b/src/onediff/optimization/attention_processor.py index 4ee76321d..c57dcc602 100644 --- a/src/onediff/optimization/attention_processor.py +++ b/src/onediff/optimization/attention_processor.py @@ -123,7 +123,7 @@ def __call__( try: - from onediff.infer_compiler.transform import register + from onediff.infer_compiler.backends.oneflow.transform import register def convert_fused_self_attn_processor( mod: FusedSelfAttnProcessor, verbose=True @@ -132,4 +132,4 @@ def convert_fused_self_attn_processor( register(torch2oflow_funcs=convert_fused_self_attn_processor) except: - print("Skip onediff.infer_compiler.transform.register") + print("Skip onediff.infer_compiler.backends.oneflow.transform.register") diff --git a/src/onediff/optimization/quant_optimizer.py b/src/onediff/optimization/quant_optimizer.py index 556db039f..0e3813e05 100644 --- a/src/onediff/optimization/quant_optimizer.py +++ b/src/onediff/optimization/quant_optimizer.py @@ -6,7 +6,7 @@ from ..infer_compiler.backends.oneflow.utils.version_util import is_quantization_enabled from ..infer_compiler.backends.oneflow.utils.cost_util import cost_cnt from ..infer_compiler.utils.module_operations import modify_sub_module -from ..infer_compiler.transform.manager import transform_mgr +from ..infer_compiler.backends.oneflow.transform.manager import transform_mgr __all__ = ["quantize_model", "varify_can_use_quantization"] diff --git a/tests/convert_torch_to_of/test_torch2of_demo.py b/tests/convert_torch_to_of/test_torch2of_demo.py index eabb63f1e..df4eb5202 100644 --- a/tests/convert_torch_to_of/test_torch2of_demo.py +++ b/tests/convert_torch_to_of/test_torch2of_demo.py @@ -9,7 +9,7 @@ import unittest import numpy as np from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import transform_mgr +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr class PyTorchModel(torch.nn.Module): diff --git a/tests/test_dual_module_list.py b/tests/test_dual_module_list.py index 96e686b2a..94f7b3da8 100644 --- a/tests/test_dual_module_list.py +++ b/tests/test_dual_module_list.py @@ -1,6 +1,6 @@ import numpy as np from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register import torch import torch.nn as nn import oneflow as flow diff --git a/tests/test_quantize_custom_model.py b/tests/test_quantize_custom_model.py index 8583cbb4e..00a2fbce5 100644 --- a/tests/test_quantize_custom_model.py +++ b/tests/test_quantize_custom_model.py @@ -7,7 +7,7 @@ from torch import nn from onediff.infer_compiler import oneflow_compile -from 
onediff.infer_compiler.transform import register +from onediff.infer_compiler.backends.oneflow.transform import register from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version is_community = is_community_version() From 580e783368846637d5b58936f56a880998655f60 Mon Sep 17 00:00:00 2001 From: strint Date: Tue, 21 May 2024 13:01:10 +0800 Subject: [PATCH 10/13] fix path --- .../infer_compiler/backends/oneflow/deployable_module.py | 4 ++-- src/onediff/infer_compiler/backends/oneflow/dual_module.py | 2 +- src/onediff/infer_compiler/backends/oneflow/graph.py | 4 ++-- .../infer_compiler/backends/oneflow/graph_management_utils.py | 4 ++-- src/onediff/infer_compiler/backends/oneflow/oneflow.py | 2 +- .../backends/oneflow/transform/custom_transform.py | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/onediff/infer_compiler/backends/oneflow/deployable_module.py b/src/onediff/infer_compiler/backends/oneflow/deployable_module.py index 51b2f1086..dd73dd111 100644 --- a/src/onediff/infer_compiler/backends/oneflow/deployable_module.py +++ b/src/onediff/infer_compiler/backends/oneflow/deployable_module.py @@ -8,8 +8,8 @@ from ..deployable_module import DeployableModule -from ...transform.manager import transform_mgr -from ...transform.builtin_transform import torch2oflow +from .transform.manager import transform_mgr +from .transform.builtin_transform import torch2oflow from .dual_module import DualModule, get_mixed_dual_module from .oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled diff --git a/src/onediff/infer_compiler/backends/oneflow/dual_module.py b/src/onediff/infer_compiler/backends/oneflow/dual_module.py index 5483678c0..3e6bfb979 100644 --- a/src/onediff/infer_compiler/backends/oneflow/dual_module.py +++ b/src/onediff/infer_compiler/backends/oneflow/dual_module.py @@ -8,7 +8,7 @@ from oneflow.utils.tensor import to_torch from onediff.utils import logger -from ...transform.builtin_transform import torch2oflow +from .transform.builtin_transform import torch2oflow from .oneflow_exec_mode import oneflow_exec_mode, oneflow_exec_mode_enabled diff --git a/src/onediff/infer_compiler/backends/oneflow/graph.py b/src/onediff/infer_compiler/backends/oneflow/graph.py index 823041ca0..301270832 100644 --- a/src/onediff/infer_compiler/backends/oneflow/graph.py +++ b/src/onediff/infer_compiler/backends/oneflow/graph.py @@ -1,8 +1,8 @@ import oneflow as flow from onediff.utils import logger -from ...transform.manager import transform_mgr -from ...transform.builtin_transform import reverse_proxy_class +from .transform.manager import transform_mgr +from .transform.builtin_transform import reverse_proxy_class from .utils.cost_util import cost_cnt diff --git a/src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py b/src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py index ac9af233b..6dc83bc24 100644 --- a/src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py +++ b/src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py @@ -7,8 +7,8 @@ from pathlib import Path from functools import wraps from oneflow.framework.args_tree import ArgsTree -from ...transform.builtin_transform import torch2oflow -from ...transform.manager import transform_mgr +from .transform.builtin_transform import torch2oflow +from .transform.manager import transform_mgr from .utils.cost_util import cost_time from .env_var import OneflowCompileOptions from onediff.utils import logger diff --git 
a/src/onediff/infer_compiler/backends/oneflow/oneflow.py b/src/onediff/infer_compiler/backends/oneflow/oneflow.py index bd6d0101a..9590d67bf 100644 --- a/src/onediff/infer_compiler/backends/oneflow/oneflow.py +++ b/src/onediff/infer_compiler/backends/oneflow/oneflow.py @@ -29,7 +29,7 @@ def compile(torch_module: torch.nn.Module, *, options=None): forward_pre_check_and_update_state_hook, forward_generate_constant_folding_info_hook, ) - from ...transform.custom_transform import set_default_registry + from .transform.custom_transform import set_default_registry set_oneflow_default_env_vars() set_default_registry() diff --git a/src/onediff/infer_compiler/backends/oneflow/transform/custom_transform.py b/src/onediff/infer_compiler/backends/oneflow/transform/custom_transform.py index 45d46ed8d..feab6000f 100644 --- a/src/onediff/infer_compiler/backends/oneflow/transform/custom_transform.py +++ b/src/onediff/infer_compiler/backends/oneflow/transform/custom_transform.py @@ -49,7 +49,7 @@ def import_module_safely(module_path, module_name): logger.warning(f"Failed to import {module_name} from {module_path}. {e=}") # compiler_registry_path - registry_path = Path(__file__).parents[3] / "infer_compiler_registry" + registry_path = Path(__file__).parents[5] / "infer_compiler_registry" if importlib.util.find_spec("diffusers") is not None: import_module_safely(registry_path / "register_diffusers", "register_diffusers") From 8c10aa87c07b6f8bb9d36227a1789cb0026c1b92 Mon Sep 17 00:00:00 2001 From: strint Date: Tue, 21 May 2024 16:51:08 +0800 Subject: [PATCH 11/13] fix import --- .../hijack_ipadapter_plus/set_model_patch_replace.py | 2 -- .../infer_compiler_registry/register_comfy/__init__.py | 2 +- .../oneflow/patch_management/quantized_input_patch.py | 2 +- onediff_diffusers_extensions/onediffx/lora/__init__.py | 2 +- onediff_diffusers_extensions/onediffx/lora/utils.py | 2 +- onediff_sd_webui_extensions/onediff_lora.py | 2 +- onediff_sd_webui_extensions/scripts/onediff.py | 3 +-- .../backends/oneflow/graph_management_utils.py | 7 +++++-- .../backends/oneflow/transform/builtin_transform.py | 2 +- tests/convert_torch_to_of/test_patch_for_compiling.py | 2 +- 10 files changed, 13 insertions(+), 13 deletions(-) diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py index 29a71fce3..588fe7971 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py @@ -6,8 +6,6 @@ from ..utils.booster_utils import clear_deployable_module_cache_and_unbind from ..patch_management import PatchType, create_patch_executor -# from onediff.infer_compiler.utils.cost_util import cost_time -# @cost_time(debug=True, message="set_model_patch_replace_v2") def set_model_patch_replace_v2(org_fn, model, patch_kwargs, key): diff_model = model.model.diffusion_model cache_patch_executor = create_patch_executor(PatchType.CachedCrossAttentionPatch) diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py index eafbcf192..32b668121 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py @@ -8,7 +8,7 @@ from nodes import * # must 
imported before import comfy from onediff.infer_compiler.backends.oneflow.transform import register -from onediff.infer_compiler.utils import is_community_version +from onediff.infer_compiler.backends.oneflow.utils.version_util import is_community_version from .attention import CrossAttention as CrossAttention1f from .attention import SpatialTransformer as SpatialTransformer1f diff --git a/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py b/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py index 5b8143605..80a242f2d 100644 --- a/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py +++ b/onediff_comfy_nodes/modules/oneflow/patch_management/quantized_input_patch.py @@ -1,6 +1,6 @@ from register_comfy.CrossAttentionPatch import is_crossAttention_patch -from onediff.infer_compiler.utils import online_quantization_utils +from onediff.infer_compiler.backends.oneflow import online_quantization_utils from .patch_executor import PatchExecutorBase diff --git a/onediff_diffusers_extensions/onediffx/lora/__init__.py b/onediff_diffusers_extensions/onediffx/lora/__init__.py index 24b78f93d..5d99001bc 100644 --- a/onediff_diffusers_extensions/onediffx/lora/__init__.py +++ b/onediff_diffusers_extensions/onediffx/lora/__init__.py @@ -6,4 +6,4 @@ get_active_adapters, ) -from onediff.infer_compiler.utils.param_utils import update_graph_with_constant_folding_info +from onediff.infer_compiler.backends.oneflow.param_utils import update_graph_with_constant_folding_info diff --git a/onediff_diffusers_extensions/onediffx/lora/utils.py b/onediff_diffusers_extensions/onediffx/lora/utils.py index 49fe2aca2..1f62539eb 100644 --- a/onediff_diffusers_extensions/onediffx/lora/utils.py +++ b/onediff_diffusers_extensions/onediffx/lora/utils.py @@ -14,7 +14,7 @@ else: is_peft_available = lambda: False -from onediff.infer_compiler.utils.param_utils import update_graph_related_tensor +from onediff.infer_compiler.backends.oneflow.param_utils import update_graph_related_tensor if version.parse(diffusers.__version__) <= version.parse("0.20.0"): from diffusers.loaders import PatchedLoraProjection diff --git a/onediff_sd_webui_extensions/onediff_lora.py b/onediff_sd_webui_extensions/onediff_lora.py index 77066873f..ceb99971f 100644 --- a/onediff_sd_webui_extensions/onediff_lora.py +++ b/onediff_sd_webui_extensions/onediff_lora.py @@ -1,6 +1,6 @@ import torch from onediff.infer_compiler import DeployableModule -from onediff.infer_compiler.utils.param_utils import update_graph_related_tensor +from onediff.infer_compiler.backends.oneflow.param_utils import update_graph_related_tensor class HijackLoraActivate: diff --git a/onediff_sd_webui_extensions/scripts/onediff.py b/onediff_sd_webui_extensions/scripts/onediff.py index 62119740c..069dfa35a 100644 --- a/onediff_sd_webui_extensions/scripts/onediff.py +++ b/onediff_sd_webui_extensions/scripts/onediff.py @@ -18,12 +18,11 @@ from onediff_lora import HijackLoraActivate from onediff_hijack import do_hijack as onediff_do_hijack -from onediff.utils import logger +from onediff.utils import logger, parse_boolean_from_env from onediff.optimization.quant_optimizer import ( quantize_model, varify_can_use_quantization, ) -from onediff.infer_compiler.utils.env_var import parse_boolean_from_env from onediff import __version__ as onediff_version from oneflow import __version__ as oneflow_version diff --git a/src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py 
b/src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py index 6dc83bc24..534fe69c4 100644 --- a/src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py +++ b/src/onediff/infer_compiler/backends/oneflow/graph_management_utils.py @@ -57,9 +57,12 @@ def wrapper(self, *args, **kwargs): # Avoid graph file conflicts if importlib.util.find_spec("register_comfy"): from register_comfy import CrossAttntionStateDictPatch as state_patch + attn2_patch_sum = state_patch.attn2_patch_sum(input_kwargs=kwargs) if attn2_patch_sum > 0: - graph_file = graph_file.replace(".graph", f"_attn2_{attn2_patch_sum}.graph") + graph_file = graph_file.replace( + ".graph", f"_attn2_{attn2_patch_sum}.graph" + ) def process_state_dict_before_saving(state_dict: Dict): nonlocal self, args, kwargs, graph_file @@ -98,7 +101,7 @@ def handle_graph_saving(): parent_dir = os.path.dirname(graph_file) if parent_dir != "": os.makedirs(parent_dir, exist_ok=True) - + # Avoid graph file conflicts if os.path.exists(graph_file): raise FileExistsError(f"File {graph_file} exists!") diff --git a/src/onediff/infer_compiler/backends/oneflow/transform/builtin_transform.py b/src/onediff/infer_compiler/backends/oneflow/transform/builtin_transform.py index 776284717..83a2b9dd6 100644 --- a/src/onediff/infer_compiler/backends/oneflow/transform/builtin_transform.py +++ b/src/onediff/infer_compiler/backends/oneflow/transform/builtin_transform.py @@ -451,7 +451,7 @@ def _(mod: types.BuiltinFunctionType, verbose=False): if mod_name is not None: m = importlib.import_module(mod_name) return getattr(m, mod.__name__) - + return default_converter(mod, verbose) diff --git a/tests/convert_torch_to_of/test_patch_for_compiling.py b/tests/convert_torch_to_of/test_patch_for_compiling.py index 6df8ac758..21844a4fa 100644 --- a/tests/convert_torch_to_of/test_patch_for_compiling.py +++ b/tests/convert_torch_to_of/test_patch_for_compiling.py @@ -6,7 +6,7 @@ """ import pytest import numpy as np -from onediff.infer_compiler.utils.patch_for_compiler import FakeCuda +from onediff.infer_compiler.backends.oneflow.import_tools.patch_for_compiler import FakeCuda @pytest.mark.parametrize("batch_size", [8]) From 0504ed7205c25e9cd5533e65d8530d18dc1be0d6 Mon Sep 17 00:00:00 2001 From: strint Date: Tue, 21 May 2024 19:29:57 +0800 Subject: [PATCH 12/13] fix impor --- onediff_comfy_nodes/modules/oneflow/booster_basic.py | 2 +- onediff_comfy_nodes/modules/oneflow/booster_patch.py | 2 +- onediff_comfy_nodes/modules/oneflow/booster_quantization.py | 2 +- onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py | 2 +- onediff_diffusers_extensions/onediffx/lora/utils.py | 2 +- src/onediff/torch_utils/model_inplace_assign.py | 2 +- tests/test_dual_module_list.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/onediff_comfy_nodes/modules/oneflow/booster_basic.py b/onediff_comfy_nodes/modules/oneflow/booster_basic.py index f1d722036..cf7773993 100644 --- a/onediff_comfy_nodes/modules/oneflow/booster_basic.py +++ b/onediff_comfy_nodes/modules/oneflow/booster_basic.py @@ -7,7 +7,7 @@ from comfy.model_patcher import ModelPatcher from comfy.sd import VAE from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule +from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule from ..booster_interface import BoosterExecutor from .onediff_controlnet import OneDiffControlLora diff --git 
a/onediff_comfy_nodes/modules/oneflow/booster_patch.py b/onediff_comfy_nodes/modules/oneflow/booster_patch.py index 6bff76ba9..b12e1a042 100644 --- a/onediff_comfy_nodes/modules/oneflow/booster_patch.py +++ b/onediff_comfy_nodes/modules/oneflow/booster_patch.py @@ -2,7 +2,7 @@ from functools import singledispatchmethod from comfy.model_patcher import ModelPatcher -from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule +from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule from ..booster_interface import BoosterExecutor diff --git a/onediff_comfy_nodes/modules/oneflow/booster_quantization.py b/onediff_comfy_nodes/modules/oneflow/booster_quantization.py index 7254ae0b3..f4b50d6e4 100644 --- a/onediff_comfy_nodes/modules/oneflow/booster_quantization.py +++ b/onediff_comfy_nodes/modules/oneflow/booster_quantization.py @@ -8,7 +8,7 @@ from comfy.controlnet import ControlNet from comfy.model_patcher import ModelPatcher from onediff.infer_compiler import oneflow_compile -from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule +from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule from onediff_quant.quantization import QuantizationConfig from onediff_quant.quantization.module_operations import get_sub_module from onediff_quant.quantization.quantize_calibrators import ( diff --git a/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py b/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py index 5e82ff7b5..a70246405 100644 --- a/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py +++ b/onediff_comfy_nodes/modules/oneflow/utils/booster_utils.py @@ -5,7 +5,7 @@ from comfy.model_base import BaseModel, SVD_img2vid from comfy.model_patcher import ModelPatcher -from onediff.infer_compiler.oneflow import OneflowDeployableModule as DeployableModule +from onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule from onediff.utils import set_boolean_env_var from ..patch_management import PatchType, create_patch_executor diff --git a/onediff_diffusers_extensions/onediffx/lora/utils.py b/onediff_diffusers_extensions/onediffx/lora/utils.py index 1f62539eb..89b029d45 100644 --- a/onediff_diffusers_extensions/onediffx/lora/utils.py +++ b/onediff_diffusers_extensions/onediffx/lora/utils.py @@ -20,7 +20,7 @@ from diffusers.loaders import PatchedLoraProjection else: from diffusers.models.lora import PatchedLoraProjection -from onediff.infer_compiler.oneflow.dual_module import DualModule +from onediff.infer_compiler.backends.oneflow.dual_module import DualModule if version.parse(diffusers.__version__) <= version.parse("0.20.0"): from diffusers.loaders import PatchedLoraProjection diff --git a/src/onediff/torch_utils/model_inplace_assign.py b/src/onediff/torch_utils/model_inplace_assign.py index f61276f5b..c8edc6a6d 100644 --- a/src/onediff/torch_utils/model_inplace_assign.py +++ b/src/onediff/torch_utils/model_inplace_assign.py @@ -10,7 +10,7 @@ class TensorInplaceAssign: r""" This class is used as a context manager, instantiated with either a `torch.nn.Module` or - `onediff.infer_compiler.deployable_module.DeployableModule` during initialization. + `onediff.infer_compiler.backends.deployable_module.DeployableModule` during initialization. Within the context manager, all Tensors associated with the provided module will be transformed into AutoInplaceCopyTensor. 
After transformed, assignments to Tensor.data are modified to in-place copying. diff --git a/tests/test_dual_module_list.py b/tests/test_dual_module_list.py index 94f7b3da8..28a711404 100644 --- a/tests/test_dual_module_list.py +++ b/tests/test_dual_module_list.py @@ -39,7 +39,7 @@ def forward(self, x): assert np.allclose(y_torch.detach().cpu(), y_oneflow.detach().cpu(), 1e-03, 1e-03) -from onediff.infer_compiler.oneflow.dual_module import DualModule, DualModuleList +from onediff.infer_compiler.backends.oneflow.dual_module import DualModule, DualModuleList assert isinstance(m.linears, DualModuleList) From 35af0bf9727b422f6560a56a0233592561a6e681 Mon Sep 17 00:00:00 2001 From: strint Date: Tue, 21 May 2024 23:02:50 +0800 Subject: [PATCH 13/13] fix import --- src/onediff/optimization/quant_optimizer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/onediff/optimization/quant_optimizer.py b/src/onediff/optimization/quant_optimizer.py index 0e3813e05..37d45c9b6 100644 --- a/src/onediff/optimization/quant_optimizer.py +++ b/src/onediff/optimization/quant_optimizer.py @@ -3,10 +3,10 @@ import torch.nn as nn from copy import deepcopy from onediff.utils import logger -from ..infer_compiler.backends.oneflow.utils.version_util import is_quantization_enabled -from ..infer_compiler.backends.oneflow.utils.cost_util import cost_cnt -from ..infer_compiler.utils.module_operations import modify_sub_module -from ..infer_compiler.backends.oneflow.transform.manager import transform_mgr +from onediff.infer_compiler.backends.oneflow.utils.version_util import is_quantization_enabled +from onediff.infer_compiler.backends.oneflow.utils.cost_util import cost_cnt +from onediff.infer_compiler.backends.oneflow.transform.manager import transform_mgr +from onediff.torch_utils.module_operations import modify_sub_module __all__ = ["quantize_model", "varify_can_use_quantization"]
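
The later patches in this series relocate the oneflow-specific helpers (`transform`, `import_tools`, `param_utils`, the deployable module, and the version/cost utilities) from `onediff.infer_compiler.*` to `onediff.infer_compiler.backends.oneflow.*`, while `oneflow_compile` itself remains importable from `onediff.infer_compiler`. Below is a minimal sketch of downstream usage against the new layout; it is not part of the patch itself, and the diffusers model id and prompt are illustrative placeholders rather than values taken from this series.

```
# Minimal sketch of the post-refactor import locations (not part of the patch).
# The model id and prompt below are illustrative placeholders.
import torch
from diffusers import StableDiffusionPipeline

# Unchanged by this series:
from onediff.infer_compiler import oneflow_compile
# Moved in this series (previously onediff.infer_compiler.oneflow):
from onediff.infer_compiler.backends.oneflow import (
    OneflowDeployableModule as DeployableModule,
)

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# oneflow_compile wraps a torch.nn.Module; the returned wrapper is a DeployableModule.
pipe.unet = oneflow_compile(pipe.unet)
assert isinstance(pipe.unet, DeployableModule)

image = pipe("a photo of an astronaut riding a horse", num_inference_steps=30).images[0]
image.save("out.png")
```

Helpers such as `register`, `proxy_class`, `torch2oflow`, and `transform_mgr` are likewise imported from `onediff.infer_compiler.backends.oneflow.transform` after this series, as the updated in-tree registries above show.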