siliconflow · strint · May 15, 2024 · Apr 9, 2024 · Apr 9, 2024 · Apr 9, 2024
diff --git a/benchmarks/image_to_video.py b/benchmarks/image_to_video.py
@@ -41,7 +41,7 @@
 
 import oneflow as flow
 import torch
-from onediffx import compile_pipe, compiler_config
+from onediffx import compile_pipe, compile_options
 from diffusers.utils import load_image, export_to_video
 
 
@@ -189,10 +189,10 @@ def main():
         # especially for 40xx series cards.
         # So here by partially disabling the half accumulation in MHA partially,
         # we can get a good balance.
-        compiler_config.attention_allow_half_precision_score_accumulation_max_m = (
+        compile_options.oneflow.attention_allow_half_precision_score_accumulation_max_m = (
             args.attention_fp16_score_accum_max_m
         )
-        pipe = compile_pipe(pipe,)
+        pipe = compile_pipe(pipe, options=compile_options)
     elif args.compiler == "compile":
         pipe.unet = torch.compile(pipe.unet)
         if hasattr(pipe, "controlnet"):

diff --git a/benchmarks/text_to_image.py b/benchmarks/text_to_image.py
@@ -30,7 +30,6 @@
 from PIL import Image, ImageDraw
 from diffusers.utils import load_image
 
-import oneflow as flow
 from onediffx import compile_pipe
 
 
@@ -62,7 +61,7 @@ def parse_args():
         "--compiler",
         type=str,
         default="oneflow",
-        choices=["none", "oneflow", "compile", "compile-max-autotune"],
+        choices=["none", "oneflow", "nexfort", "compile", "compile-max-autotune"],
     )
     return parser.parse_args()
 
@@ -162,6 +161,8 @@ def main():
         pass
     elif args.compiler == "oneflow":
         pipe = compile_pipe(pipe)
+    elif args.compiler == "nexfort":
+        pipe = compile_pipe(pipe, backend="nexfort")
     elif args.compiler in ("compile", "compile-max-autotune"):
         mode = "max-autotune" if args.compiler == "compile-max-autotune" else None
         pipe.unet = torch.compile(pipe.unet, mode=mode)
@@ -248,10 +249,13 @@ def get_kwarg_inputs():
     iter_per_sec = iter_profiler.get_iter_per_sec()
     if iter_per_sec is not None:
         print(f"Iterations per second: {iter_per_sec:.3f}")
-    cuda_mem_after_used = flow._oneflow_internal.GetCUDAMemoryUsed()
-    host_mem_after_used = flow._oneflow_internal.GetCPUMemoryUsed()
-    print(f"CUDA Mem after: {cuda_mem_after_used / 1024:.3f}GiB")
-    print(f"Host Mem after: {host_mem_after_used / 1024:.3f}GiB")
+    if args.compiler == "oneflow":
+        import oneflow as flow
+
+        cuda_mem_after_used = flow._oneflow_internal.GetCUDAMemoryUsed() / 1024
+    else:
+        cuda_mem_after_used = torch.cuda.max_memory_allocated() / (1024 ** 3)
+    print(f"CUDA Mem after: {cuda_mem_after_used:.3f}GiB")
     print("=======================================")
 
     if args.output_image is not None:

diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py
@@ -1,12 +1,11 @@
 # /ComfyUI/custom_nodes/ComfyUI-AnimateDiff-Evolved/animatediff/sampling.py
 import oneflow as flow
 from einops import rearrange
-from onediff.infer_compiler.deployable_module import DeployableModule
+from onediff.infer_compiler import DeployableModule
 from onediff.infer_compiler.transform import register
 from oneflow.nn.functional import group_norm
 
-from ._config import (animatediff_hijacker, animatediff_of, animatediff_pt,
-                      comfy_of)
+from ._config import animatediff_hijacker, animatediff_of, animatediff_pt, comfy_of
 
 FunctionInjectionHolder = animatediff_pt.animatediff.sampling.FunctionInjectionHolder
 

diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py
@@ -3,7 +3,7 @@
 import comfy
 from comfy.ldm.modules.diffusionmodules.model import AttnBlock
 from nodes import *  # must imported before import comfy
-from onediff.infer_compiler import register
+from onediff.infer_compiler.transform import register
 from onediff.infer_compiler.utils import is_community_version
 
 from .attention import CrossAttention as CrossAttention1f

diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py
@@ -1,6 +1,6 @@
 import onediff_quant
 import oneflow as flow
-from onediff.infer_compiler import register
+from onediff.infer_compiler.transform import register
 
 torch2oflow_class_map = {
     onediff_quant.FakeQuantModule: onediff_quant.OneFlowFakeQuantModule,

diff --git a/onediff_comfy_nodes/modules/oneflow/utils/__init__.py b/onediff_comfy_nodes/modules/oneflow/utils/__init__.py
@@ -2,13 +2,17 @@
 import re
 import time
 
-from onediff.infer_compiler.deployable_module import DeployableModule
+from onediff.infer_compiler import DeployableModule
 
-from .model_patcher import (OneFlowDeepCacheSpeedUpModelPatcher,
-                            OneFlowSpeedUpModelPatcher)
+from .model_patcher import (
+    OneFlowDeepCacheSpeedUpModelPatcher,
+    OneFlowSpeedUpModelPatcher,
+)
 from .onediff_load_utils import onediff_load_quant_checkpoint_advanced
-from .onediff_quant_utils import (quantize_and_save_model,
-                                  replace_module_with_quantizable_module)
+from .onediff_quant_utils import (
+    quantize_and_save_model,
+    replace_module_with_quantizable_module,
+)
 
 OUTPUT_FOLDER = os.path.join(
     os.path.dirname(os.path.realpath(__file__)), "..", "graphs"

diff --git a/onediff_comfy_nodes/modules/oneflow/utils/deep_cache_speedup.py b/onediff_comfy_nodes/modules/oneflow/utils/deep_cache_speedup.py
@@ -2,7 +2,6 @@
 from comfy import model_management
 from comfy.model_base import SVD_img2vid
 from onediff.infer_compiler import oneflow_compile
-from onediff.infer_compiler.utils import set_boolean_env_var
 from register_comfy import DeepCacheUNet, FastDeepCacheUNet
 
 from .model_patcher import OneFlowDeepCacheSpeedUpModelPatcher

diff --git a/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py b/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py
@@ -32,8 +32,11 @@ def __init__(
         graph_path=None,
         graph_device=None,
     ):
-        from onediff.infer_compiler import CompileOptions, oneflow_compile
-        from onediff.infer_compiler.deployable_module import DeployableModule
+        from onediff.infer_compiler import (
+            CompileOptions,
+            oneflow_compile,
+            DeployableModule,
+        )
 
         self.weight_inplace_update = weight_inplace_update
         self.object_patches = {}
@@ -502,8 +505,11 @@ def __init__(
         use_graph=None,
         gen_compile_options=None,
     ):
-        from onediff.infer_compiler import CompileOptions, oneflow_compile
-        from onediff.infer_compiler.deployable_module import DeployableModule
+        from onediff.infer_compiler import (
+            CompileOptions,
+            oneflow_compile,
+            DeployableModule,
+        )
 
         self.weight_inplace_update = weight_inplace_update
         self.object_patches = {}

diff --git a/onediff_diffusers_extensions/README.md b/onediff_diffusers_extensions/README.md
@@ -197,7 +197,7 @@ deepcache_output = pipe(
 import torch
 
 from diffusers.utils import load_image, export_to_video
-from onediffx import compile_pipe, compiler_config
+from onediffx import compile_pipe, compile_options
 from onediffx.deep_cache import StableVideoDiffusionPipeline
 
 pipe = StableVideoDiffusionPipeline.from_pretrained(
@@ -208,8 +208,8 @@ pipe = StableVideoDiffusionPipeline.from_pretrained(
 )
 pipe.to("cuda")
 
-compiler_config.attention_allow_half_precision_score_accumulation_max_m = 0
-pipe = compile_pipe(pipe)
+compile_options.oneflow.attention_allow_half_precision_score_accumulation_max_m = 0
+pipe = compile_pipe(pipe, options=compile_options)
 
 input_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png?download=true")
 input_image = input_image.resize((1024, 576))

diff --git a/onediff_diffusers_extensions/examples/experimental/text_to_image_sdxl_torch_compile.py b/onediff_diffusers_extensions/examples/experimental/text_to_image_sdxl_torch_compile.py
@@ -10,7 +10,7 @@
 import oneflow as flow
 
 from diffusers import DiffusionPipeline
-from onediff.infer_compiler import oneflow_compile, CompileOptions
+from onediff.infer_compiler import oneflow_compile, compile_options
 
 parser = argparse.ArgumentParser()
 parser.add_argument(
@@ -53,7 +53,6 @@
 # Compile unet with oneflow
 if cmd_args.compile:
     print("unet is compiled to oneflow.")
-    compile_options = CompileOptions()
     compile_options.oneflow.max_cached_graph_size = cmd_args.num_dynamic_input_size
     base.unet = oneflow_compile(base.unet, options=compile_options)
 

diff --git a/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sd_sdxl_enterprise.py b/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sd_sdxl_enterprise.py
@@ -6,7 +6,7 @@
 import torch.nn as nn
 
 # oneflow_compile should be imported before importing any diffusers
-from onediff.infer_compiler import oneflow_compile, CompileOptions
+from onediff.infer_compiler import oneflow_compile, compile_options
 
 
 def parse_args():
@@ -110,26 +110,29 @@ def parse_args():
         pipe.unet, sub_module_name, sub_calibrate_info, False, False, args.bits,
     )
 
-options = CompileOptions()
-options.oneflow.use_graph = args.graph
+compile_options.oneflow.use_graph = args.graph
 
 if args.compile_text_encoder:
     if pipe.text_encoder is not None:
-        pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=options)
+        pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=compile_options)
     if hasattr(pipe, "text_encoder_2"):
-        pipe.text_encoder_2 = oneflow_compile(pipe.text_encoder_2, options=options)
+        pipe.text_encoder_2 = oneflow_compile(
+            pipe.text_encoder_2, options=compile_options
+        )
 
 if args.compile:
     if pipe.text_encoder is not None:
-        pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=options)
+        pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=compile_options)
     if hasattr(pipe, "text_encoder_2"):
-        pipe.text_encoder_2 = oneflow_compile(pipe.text_encoder_2, options=options)
-    pipe.unet = oneflow_compile(pipe.unet, options=options)
-    pipe.fast_unet = oneflow_compile(pipe.fast_unet, options=options)
+        pipe.text_encoder_2 = oneflow_compile(
+            pipe.text_encoder_2, options=compile_options
+        )
+    pipe.unet = oneflow_compile(pipe.unet, options=compile_options)
+    pipe.fast_unet = oneflow_compile(pipe.fast_unet, options=compile_options)
     if hasattr(pipe, "text_encoder_2") and pipe.needs_upcasting:
         # To avoid mis-match of loaded graph and loaded model
         pipe.upcast_vae()
-    pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=options)
+    pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=compile_options)
 
 torch.manual_seed(args.seed)
 

diff --git a/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sdxl.py b/onediff_diffusers_extensions/examples/text_to_image_deep_cache_sdxl.py
@@ -7,9 +7,7 @@
 
 import torch
 
-from onediffx import compile_pipe, compiler_config
-from onediff.schedulers import EulerDiscreteScheduler
-
+from onediffx import compile_pipe
 from onediffx.deep_cache import StableDiffusionXLPipeline
 
 parser = argparse.ArgumentParser()
@@ -42,13 +40,8 @@
 OUTPUT_TYPE = "pil"
 
 # SDXL base: StableDiffusionXLPipeline
-scheduler = EulerDiscreteScheduler.from_pretrained(args.base, subfolder="scheduler")
 base = StableDiffusionXLPipeline.from_pretrained(
-    args.base,
-    scheduler=scheduler,
-    torch_dtype=torch.float16,
-    variant=args.variant,
-    use_safetensors=True,
+    args.base, torch_dtype=torch.float16, variant=args.variant, use_safetensors=True,
 )
 base.to("cuda")
 

diff --git a/onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py b/onediff_diffusers_extensions/examples/text_to_image_sd_enterprise.py
@@ -2,7 +2,7 @@
 import time
 import argparse
 
-from onediff.infer_compiler import oneflow_compile, CompileOptions
+from onediff.infer_compiler import oneflow_compile, compile_options
 
 import torch
 import torch.nn as nn
@@ -92,16 +92,15 @@ def parse_args():
         pipe.unet, sub_module_name, sub_calibrate_info, False, False, args.bits,
     )
 
-options = CompileOptions()
-options.oneflow.use_graph = args.graph
+compile_options.oneflow.use_graph = args.graph
 
 if args.compile_text_encoder:
     if pipe.text_encoder is not None:
-        pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=options)
+        pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=compile_options)
 
 if args.compile:
-    pipe.unet = oneflow_compile(pipe.unet, options=options)
-    pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=options)
+    pipe.unet = oneflow_compile(pipe.unet, options=compile_options)
+    pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=compile_options)
 
 torch.manual_seed(args.seed)
 

diff --git a/onediff_diffusers_extensions/examples/text_to_image_sdxl_enterprise.py b/onediff_diffusers_extensions/examples/text_to_image_sdxl_enterprise.py
@@ -6,7 +6,7 @@
 import torch.nn as nn
 
 # oneflow_compile should be imported before importing any diffusers
-from onediff.infer_compiler import oneflow_compile, CompileOptions
+from onediff.infer_compiler import oneflow_compile, compile_options
 
 
 def parse_args():
@@ -90,18 +90,19 @@ def parse_args():
         pipe.unet, sub_module_name, sub_calibrate_info, False, False, args.bits,
     )
 
-options = CompileOptions()
-options.oneflow.use_graph = args.graph
+compile_options.oneflow.use_graph = args.graph
 
 if args.compile_text_encoder:
     if pipe.text_encoder is not None:
-        pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=options)
+        pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=compile_options)
     if pipe.text_encoder_2 is not None:
-        pipe.text_encoder_2 = oneflow_compile(pipe.text_encoder_2, options=options)
+        pipe.text_encoder_2 = oneflow_compile(
+            pipe.text_encoder_2, options=compile_options
+        )
 
 if args.compile:
-    pipe.unet = oneflow_compile(pipe.unet, options=options)
-    pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=options)
+    pipe.unet = oneflow_compile(pipe.unet, options=compile_options)
+    pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=compile_options)
 
 torch.manual_seed(args.seed)
 

diff --git a/onediff_diffusers_extensions/examples/text_to_image_sdxl_light.py b/onediff_diffusers_extensions/examples/text_to_image_sdxl_light.py
@@ -5,7 +5,7 @@
 import torch
 from safetensors.torch import load_file
 from diffusers import StableDiffusionXLPipeline
-from onediffx import compile_pipe, compiler_config, save_pipe, load_pipe
+from onediffx import compile_pipe, save_pipe, load_pipe
 from huggingface_hub import hf_hub_download
 
 try:

diff --git a/onediff_diffusers_extensions/examples/text_to_image_sdxl_reuse_pipe.py b/onediff_diffusers_extensions/examples/text_to_image_sdxl_reuse_pipe.py
@@ -4,7 +4,6 @@
 import torch
 
 from onediff.infer_compiler import oneflow_compile
-from onediff.schedulers import EulerDiscreteScheduler
 from diffusers import StableDiffusionXLPipeline
 
 # import diffusers
@@ -50,17 +49,11 @@
 OUTPUT_TYPE = "pil"
 
 # SDXL base: StableDiffusionXLPipeline
-scheduler = EulerDiscreteScheduler.from_pretrained(args.base, subfolder="scheduler")
 base = StableDiffusionXLPipeline.from_pretrained(
-    args.base,
-    scheduler=scheduler,
-    torch_dtype=torch.float16,
-    variant=args.variant,
-    use_safetensors=True,
+    args.base, torch_dtype=torch.float16, variant=args.variant, use_safetensors=True,
 )
 base.to("cuda")
 
-
 # Compile unet with oneflow
 if args.compile_unet:
     print("Compiling unet with oneflow.")
@@ -94,15 +87,13 @@
 if str(args.new_base).endswith(".safetensors"):
     new_base = StableDiffusionXLPipeline.from_single_file(
         args.new_base,
-        scheduler=scheduler,
         torch_dtype=torch.float16,
         variant=args.variant,
         use_safetensors=True,
     )
 else:
     new_base = StableDiffusionXLPipeline.from_pretrained(
         args.new_base,
-        scheduler=scheduler,
         torch_dtype=torch.float16,
         variant=args.variant,
         use_safetensors=True,