siliconflow · strint · May 15, 2024 · Apr 9, 2024 · Apr 9, 2024 · Apr 9, 2024
diff --git a/benchmarks/image_to_video.py b/benchmarks/image_to_video.py
@@ -41,7 +41,7 @@
 
 import oneflow as flow
 import torch
-from onediffx import compile_pipe, compiler_config
+from onediffx import compile_pipe, compile_options
 from diffusers.utils import load_image, export_to_video
 
 
@@ -189,10 +189,10 @@ def main():
         # especially for 40xx series cards.
         # So here, by partially disabling the half accumulation in MHA,
         # we can get a good balance.
-        compiler_config.attention_allow_half_precision_score_accumulation_max_m = (
+        compile_options.oneflow.attention_allow_half_precision_score_accumulation_max_m = (
             args.attention_fp16_score_accum_max_m
         )
-        pipe = compile_pipe(pipe,)
+        pipe = compile_pipe(pipe, options=compile_options)
     elif args.compiler == "compile":
         pipe.unet = torch.compile(pipe.unet)
         if hasattr(pipe, "controlnet"):

diff --git a/benchmarks/text_to_image.py b/benchmarks/text_to_image.py
@@ -30,7 +30,6 @@
 from PIL import Image, ImageDraw
 from diffusers.utils import load_image
 
-import oneflow as flow
 from onediffx import compile_pipe
 
 
@@ -62,7 +61,7 @@ def parse_args():
         "--compiler",
         type=str,
         default="oneflow",
-        choices=["none", "oneflow", "compile", "compile-max-autotune"],
+        choices=["none", "oneflow", "nexfort", "compile", "compile-max-autotune"],
     )
     return parser.parse_args()
 
@@ -162,6 +161,8 @@ def main():
         pass
     elif args.compiler == "oneflow":
         pipe = compile_pipe(pipe)
+    elif args.compiler == "nexfort":
+        pipe = compile_pipe(pipe, backend="nexfort")
     elif args.compiler in ("compile", "compile-max-autotune"):
         mode = "max-autotune" if args.compiler == "compile-max-autotune" else None
         pipe.unet = torch.compile(pipe.unet, mode=mode)
@@ -248,10 +249,13 @@ def get_kwarg_inputs():
     iter_per_sec = iter_profiler.get_iter_per_sec()
     if iter_per_sec is not None:
         print(f"Iterations per second: {iter_per_sec:.3f}")
-    cuda_mem_after_used = flow._oneflow_internal.GetCUDAMemoryUsed()
-    host_mem_after_used = flow._oneflow_internal.GetCPUMemoryUsed()
-    print(f"CUDA Mem after: {cuda_mem_after_used / 1024:.3f}GiB")
-    print(f"Host Mem after: {host_mem_after_used / 1024:.3f}GiB")
+    if args.compiler == "oneflow":
+        import oneflow as flow
+
+        cuda_mem_after_used = flow._oneflow_internal.GetCUDAMemoryUsed() / 1024
+    else:
+        cuda_mem_after_used = torch.cuda.max_memory_allocated() / (1024 ** 3)
+    print(f"CUDA Mem after: {cuda_mem_after_used:.3f}GiB")
     print("=======================================")
 
     if args.output_image is not None:

diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py b/onediff_comfy_nodes/modules/oneflow/hijack_animatediff/sampling.py
@@ -1,12 +1,11 @@
 # /ComfyUI/custom_nodes/ComfyUI-AnimateDiff-Evolved/animatediff/sampling.py
 import oneflow as flow
 from einops import rearrange
-from onediff.infer_compiler.deployable_module import DeployableModule
+from onediff.infer_compiler import DeployableModule
 from onediff.infer_compiler.transform import register
 from oneflow.nn.functional import group_norm
 
-from ._config import (animatediff_hijacker, animatediff_of, animatediff_pt,
-                      comfy_of)
+from ._config import animatediff_hijacker, animatediff_of, animatediff_pt, comfy_of
 
 FunctionInjectionHolder = animatediff_pt.animatediff.sampling.FunctionInjectionHolder
 

diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/__init__.py
@@ -3,7 +3,7 @@
 import comfy
 from comfy.ldm.modules.diffusionmodules.model import AttnBlock
 from nodes import *  # must imported before import comfy
-from onediff.infer_compiler import register
+from onediff.infer_compiler.transform import register
 from onediff.infer_compiler.utils import is_community_version
 
 from .attention import CrossAttention as CrossAttention1f

diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_onediff_quant.py
@@ -1,6 +1,6 @@
 import onediff_quant
 import oneflow as flow
-from onediff.infer_compiler import register
+from onediff.infer_compiler.transform import register
 
 torch2oflow_class_map = {
     onediff_quant.FakeQuantModule: onediff_quant.OneFlowFakeQuantModule,

diff --git a/onediff_comfy_nodes/modules/oneflow/utils/__init__.py b/onediff_comfy_nodes/modules/oneflow/utils/__init__.py
@@ -2,13 +2,17 @@
 import re
 import time
 
-from onediff.infer_compiler.deployable_module import DeployableModule
+from onediff.infer_compiler import DeployableModule
 
-from .model_patcher import (OneFlowDeepCacheSpeedUpModelPatcher,
-                            OneFlowSpeedUpModelPatcher)
+from .model_patcher import (
+    OneFlowDeepCacheSpeedUpModelPatcher,
+    OneFlowSpeedUpModelPatcher,
+)
 from .onediff_load_utils import onediff_load_quant_checkpoint_advanced
-from .onediff_quant_utils import (quantize_and_save_model,
-                                  replace_module_with_quantizable_module)
+from .onediff_quant_utils import (
+    quantize_and_save_model,
+    replace_module_with_quantizable_module,
+)
 
 OUTPUT_FOLDER = os.path.join(
     os.path.dirname(os.path.realpath(__file__)), "..", "graphs"

diff --git a/onediff_comfy_nodes/modules/oneflow/utils/deep_cache_speedup.py b/onediff_comfy_nodes/modules/oneflow/utils/deep_cache_speedup.py
@@ -2,7 +2,6 @@
 from comfy import model_management
 from comfy.model_base import SVD_img2vid
 from onediff.infer_compiler import oneflow_compile
-from onediff.infer_compiler.utils import set_boolean_env_var
 from register_comfy import DeepCacheUNet, FastDeepCacheUNet
 
 from .model_patcher import OneFlowDeepCacheSpeedUpModelPatcher

diff --git a/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py b/onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py
@@ -32,8 +32,11 @@ def __init__(
         graph_path=None,
         graph_device=None,
     ):
-        from onediff.infer_compiler import CompileOptions, oneflow_compile
-        from onediff.infer_compiler.deployable_module import DeployableModule
+        from onediff.infer_compiler import (
+            CompileOptions,
+            oneflow_compile,
+            DeployableModule,
+        )
 
         self.weight_inplace_update = weight_inplace_update
         self.object_patches = {}
@@ -502,8 +505,11 @@ def __init__(
         use_graph=None,
         gen_compile_options=None,
     ):
-        from onediff.infer_compiler import CompileOptions, oneflow_compile
-        from onediff.infer_compiler.deployable_module import DeployableModule
+        from onediff.infer_compiler import (
+            CompileOptions,
+            oneflow_compile,
+            DeployableModule,
+        )
 
         self.weight_inplace_update = weight_inplace_update
         self.object_patches = {}

diff --git a/onediff_diffusers_extensions/README.md b/onediff_diffusers_extensions/README.md
@@ -197,7 +197,7 @@ deepcache_output = pipe(
 import torch
 
 from diffusers.utils import load_image, export_to_video
-from onediffx import compile_pipe, compiler_config
+from onediffx import compile_pipe, compile_options
 from onediffx.deep_cache import StableVideoDiffusionPipeline
 
 pipe = StableVideoDiffusionPipeline.from_pretrained(
@@ -208,8 +208,8 @@ pipe = StableVideoDiffusionPipeline.from_pretrained(
 )
 pipe.to("cuda")
 
-compiler_config.attention_allow_half_precision_score_accumulation_max_m = 0
-pipe = compile_pipe(pipe)
+compile_options.oneflow.attention_allow_half_precision_score_accumulation_max_m = 0
+pipe = compile_pipe(pipe, options=compile_options)
 
 input_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png?download=true")
 input_image = input_image.resize((1024, 576))

diff --git a/onediff_diffusers_extensions/examples/experimental/control_net_canny.py b/onediff_diffusers_extensions/examples/experimental/control_net_canny.py
diff --git a/onediff_diffusers_extensions/examples/experimental/text_to_image_sdxl_fp16.py b/onediff_diffusers_extensions/examples/experimental/text_to_image_sdxl_fp16.py
diff --git a/onediff_diffusers_extensions/examples/experimental/text_to_image_sdxl_torch_compile.py b/onediff_diffusers_extensions/examples/experimental/text_to_image_sdxl_torch_compile.py