Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

api for new backend #794

Merged
merged 18 commits into from
May 15, 2024
Merged
6 changes: 3 additions & 3 deletions benchmarks/image_to_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@

import oneflow as flow
import torch
from onediffx import compile_pipe, compiler_config
from onediffx import compile_pipe, compile_options
from diffusers.utils import load_image, export_to_video


Expand Down Expand Up @@ -189,10 +189,10 @@ def main():
# especially for 40xx series cards.
# So here, by partially disabling the half accumulation in MHA,
# we can get a good balance.
compiler_config.attention_allow_half_precision_score_accumulation_max_m = (
compile_options.oneflow.attention_allow_half_precision_score_accumulation_max_m = (
args.attention_fp16_score_accum_max_m
)
pipe = compile_pipe(pipe,)
pipe = compile_pipe(pipe, options=compile_options)
elif args.compiler == "compile":
pipe.unet = torch.compile(pipe.unet)
if hasattr(pipe, "controlnet"):
Expand Down
16 changes: 10 additions & 6 deletions benchmarks/text_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from PIL import Image, ImageDraw
from diffusers.utils import load_image

import oneflow as flow
hjchen2 marked this conversation as resolved.
Show resolved Hide resolved
from onediffx import compile_pipe


Expand Down Expand Up @@ -62,7 +61,7 @@ def parse_args():
"--compiler",
type=str,
default="oneflow",
choices=["none", "oneflow", "compile", "compile-max-autotune"],
choices=["none", "oneflow", "nexfort", "compile", "compile-max-autotune"],
)
return parser.parse_args()

Expand Down Expand Up @@ -162,6 +161,8 @@ def main():
pass
elif args.compiler == "oneflow":
pipe = compile_pipe(pipe)
elif args.compiler == "nexfort":
pipe = compile_pipe(pipe, backend="nexfort")
elif args.compiler in ("compile", "compile-max-autotune"):
mode = "max-autotune" if args.compiler == "compile-max-autotune" else None
pipe.unet = torch.compile(pipe.unet, mode=mode)
Expand Down Expand Up @@ -248,10 +249,13 @@ def get_kwarg_inputs():
iter_per_sec = iter_profiler.get_iter_per_sec()
if iter_per_sec is not None:
print(f"Iterations per second: {iter_per_sec:.3f}")
cuda_mem_after_used = flow._oneflow_internal.GetCUDAMemoryUsed()
host_mem_after_used = flow._oneflow_internal.GetCPUMemoryUsed()
print(f"CUDA Mem after: {cuda_mem_after_used / 1024:.3f}GiB")
print(f"Host Mem after: {host_mem_after_used / 1024:.3f}GiB")
if args.compiler == "oneflow":
import oneflow as flow

cuda_mem_after_used = flow._oneflow_internal.GetCUDAMemoryUsed() / 1024
else:
cuda_mem_after_used = torch.cuda.max_memory_allocated() / (1024 ** 3)
print(f"CUDA Mem after: {cuda_mem_after_used:.3f}GiB")
print("=======================================")

if args.output_image is not None:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
# /ComfyUI/custom_nodes/ComfyUI-AnimateDiff-Evolved/animatediff/sampling.py
import oneflow as flow
from einops import rearrange
from onediff.infer_compiler.deployable_module import DeployableModule
from onediff.infer_compiler import DeployableModule
from onediff.infer_compiler.transform import register
from oneflow.nn.functional import group_norm

from ._config import (animatediff_hijacker, animatediff_of, animatediff_pt,
comfy_of)
from ._config import animatediff_hijacker, animatediff_of, animatediff_pt, comfy_of

FunctionInjectionHolder = animatediff_pt.animatediff.sampling.FunctionInjectionHolder

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import comfy
from comfy.ldm.modules.diffusionmodules.model import AttnBlock
from nodes import * # must imported before import comfy
from onediff.infer_compiler import register
from onediff.infer_compiler.transform import register
from onediff.infer_compiler.utils import is_community_version

from .attention import CrossAttention as CrossAttention1f
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import onediff_quant
import oneflow as flow
from onediff.infer_compiler import register
from onediff.infer_compiler.transform import register

torch2oflow_class_map = {
onediff_quant.FakeQuantModule: onediff_quant.OneFlowFakeQuantModule,
Expand Down
14 changes: 9 additions & 5 deletions onediff_comfy_nodes/modules/oneflow/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@
import re
import time

from onediff.infer_compiler.deployable_module import DeployableModule
from onediff.infer_compiler import DeployableModule

from .model_patcher import (OneFlowDeepCacheSpeedUpModelPatcher,
OneFlowSpeedUpModelPatcher)
from .model_patcher import (
OneFlowDeepCacheSpeedUpModelPatcher,
OneFlowSpeedUpModelPatcher,
)
from .onediff_load_utils import onediff_load_quant_checkpoint_advanced
from .onediff_quant_utils import (quantize_and_save_model,
replace_module_with_quantizable_module)
from .onediff_quant_utils import (
quantize_and_save_model,
replace_module_with_quantizable_module,
)

OUTPUT_FOLDER = os.path.join(
os.path.dirname(os.path.realpath(__file__)), "..", "graphs"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from comfy import model_management
from comfy.model_base import SVD_img2vid
from onediff.infer_compiler import oneflow_compile
from onediff.infer_compiler.utils import set_boolean_env_var
from register_comfy import DeepCacheUNet, FastDeepCacheUNet

from .model_patcher import OneFlowDeepCacheSpeedUpModelPatcher
Expand Down
14 changes: 10 additions & 4 deletions onediff_comfy_nodes/modules/oneflow/utils/model_patcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,11 @@ def __init__(
graph_path=None,
graph_device=None,
):
from onediff.infer_compiler import CompileOptions, oneflow_compile
from onediff.infer_compiler.deployable_module import DeployableModule
from onediff.infer_compiler import (
CompileOptions,
oneflow_compile,
DeployableModule,
)

self.weight_inplace_update = weight_inplace_update
self.object_patches = {}
Expand Down Expand Up @@ -502,8 +505,11 @@ def __init__(
use_graph=None,
gen_compile_options=None,
):
from onediff.infer_compiler import CompileOptions, oneflow_compile
from onediff.infer_compiler.deployable_module import DeployableModule
from onediff.infer_compiler import (
CompileOptions,
oneflow_compile,
DeployableModule,
)

self.weight_inplace_update = weight_inplace_update
self.object_patches = {}
Expand Down
6 changes: 3 additions & 3 deletions onediff_diffusers_extensions/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ deepcache_output = pipe(
import torch

from diffusers.utils import load_image, export_to_video
from onediffx import compile_pipe, compiler_config
from onediffx import compile_pipe, compile_options
from onediffx.deep_cache import StableVideoDiffusionPipeline

pipe = StableVideoDiffusionPipeline.from_pretrained(
Expand All @@ -208,8 +208,8 @@ pipe = StableVideoDiffusionPipeline.from_pretrained(
)
pipe.to("cuda")

compiler_config.attention_allow_half_precision_score_accumulation_max_m = 0
pipe = compile_pipe(pipe)
compile_options.oneflow.attention_allow_half_precision_score_accumulation_max_m = 0
pipe = compile_pipe(pipe, options=compile_options)

input_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png?download=true")
input_image = input_image.resize((1024, 576))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import oneflow as flow

from diffusers import DiffusionPipeline
from onediff.infer_compiler import oneflow_compile, CompileOptions
from onediff.infer_compiler import oneflow_compile, compile_options

parser = argparse.ArgumentParser()
parser.add_argument(
Expand Down Expand Up @@ -53,7 +53,6 @@
# Compile unet with oneflow
if cmd_args.compile:
print("unet is compiled to oneflow.")
compile_options = CompileOptions()
compile_options.oneflow.max_cached_graph_size = cmd_args.num_dynamic_input_size
base.unet = oneflow_compile(base.unet, options=compile_options)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import torch.nn as nn

# oneflow_compile should be imported before importing any diffusers
from onediff.infer_compiler import oneflow_compile, CompileOptions
from onediff.infer_compiler import oneflow_compile, compile_options


def parse_args():
Expand Down Expand Up @@ -110,26 +110,29 @@ def parse_args():
pipe.unet, sub_module_name, sub_calibrate_info, False, False, args.bits,
)

options = CompileOptions()
options.oneflow.use_graph = args.graph
compile_options.oneflow.use_graph = args.graph

if args.compile_text_encoder:
if pipe.text_encoder is not None:
pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=options)
pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=compile_options)
if hasattr(pipe, "text_encoder_2"):
pipe.text_encoder_2 = oneflow_compile(pipe.text_encoder_2, options=options)
pipe.text_encoder_2 = oneflow_compile(
pipe.text_encoder_2, options=compile_options
)

if args.compile:
if pipe.text_encoder is not None:
pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=options)
pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=compile_options)
if hasattr(pipe, "text_encoder_2"):
pipe.text_encoder_2 = oneflow_compile(pipe.text_encoder_2, options=options)
pipe.unet = oneflow_compile(pipe.unet, options=options)
pipe.fast_unet = oneflow_compile(pipe.fast_unet, options=options)
pipe.text_encoder_2 = oneflow_compile(
pipe.text_encoder_2, options=compile_options
)
pipe.unet = oneflow_compile(pipe.unet, options=compile_options)
pipe.fast_unet = oneflow_compile(pipe.fast_unet, options=compile_options)
if hasattr(pipe, "text_encoder_2") and pipe.needs_upcasting:
# To avoid a mismatch between the loaded graph and the loaded model
pipe.upcast_vae()
pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=options)
pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=compile_options)

torch.manual_seed(args.seed)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@

import torch

from onediffx import compile_pipe, compiler_config
from onediff.schedulers import EulerDiscreteScheduler

from onediffx import compile_pipe
from onediffx.deep_cache import StableDiffusionXLPipeline

parser = argparse.ArgumentParser()
Expand Down Expand Up @@ -42,13 +40,8 @@
OUTPUT_TYPE = "pil"

# SDXL base: StableDiffusionXLPipeline
scheduler = EulerDiscreteScheduler.from_pretrained(args.base, subfolder="scheduler")
base = StableDiffusionXLPipeline.from_pretrained(
args.base,
scheduler=scheduler,
torch_dtype=torch.float16,
variant=args.variant,
use_safetensors=True,
args.base, torch_dtype=torch.float16, variant=args.variant, use_safetensors=True,
)
base.to("cuda")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import time
import argparse

from onediff.infer_compiler import oneflow_compile, CompileOptions
from onediff.infer_compiler import oneflow_compile, compile_options

import torch
import torch.nn as nn
Expand Down Expand Up @@ -92,16 +92,15 @@ def parse_args():
pipe.unet, sub_module_name, sub_calibrate_info, False, False, args.bits,
)

options = CompileOptions()
options.oneflow.use_graph = args.graph
compile_options.oneflow.use_graph = args.graph

if args.compile_text_encoder:
if pipe.text_encoder is not None:
pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=options)
pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=compile_options)

if args.compile:
pipe.unet = oneflow_compile(pipe.unet, options=options)
pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=options)
pipe.unet = oneflow_compile(pipe.unet, options=compile_options)
pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=compile_options)

torch.manual_seed(args.seed)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import torch.nn as nn

# oneflow_compile should be imported before importing any diffusers
from onediff.infer_compiler import oneflow_compile, CompileOptions
from onediff.infer_compiler import oneflow_compile, compile_options


def parse_args():
Expand Down Expand Up @@ -90,18 +90,19 @@ def parse_args():
pipe.unet, sub_module_name, sub_calibrate_info, False, False, args.bits,
)

options = CompileOptions()
options.oneflow.use_graph = args.graph
compile_options.oneflow.use_graph = args.graph

if args.compile_text_encoder:
if pipe.text_encoder is not None:
pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=options)
pipe.text_encoder = oneflow_compile(pipe.text_encoder, options=compile_options)
if pipe.text_encoder_2 is not None:
pipe.text_encoder_2 = oneflow_compile(pipe.text_encoder_2, options=options)
pipe.text_encoder_2 = oneflow_compile(
pipe.text_encoder_2, options=compile_options
)

if args.compile:
pipe.unet = oneflow_compile(pipe.unet, options=options)
pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=options)
pipe.unet = oneflow_compile(pipe.unet, options=compile_options)
pipe.vae.decoder = oneflow_compile(pipe.vae.decoder, options=compile_options)

torch.manual_seed(args.seed)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import torch
from safetensors.torch import load_file
from diffusers import StableDiffusionXLPipeline
from onediffx import compile_pipe, compiler_config, save_pipe, load_pipe
from onediffx import compile_pipe, save_pipe, load_pipe
from huggingface_hub import hf_hub_download

try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import torch

from onediff.infer_compiler import oneflow_compile
from onediff.schedulers import EulerDiscreteScheduler
from diffusers import StableDiffusionXLPipeline

# import diffusers
Expand Down Expand Up @@ -50,17 +49,11 @@
OUTPUT_TYPE = "pil"

# SDXL base: StableDiffusionXLPipeline
scheduler = EulerDiscreteScheduler.from_pretrained(args.base, subfolder="scheduler")
base = StableDiffusionXLPipeline.from_pretrained(
args.base,
scheduler=scheduler,
torch_dtype=torch.float16,
variant=args.variant,
use_safetensors=True,
args.base, torch_dtype=torch.float16, variant=args.variant, use_safetensors=True,
)
base.to("cuda")


# Compile unet with oneflow
if args.compile_unet:
print("Compiling unet with oneflow.")
Expand Down Expand Up @@ -94,15 +87,13 @@
if str(args.new_base).endswith(".safetensors"):
new_base = StableDiffusionXLPipeline.from_single_file(
args.new_base,
scheduler=scheduler,
torch_dtype=torch.float16,
variant=args.variant,
use_safetensors=True,
)
else:
new_base = StableDiffusionXLPipeline.from_pretrained(
args.new_base,
scheduler=scheduler,
torch_dtype=torch.float16,
variant=args.variant,
use_safetensors=True,
Expand Down
Loading
Loading