
Commit

fix
hjchen2 committed Apr 9, 2024
1 parent 7f82a58 commit ede18e7
Showing 10 changed files with 116 additions and 7 deletions.
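In summary, the commit renames the onediff.infer_compiler.env module path to onediff.infer_compiler.env_var (adding the env_var package and the populate_env_var helpers shown below), so every caller updates its import path accordingly. A representative before/after, mirroring the per-file diffs that follow:

    # before
    from onediff.infer_compiler.env import set_boolean_env_var
    # after
    from onediff.infer_compiler.env_var import set_boolean_env_var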
2 changes: 1 addition & 1 deletion onediff_comfy_nodes/_nodes.py
@@ -1,7 +1,7 @@
from functools import partial
from onediff.infer_compiler.transform import torch2oflow
from ._config import _USE_UNET_INT8, ONEDIFF_QUANTIZED_OPTIMIZED_MODELS
from onediff.infer_compiler.env import set_boolean_env_var
from onediff.infer_compiler.env_var import set_boolean_env_var
from onediff.optimization.quant_optimizer import quantize_model
from onediff.infer_compiler import oneflow_compile, CompileOptions
from onediff.infer_compiler.deployable_module import DeployableModule
2 changes: 1 addition & 1 deletion onediff_comfy_nodes/utils/deep_cache_speedup.py
@@ -2,7 +2,7 @@
from comfy import model_management
from comfy.model_base import SVD_img2vid

from onediff.infer_compiler.env import set_boolean_env_var
from onediff.infer_compiler.env_var import set_boolean_env_var
from .model_patcher import OneFlowDeepCacheSpeedUpModelPatcher


2 changes: 1 addition & 1 deletion onediff_sd_webui_extensions/scripts/onediff.py
@@ -19,7 +19,7 @@
from onediff_hijack import do_hijack as onediff_do_hijack

from onediff.infer_compiler.utils.log_utils import logger
from onediff.infer_compiler.env import parse_boolean_from_env
from onediff.infer_compiler.env_var import parse_boolean_from_env
from onediff.optimization.quant_optimizer import (
quantize_model,
varify_can_use_quantization,
@@ -21,7 +21,7 @@
import diffusers
from diffusers.utils import deprecate, logging

from onediff.infer_compiler.env import parse_boolean_from_env, set_boolean_env_var
from onediff.infer_compiler.env_var import parse_boolean_from_env, set_boolean_env_var


def is_xformers_available():
2 changes: 1 addition & 1 deletion src/onediff/infer_compiler/__init__.py
@@ -2,7 +2,7 @@
import torch

from .deployable_module import DeployableModule
from .env import populate_default_env_var
from .env_var import populate_default_env_var
from .options import *
from .options import _GLOBAL_compile_options as compile_options
from .with_onediff_compile import compile, oneflow_compile
2 changes: 1 addition & 1 deletion src/onediff/infer_compiler/backends/oneflow.py
@@ -20,7 +20,7 @@ def compile(torch_module: torch.nn.Module, *, options=None):
- 'graph_file' (None) generates a compilation cache file. If the file exists, loading occurs; if not, the compilation result is saved after the first run.
- 'graph_file_device' (None) sets the device for the graph file, default None. If set, the compilation result will be converted to the specified device.
"""
from ..env import populate_oneflow_env_var
from ..env_var import populate_oneflow_env_var
from ..transform.custom_transform import set_default_registry
from ..oneflow.deployable_module import OneflowDeployableModule
from ..oneflow.utils import get_mixed_deployable_module
2 changes: 2 additions & 0 deletions src/onediff/infer_compiler/env_var/__init__.py
@@ -0,0 +1,2 @@
from .utils import *
from .populate_env_var import *
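Because the package __init__ re-exports both submodules with wildcard imports, call sites keep importing the helpers from the package root, as the updated files above and below do, e.g.:

    from onediff.infer_compiler.env_var import (
        parse_boolean_from_env,
        set_boolean_env_var,
        populate_default_env_var,
    )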
107 changes: 107 additions & 0 deletions src/onediff/infer_compiler/env_var/populate_env_var.py
@@ -0,0 +1,107 @@
import os


def _populate_env_var(field2env_var, options):
    import dataclasses
    from typing import Optional  # needed for the Optional[bool] / Optional[int] checks below
    from .utils import (
        parse_boolean_from_env,
        set_boolean_env_var,
        parse_integer_from_env,
        set_integer_env_var,
    )

    for field in dataclasses.fields(options):
        field_name = field.name
        if field_name not in field2env_var:
            continue
        env_var = field2env_var[field_name]
        set_env_var = None
        if field.type in (bool, Optional[bool]):
            set_env_var = set_boolean_env_var
        elif field.type in (int, Optional[int]):
            set_env_var = set_integer_env_var
        else:
            raise ValueError(f"Unsupported type {field.type}")
        set_env_var(env_var, getattr(options, field_name))


def populate_oneflow_env_var(options):
    field2env_var = {
        "run_graph_by_vm": "ONEFLOW_RUN_GRAPH_BY_VM",
        "graph_delay_variable_op_execution": "ONEFLOW_GRAPH_DELAY_VARIABLE_OP_EXECUTION",
        "mlir_cse": "ONEFLOW_MLIR_CSE",
        "mlir_enable_inference_optimization": "ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION",
        "mlir_enable_round_trip": "ONEFLOW_MLIR_ENABLE_ROUND_TRIP",
        "mlir_fuse_forward_ops": "ONEFLOW_MLIR_FUSE_FORWARD_OPS",
        "mlir_fuse_ops_with_backward_impl": "ONEFLOW_MLIR_FUSE_OPS_WITH_BACKWARD_IMPL",
        "mlir_group_matmul": "ONEFLOW_MLIR_GROUP_MATMUL",
        "mlir_prefer_nhwc": "ONEFLOW_MLIR_PREFER_NHWC",
        "mlir_fuse_kernel_launch": "ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH",
        "kernel_enable_cuda_graph": "ONEFLOW_KERNEL_ENABLE_CUDA_GRAPH",
        "kernel_enable_fused_conv_bias": "ONEFLOW_KERNEL_ENABLE_FUSED_CONV_BIAS",
        "kernel_enable_fused_linear": "ONEFLOW_KERNEL_ENABLE_FUSED_LINEAR",
        "kernel_conv_cutlass_impl_enable_tuning_warmup": "ONEFLOW_KERNEL_CONV_CUTLASS_IMPL_ENABLE_TUNING_WARMUP",
        "kernel_gemm_cutlass_impl_enable_tuning_warmup": "ONEFLOW_KERNEL_GEMM_CUTLASS_IMPL_ENABLE_TUNING_WARMUP",
        "kernel_conv_enable_cutlass_impl": "ONEFLOW_KERNEL_CONV_ENABLE_CUTLASS_IMPL",
        "kernel_gemm_enable_cutlass_impl": "ONEFLOW_KERNEL_GEMM_ENABLE_CUTLASS_IMPL",
        "kernel_glu_enable_dual_gemm_impl": "ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL",
        "kernel_glu_enable_y_gemm_impl": "ONEFLOW_KERNEL_GLU_ENABLE_Y_GEMM_IMPL",
        "kernel_glu_quant_enable_dual_gemm_impl": "ONEFLOW_KERNEL_GLU_QUANT_ENABLE_DUAL_GEMM_IMPL",
        "conv_allow_half_precision_accumulation": "ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION",
        "matmul_allow_half_precision_accumulation": "ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION",
        "attention_allow_half_precision_accumulation": "ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_ACCUMULATION",
        "attention_allow_half_precision_score_accumulation_max_m": "ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_SCORE_ACCUMULATION_MAX_M",
    }
    _populate_env_var(field2env_var, options)


def populate_oneflow_default_env_var():
    # ONEFLOW_RUN_GRAPH_BY_VM must be set here so that nn.Graph initializes with the VM runner.
    os.environ.setdefault("ONEFLOW_RUN_GRAPH_BY_VM", "1")
    os.environ.setdefault("ONEFLOW_GRAPH_DELAY_VARIABLE_OP_EXECUTION", "1")

    os.environ.setdefault("ONEFLOW_MLIR_CSE", "1")
    os.environ.setdefault("ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION", "1")
    os.environ.setdefault("ONEFLOW_MLIR_ENABLE_ROUND_TRIP", "1")
    os.environ.setdefault("ONEFLOW_MLIR_FUSE_FORWARD_OPS", "1")
    os.environ.setdefault("ONEFLOW_MLIR_FUSE_OPS_WITH_BACKWARD_IMPL", "1")
    os.environ.setdefault("ONEFLOW_MLIR_GROUP_MATMUL", "1")
    os.environ.setdefault("ONEFLOW_MLIR_PREFER_NHWC", "1")

    os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_FUSED_CONV_BIAS", "1")
    os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_FUSED_LINEAR", "1")
    os.environ.setdefault("ONEFLOW_KERNEL_CONV_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", "1")
    os.environ.setdefault("ONEFLOW_KERNEL_GEMM_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", "1")
    os.environ.setdefault("ONEFLOW_KERNEL_CONV_ENABLE_CUTLASS_IMPL", "1")
    os.environ.setdefault("ONEFLOW_KERNEL_GEMM_ENABLE_CUTLASS_IMPL", "1")
    os.environ.setdefault("ONEFLOW_CONVOLUTION_BIAS_ADD_ACT_FUSION", "1")
    # os.environ.setdefault("ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL", "0")
    # os.environ.setdefault("ONEFLOW_KERNEL_GLU_ENABLE_Y_GEMM_IMPL", "0")
    # os.environ.setdefault("ONEFLOW_KERNEL_GLU_QUANT_ENABLE_DUAL_GEMM_IMPL", "0")

    os.environ.setdefault("ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION", "1")
    os.environ.setdefault("ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION", "1")
    os.environ.setdefault("ONEFLOW_LINEAR_EMBEDDING_SKIP_INIT", "1")
    # os.environ.setdefault("ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_ACCUMULATION", "1")
    # os.environ.setdefault("ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_SCORE_ACCUMULATION_MAX_M", "-1")
    # os.environ.setdefault("ONEFLOW_ATTENTION_ALLOW_QUANTIZATION", "1")

    os.environ.setdefault("ONEFLOW_MLIR_GROUP_MATMUL_QUANT", "1")

    # TODO: enabling this causes multi-resolution warmup to fail
    # os.environ.setdefault("ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH", "1")
    # os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_CUDA_GRAPH", "1")


def populate_nexfort_env_var(options):
    field2env_var = {}
    _populate_env_var(field2env_var, options)


def populate_nexfort_default_env_var():
    pass


def populate_default_env_var():
    populate_oneflow_default_env_var()
    populate_nexfort_default_env_var()
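For orientation, a minimal hypothetical sketch of how these helpers are driven. The DemoOneflowOptions dataclass below is invented for illustration only; the real compile options dataclass lives elsewhere in onediff and carries many more fields, but any dataclass whose field names match keys of field2env_var is handled the same way:

    from dataclasses import dataclass
    from typing import Optional

    from onediff.infer_compiler.env_var import (
        populate_default_env_var,
        populate_oneflow_env_var,
    )

    # Hypothetical options dataclass, for illustration only.
    @dataclass
    class DemoOneflowOptions:
        run_graph_by_vm: Optional[bool] = True      # exported as ONEFLOW_RUN_GRAPH_BY_VM
        mlir_prefer_nhwc: Optional[bool] = False    # exported as ONEFLOW_MLIR_PREFER_NHWC
        attention_allow_half_precision_score_accumulation_max_m: Optional[int] = 1024

    populate_default_env_var()                      # sets library defaults via os.environ.setdefault
    populate_oneflow_env_var(DemoOneflowOptions())  # maps dataclass fields to ONEFLOW_* variables

Because populate_oneflow_default_env_var uses os.environ.setdefault, any of these variables already exported in the launching shell (for example ONEFLOW_MLIR_PREFER_NHWC=0) keeps its value rather than being overwritten by the defaults.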
File renamed without changes.
2 changes: 1 addition & 1 deletion src/onediff/optimization/attention_processor.py
@@ -84,7 +84,7 @@ def __call__(
hidden_states = flow.bmm(attention_probs, value)
hidden_states = attn.batch_to_head_dim(hidden_states)
else:
from ..infer_compiler.env import (
from ..infer_compiler.env_var import (
parse_boolean_from_env,
set_boolean_env_var,
)
