From ede18e7c2188595878bbdfae57bfc3efeeee3f54 Mon Sep 17 00:00:00 2001 From: hjchen2 Date: Tue, 9 Apr 2024 17:03:01 +0800 Subject: [PATCH] fix --- onediff_comfy_nodes/_nodes.py | 2 +- .../utils/deep_cache_speedup.py | 2 +- .../scripts/onediff.py | 2 +- .../attention_processor_oflow.py | 2 +- src/onediff/infer_compiler/__init__.py | 2 +- .../infer_compiler/backends/oneflow.py | 2 +- .../infer_compiler/env_var/__init__.py | 2 + .../env_var/populate_env_var.py | 107 ++++++++++++++++++ .../infer_compiler/{env => env_var}/utils.py | 0 .../optimization/attention_processor.py | 2 +- 10 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 src/onediff/infer_compiler/env_var/__init__.py create mode 100644 src/onediff/infer_compiler/env_var/populate_env_var.py rename src/onediff/infer_compiler/{env => env_var}/utils.py (100%) diff --git a/onediff_comfy_nodes/_nodes.py b/onediff_comfy_nodes/_nodes.py index 47aa3edda..5d97bda6c 100644 --- a/onediff_comfy_nodes/_nodes.py +++ b/onediff_comfy_nodes/_nodes.py @@ -1,7 +1,7 @@ from functools import partial from onediff.infer_compiler.transform import torch2oflow from ._config import _USE_UNET_INT8, ONEDIFF_QUANTIZED_OPTIMIZED_MODELS -from onediff.infer_compiler.env import set_boolean_env_var +from onediff.infer_compiler.env_var import set_boolean_env_var from onediff.optimization.quant_optimizer import quantize_model from onediff.infer_compiler import oneflow_compile, CompileOptions from onediff.infer_compiler.deployable_module import DeployableModule diff --git a/onediff_comfy_nodes/utils/deep_cache_speedup.py b/onediff_comfy_nodes/utils/deep_cache_speedup.py index fd0c53d25..98ad25c47 100644 --- a/onediff_comfy_nodes/utils/deep_cache_speedup.py +++ b/onediff_comfy_nodes/utils/deep_cache_speedup.py @@ -2,7 +2,7 @@ from comfy import model_management from comfy.model_base import SVD_img2vid -from onediff.infer_compiler.env import set_boolean_env_var +from onediff.infer_compiler.env_var import set_boolean_env_var from 
.model_patcher import OneFlowDeepCacheSpeedUpModelPatcher diff --git a/onediff_sd_webui_extensions/scripts/onediff.py b/onediff_sd_webui_extensions/scripts/onediff.py index a9c518a0c..9ba436a22 100644 --- a/onediff_sd_webui_extensions/scripts/onediff.py +++ b/onediff_sd_webui_extensions/scripts/onediff.py @@ -19,7 +19,7 @@ from onediff_hijack import do_hijack as onediff_do_hijack from onediff.infer_compiler.utils.log_utils import logger -from onediff.infer_compiler.env import parse_boolean_from_env +from onediff.infer_compiler.env_var import parse_boolean_from_env from onediff.optimization.quant_optimizer import ( quantize_model, varify_can_use_quantization, diff --git a/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py b/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py index 220349d5d..1322a9812 100644 --- a/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py +++ b/src/infer_compiler_registry/register_diffusers/attention_processor_oflow.py @@ -21,7 +21,7 @@ import diffusers from diffusers.utils import deprecate, logging -from onediff.infer_compiler.env import parse_boolean_from_env, set_boolean_env_var +from onediff.infer_compiler.env_var import parse_boolean_from_env, set_boolean_env_var def is_xformers_available(): diff --git a/src/onediff/infer_compiler/__init__.py b/src/onediff/infer_compiler/__init__.py index 50c431400..e65c6c7bc 100644 --- a/src/onediff/infer_compiler/__init__.py +++ b/src/onediff/infer_compiler/__init__.py @@ -2,7 +2,7 @@ import torch from .deployable_module import DeployableModule -from .env import populate_default_env_var +from .env_var import populate_default_env_var from .options import * from .options import _GLOBAL_compile_options as compile_options from .with_onediff_compile import compile, oneflow_compile diff --git a/src/onediff/infer_compiler/backends/oneflow.py b/src/onediff/infer_compiler/backends/oneflow.py index d30893439..ac4607dc0 100644 --- 
import os


def _populate_env_var(field2env_var, options):
    """Export selected dataclass fields of ``options`` as environment variables.

    Args:
        field2env_var: Mapping from dataclass field name to the name of the
            environment variable that receives that field's value.
        options: A dataclass instance. Fields that are not keys of
            ``field2env_var`` are skipped.

    Raises:
        ValueError: If a mapped field is annotated with anything other than
            ``bool``, ``Optional[bool]``, ``int`` or ``Optional[int]``.
    """
    import dataclasses

    # ``Optional`` was used below without being imported anywhere in this
    # module, which raised NameError as soon as a mapped field was reached.
    from typing import Optional

    # Only the setters are needed here; the parse_* helpers that used to be
    # imported alongside them were never used in this function.
    from .utils import set_boolean_env_var, set_integer_env_var

    for field in dataclasses.fields(options):
        field_name = field.name
        if field_name not in field2env_var:
            continue
        env_var = field2env_var[field_name]

        # The annotation object itself is compared, so an annotation stored
        # as a string (e.g. "bool") matches neither branch and is rejected.
        if field.type in (bool, Optional[bool]):
            set_env_var = set_boolean_env_var
        elif field.type in (int, Optional[int]):
            set_env_var = set_integer_env_var
        else:
            raise ValueError(f"Unsupported type {field.type}")

        # NOTE(review): Optional fields may hold None; how the setters in
        # .utils treat None is not visible from here -- confirm.
        set_env_var(env_var, getattr(options, field_name))
def populate_oneflow_env_var(options):
    """Hand the OneFlow option-field to env-var mapping to ``_populate_env_var``.

    Args:
        options: Object forwarded unchanged to ``_populate_env_var`` together
            with the mapping built below.
    """
    # (option field name, environment variable name), in the original order.
    option_env_pairs = (
        ("run_graph_by_vm", "ONEFLOW_RUN_GRAPH_BY_VM"),
        ("graph_delay_variable_op_execution", "ONEFLOW_GRAPH_DELAY_VARIABLE_OP_EXECUTION"),
        ("mlir_cse", "ONEFLOW_MLIR_CSE"),
        ("mlir_enable_inference_optimization", "ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION"),
        ("mlir_enable_round_trip", "ONEFLOW_MLIR_ENABLE_ROUND_TRIP"),
        ("mlir_fuse_forward_ops", "ONEFLOW_MLIR_FUSE_FORWARD_OPS"),
        ("mlir_fuse_ops_with_backward_impl", "ONEFLOW_MLIR_FUSE_OPS_WITH_BACKWARD_IMPL"),
        ("mlir_group_matmul", "ONEFLOW_MLIR_GROUP_MATMUL"),
        ("mlir_prefer_nhwc", "ONEFLOW_MLIR_PREFER_NHWC"),
        ("mlir_fuse_kernel_launch", "ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH"),
        ("kernel_enable_cuda_graph", "ONEFLOW_KERNEL_ENABLE_CUDA_GRAPH"),
        ("kernel_enable_fused_conv_bias", "ONEFLOW_KERNEL_ENABLE_FUSED_CONV_BIAS"),
        ("kernel_enable_fused_linear", "ONEFLOW_KERNEL_ENABLE_FUSED_LINEAR"),
        ("kernel_conv_cutlass_impl_enable_tuning_warmup", "ONEFLOW_KERNEL_CONV_CUTLASS_IMPL_ENABLE_TUNING_WARMUP"),
        ("kernel_gemm_cutlass_impl_enable_tuning_warmup", "ONEFLOW_KERNEL_GEMM_CUTLASS_IMPL_ENABLE_TUNING_WARMUP"),
        ("kernel_conv_enable_cutlass_impl", "ONEFLOW_KERNEL_CONV_ENABLE_CUTLASS_IMPL"),
        ("kernel_gemm_enable_cutlass_impl", "ONEFLOW_KERNEL_GEMM_ENABLE_CUTLASS_IMPL"),
        ("kernel_glu_enable_dual_gemm_impl", "ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL"),
        ("kernel_glu_enable_y_gemm_impl", "ONEFLOW_KERNEL_GLU_ENABLE_Y_GEMM_IMPL"),
        ("kernel_glu_quant_enable_dual_gemm_impl", "ONEFLOW_KERNEL_GLU_QUANT_ENABLE_DUAL_GEMM_IMPL"),
        ("conv_allow_half_precision_accumulation", "ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION"),
        ("matmul_allow_half_precision_accumulation", "ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION"),
        ("attention_allow_half_precision_accumulation", "ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_ACCUMULATION"),
        ("attention_allow_half_precision_score_accumulation_max_m", "ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_SCORE_ACCUMULATION_MAX_M"),
    )
    _populate_env_var(dict(option_env_pairs), options)
def populate_oneflow_default_env_var():
    """Give each OneFlow environment variable listed below the default "1".

    Values are written with ``os.environ.setdefault``, so a variable that is
    already present in the process environment keeps its current value.
    """
    default_on = (
        # ONEFLOW_RUN_GRAPH_BY_VM must be set here to enable nn.Graph init
        # with vm run.
        "ONEFLOW_RUN_GRAPH_BY_VM",
        "ONEFLOW_GRAPH_DELAY_VARIABLE_OP_EXECUTION",
        # MLIR passes.
        "ONEFLOW_MLIR_CSE",
        "ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION",
        "ONEFLOW_MLIR_ENABLE_ROUND_TRIP",
        "ONEFLOW_MLIR_FUSE_FORWARD_OPS",
        "ONEFLOW_MLIR_FUSE_OPS_WITH_BACKWARD_IMPL",
        "ONEFLOW_MLIR_GROUP_MATMUL",
        "ONEFLOW_MLIR_PREFER_NHWC",
        # Kernel selection.
        "ONEFLOW_KERNEL_ENABLE_FUSED_CONV_BIAS",
        "ONEFLOW_KERNEL_ENABLE_FUSED_LINEAR",
        "ONEFLOW_KERNEL_CONV_CUTLASS_IMPL_ENABLE_TUNING_WARMUP",
        "ONEFLOW_KERNEL_GEMM_CUTLASS_IMPL_ENABLE_TUNING_WARMUP",
        "ONEFLOW_KERNEL_CONV_ENABLE_CUTLASS_IMPL",
        "ONEFLOW_KERNEL_GEMM_ENABLE_CUTLASS_IMPL",
        "ONEFLOW_CONVOLUTION_BIAS_ADD_ACT_FUSION",
        # Not defaulted (were commented out, each with value "0"):
        #   ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL
        #   ONEFLOW_KERNEL_GLU_ENABLE_Y_GEMM_IMPL
        #   ONEFLOW_KERNEL_GLU_QUANT_ENABLE_DUAL_GEMM_IMPL
        # Precision / initialisation.
        "ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION",
        "ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION",
        "ONEFLOW_LINEAR_EMBEDDING_SKIP_INIT",
        # Not defaulted (were commented out):
        #   ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_ACCUMULATION = "1"
        #   ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_SCORE_ACCUMULATION_MAX_M = "-1"
        #   ONEFLOW_ATTENTION_ALLOW_QUANTIZATION = "1"
        "ONEFLOW_MLIR_GROUP_MATMUL_QUANT",
        # TODO: enabling these causes multi-resolution warmup to fail, so
        # they are not defaulted:
        #   ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH = "1"
        #   ONEFLOW_KERNEL_ENABLE_CUDA_GRAPH = "1"
    )
    for env_name in default_on:
        os.environ.setdefault(env_name, "1")


def populate_nexfort_env_var(options):
    """Call ``_populate_env_var`` for ``options`` with an empty field mapping."""
    _populate_env_var({}, options)


def populate_nexfort_default_env_var():
    """Placeholder: no nexfort defaults are set."""


def populate_default_env_var():
    """Apply the OneFlow defaults first, then the nexfort defaults."""
    for populate in (
        populate_oneflow_default_env_var,
        populate_nexfort_default_env_var,
    ):
        populate()