
Commit

fix
hjchen2 committed Apr 9, 2024
1 parent 7f82a58 commit ede18e7
Showing 10 changed files with 116 additions and 7 deletions.
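In summary, the commit renames the onediff.infer_compiler.env module path to onediff.infer_compiler.env_var (adding the env_var package and the populate_env_var helpers shown below), so every caller updates its import path accordingly. A representative before/after, mirroring the per-file diffs that follow:

    # before
    from onediff.infer_compiler.env import set_boolean_env_var
    # after
    from onediff.infer_compiler.env_var import set_boolean_env_var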
2 changes: 1 addition & 1 deletion onediff_comfy_nodes/_nodes.py
@@ -1,7 +1,7 @@
from functools import partial
from onediff.infer_compiler.transform import torch2oflow
from ._config import _USE_UNET_INT8, ONEDIFF_QUANTIZED_OPTIMIZED_MODELS
from onediff.infer_compiler.env import set_boolean_env_var
from onediff.infer_compiler.env_var import set_boolean_env_var
from onediff.optimization.quant_optimizer import quantize_model
from onediff.infer_compiler import oneflow_compile, CompileOptions
from onediff.infer_compiler.deployable_module import DeployableModule
2 changes: 1 addition & 1 deletion onediff_comfy_nodes/utils/deep_cache_speedup.py
@@ -2,7 +2,7 @@
from comfy import model_management
from comfy.model_base import SVD_img2vid

from onediff.infer_compiler.env import set_boolean_env_var
from onediff.infer_compiler.env_var import set_boolean_env_var
from .model_patcher import OneFlowDeepCacheSpeedUpModelPatcher


2 changes: 1 addition & 1 deletion onediff_sd_webui_extensions/scripts/onediff.py
@@ -19,7 +19,7 @@
from onediff_hijack import do_hijack as onediff_do_hijack

from onediff.infer_compiler.utils.log_utils import logger
from onediff.infer_compiler.env import parse_boolean_from_env
from onediff.infer_compiler.env_var import parse_boolean_from_env
from onediff.optimization.quant_optimizer import (
quantize_model,
varify_can_use_quantization,
@@ -21,7 +21,7 @@
import diffusers
from diffusers.utils import deprecate, logging

from onediff.infer_compiler.env import parse_boolean_from_env, set_boolean_env_var
from onediff.infer_compiler.env_var import parse_boolean_from_env, set_boolean_env_var


def is_xformers_available():
2 changes: 1 addition & 1 deletion src/onediff/infer_compiler/__init__.py
@@ -2,7 +2,7 @@
import torch

from .deployable_module import DeployableModule
from .env import populate_default_env_var
from .env_var import populate_default_env_var
from .options import *
from .options import _GLOBAL_compile_options as compile_options
from .with_onediff_compile import compile, oneflow_compile
2 changes: 1 addition & 1 deletion src/onediff/infer_compiler/backends/oneflow.py
@@ -20,7 +20,7 @@ def compile(torch_module: torch.nn.Module, *, options=None):
- 'graph_file' (None) generates a compilation cache file. If the file exists, loading occurs; if not, the compilation result is saved after the first run.
- 'graph_file_device' (None) sets the device for the graph file, default None. If set, the compilation result will be converted to the specified device.
"""
from ..env import populate_oneflow_env_var
from ..env_var import populate_oneflow_env_var
from ..transform.custom_transform import set_default_registry
from ..oneflow.deployable_module import OneflowDeployableModule
from ..oneflow.utils import get_mixed_deployable_module
2 changes: 2 additions & 0 deletions src/onediff/infer_compiler/env_var/__init__.py
@@ -0,0 +1,2 @@
from .utils import *
from .populate_env_var import *
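Because the package __init__ re-exports both submodules with wildcard imports, call sites keep importing the helpers from the package root, as the updated files above and below do, e.g.:

    from onediff.infer_compiler.env_var import (
        parse_boolean_from_env,
        set_boolean_env_var,
        populate_default_env_var,
    )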
107 changes: 107 additions & 0 deletions src/onediff/infer_compiler/env_var/populate_env_var.py
@@ -0,0 +1,107 @@
import os


def _populate_env_var(field2env_var, options):
    import dataclasses
    from typing import Optional  # needed for the Optional[bool] / Optional[int] checks below
    from .utils import (
        parse_boolean_from_env,
        set_boolean_env_var,
        parse_integer_from_env,
        set_integer_env_var,
    )

    for field in dataclasses.fields(options):
        field_name = field.name
        if field_name not in field2env_var:
            continue
        env_var = field2env_var[field_name]
        set_env_var = None
        if field.type in (bool, Optional[bool]):
            set_env_var = set_boolean_env_var
        elif field.type in (int, Optional[int]):
            set_env_var = set_integer_env_var
        else:
            raise ValueError(f"Unsupported type {field.type}")
        set_env_var(env_var, getattr(options, field_name))


def populate_oneflow_env_var(options):
    field2env_var = {
        "run_graph_by_vm": "ONEFLOW_RUN_GRAPH_BY_VM",
        "graph_delay_variable_op_execution": "ONEFLOW_GRAPH_DELAY_VARIABLE_OP_EXECUTION",
        "mlir_cse": "ONEFLOW_MLIR_CSE",
        "mlir_enable_inference_optimization": "ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION",
        "mlir_enable_round_trip": "ONEFLOW_MLIR_ENABLE_ROUND_TRIP",
        "mlir_fuse_forward_ops": "ONEFLOW_MLIR_FUSE_FORWARD_OPS",
        "mlir_fuse_ops_with_backward_impl": "ONEFLOW_MLIR_FUSE_OPS_WITH_BACKWARD_IMPL",
        "mlir_group_matmul": "ONEFLOW_MLIR_GROUP_MATMUL",
        "mlir_prefer_nhwc": "ONEFLOW_MLIR_PREFER_NHWC",
        "mlir_fuse_kernel_launch": "ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH",
        "kernel_enable_cuda_graph": "ONEFLOW_KERNEL_ENABLE_CUDA_GRAPH",
        "kernel_enable_fused_conv_bias": "ONEFLOW_KERNEL_ENABLE_FUSED_CONV_BIAS",
        "kernel_enable_fused_linear": "ONEFLOW_KERNEL_ENABLE_FUSED_LINEAR",
        "kernel_conv_cutlass_impl_enable_tuning_warmup": "ONEFLOW_KERNEL_CONV_CUTLASS_IMPL_ENABLE_TUNING_WARMUP",
        "kernel_gemm_cutlass_impl_enable_tuning_warmup": "ONEFLOW_KERNEL_GEMM_CUTLASS_IMPL_ENABLE_TUNING_WARMUP",
        "kernel_conv_enable_cutlass_impl": "ONEFLOW_KERNEL_CONV_ENABLE_CUTLASS_IMPL",
        "kernel_gemm_enable_cutlass_impl": "ONEFLOW_KERNEL_GEMM_ENABLE_CUTLASS_IMPL",
        "kernel_glu_enable_dual_gemm_impl": "ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL",
        "kernel_glu_enable_y_gemm_impl": "ONEFLOW_KERNEL_GLU_ENABLE_Y_GEMM_IMPL",
        "kernel_glu_quant_enable_dual_gemm_impl": "ONEFLOW_KERNEL_GLU_QUANT_ENABLE_DUAL_GEMM_IMPL",
        "conv_allow_half_precision_accumulation": "ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION",
        "matmul_allow_half_precision_accumulation": "ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION",
        "attention_allow_half_precision_accumulation": "ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_ACCUMULATION",
        "attention_allow_half_precision_score_accumulation_max_m": "ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_SCORE_ACCUMULATION_MAX_M",
    }
    _populate_env_var(field2env_var, options)


def populate_oneflow_default_env_var():
    # ONEFLOW_RUN_GRAPH_BY_VM must be set here so that nn.Graph initializes with the VM runner.
    os.environ.setdefault("ONEFLOW_RUN_GRAPH_BY_VM", "1")
    os.environ.setdefault("ONEFLOW_GRAPH_DELAY_VARIABLE_OP_EXECUTION", "1")

    os.environ.setdefault("ONEFLOW_MLIR_CSE", "1")
    os.environ.setdefault("ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION", "1")
    os.environ.setdefault("ONEFLOW_MLIR_ENABLE_ROUND_TRIP", "1")
    os.environ.setdefault("ONEFLOW_MLIR_FUSE_FORWARD_OPS", "1")
    os.environ.setdefault("ONEFLOW_MLIR_FUSE_OPS_WITH_BACKWARD_IMPL", "1")
    os.environ.setdefault("ONEFLOW_MLIR_GROUP_MATMUL", "1")
    os.environ.setdefault("ONEFLOW_MLIR_PREFER_NHWC", "1")

    os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_FUSED_CONV_BIAS", "1")
    os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_FUSED_LINEAR", "1")
    os.environ.setdefault("ONEFLOW_KERNEL_CONV_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", "1")
    os.environ.setdefault("ONEFLOW_KERNEL_GEMM_CUTLASS_IMPL_ENABLE_TUNING_WARMUP", "1")
    os.environ.setdefault("ONEFLOW_KERNEL_CONV_ENABLE_CUTLASS_IMPL", "1")
    os.environ.setdefault("ONEFLOW_KERNEL_GEMM_ENABLE_CUTLASS_IMPL", "1")
    os.environ.setdefault("ONEFLOW_CONVOLUTION_BIAS_ADD_ACT_FUSION", "1")
    # os.environ.setdefault("ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL", "0")
    # os.environ.setdefault("ONEFLOW_KERNEL_GLU_ENABLE_Y_GEMM_IMPL", "0")
    # os.environ.setdefault("ONEFLOW_KERNEL_GLU_QUANT_ENABLE_DUAL_GEMM_IMPL", "0")

    os.environ.setdefault("ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION", "1")
    os.environ.setdefault("ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION", "1")
    os.environ.setdefault("ONEFLOW_LINEAR_EMBEDDING_SKIP_INIT", "1")
    # os.environ.setdefault("ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_ACCUMULATION", "1")
    # os.environ.setdefault("ONEFLOW_ATTENTION_ALLOW_HALF_PRECISION_SCORE_ACCUMULATION_MAX_M", "-1")
    # os.environ.setdefault("ONEFLOW_ATTENTION_ALLOW_QUANTIZATION", "1")

    os.environ.setdefault("ONEFLOW_MLIR_GROUP_MATMUL_QUANT", "1")

    # TODO: enabling this causes multi-resolution warmup to fail
    # os.environ.setdefault("ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH", "1")
    # os.environ.setdefault("ONEFLOW_KERNEL_ENABLE_CUDA_GRAPH", "1")


def populate_nexfort_env_var(options):
    field2env_var = {}
    _populate_env_var(field2env_var, options)


def populate_nexfort_default_env_var():
    pass


def populate_default_env_var():
    populate_oneflow_default_env_var()
    populate_nexfort_default_env_var()
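For orientation, a minimal hypothetical sketch of how these helpers are driven. The DemoOneflowOptions dataclass below is invented for illustration only; the real compile options dataclass lives elsewhere in onediff and carries many more fields, but any dataclass whose field names match keys of field2env_var is handled the same way:

    from dataclasses import dataclass
    from typing import Optional

    from onediff.infer_compiler.env_var import (
        populate_default_env_var,
        populate_oneflow_env_var,
    )

    # Hypothetical options dataclass, for illustration only.
    @dataclass
    class DemoOneflowOptions:
        run_graph_by_vm: Optional[bool] = True      # exported as ONEFLOW_RUN_GRAPH_BY_VM
        mlir_prefer_nhwc: Optional[bool] = False    # exported as ONEFLOW_MLIR_PREFER_NHWC
        attention_allow_half_precision_score_accumulation_max_m: Optional[int] = 1024

    populate_default_env_var()                      # sets library defaults via os.environ.setdefault
    populate_oneflow_env_var(DemoOneflowOptions())  # maps dataclass fields to ONEFLOW_* variables

Because populate_oneflow_default_env_var uses os.environ.setdefault, any of these variables already exported in the launching shell (for example ONEFLOW_MLIR_PREFER_NHWC=0) keeps its value rather than being overwritten by the defaults.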
File renamed without changes.
2 changes: 1 addition & 1 deletion src/onediff/optimization/attention_processor.py
@@ -84,7 +84,7 @@ def __call__(
hidden_states = flow.bmm(attention_probs, value)
hidden_states = attn.batch_to_head_dim(hidden_states)
else:
from ..infer_compiler.env import (
from ..infer_compiler.env_var import (
parse_boolean_from_env,
set_boolean_env_var,
)
