diff --git a/onediff_comfy_nodes/utils/diffusers_quant_utils.py b/onediff_comfy_nodes/utils/diffusers_quant_utils.py index d94015105..ca29170b1 100644 --- a/onediff_comfy_nodes/utils/diffusers_quant_utils.py +++ b/onediff_comfy_nodes/utils/diffusers_quant_utils.py @@ -35,7 +35,7 @@ def _use_graph(): os.environ["ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION"] = "1" os.environ["ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION"] = "1" os.environ["ONEFLOW_LINEAR_EMBEDDING_SKIP_INIT"] = "1" - os.environ["ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL"] = "0" + # os.environ["ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL"] = "0" os.environ["ONEFLOW_MLIR_GROUP_MATMUL_QUANT"] = "1" os.environ["ONEFLOW_FUSE_QUANT_TO_MATMUL"] = "0" # os.environ["ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH"] = "1" diff --git a/src/infer_compiler_registry/register_diffusers_quant/quant_diffusion_pipeline.py b/src/infer_compiler_registry/register_diffusers_quant/quant_diffusion_pipeline.py index 56766813d..282c6eec7 100644 --- a/src/infer_compiler_registry/register_diffusers_quant/quant_diffusion_pipeline.py +++ b/src/infer_compiler_registry/register_diffusers_quant/quant_diffusion_pipeline.py @@ -29,7 +29,7 @@ def _use_graph(): os.environ["ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION"] = "1" os.environ["ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION"] = "1" os.environ["ONEFLOW_LINEAR_EMBEDDING_SKIP_INIT"] = "1" - os.environ["ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL"] = "0" + # os.environ["ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL"] = "0" os.environ["ONEFLOW_MLIR_GROUP_MATMUL_QUANT"] = "1" os.environ["ONEFLOW_FUSE_QUANT_TO_MATMUL"] = "0" # os.environ["ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH"] = "1" diff --git a/src/onediff/infer_compiler/with_oneflow_compile.py b/src/onediff/infer_compiler/with_oneflow_compile.py index 04b23b69c..21c9ba02a 100644 --- a/src/onediff/infer_compiler/with_oneflow_compile.py +++ b/src/onediff/infer_compiler/with_oneflow_compile.py @@ -379,7 +379,7 @@ def __init__(self, model): os.environ["ONEFLOW_CONV_ALLOW_HALF_PRECISION_ACCUMULATION"] = "1" os.environ["ONEFLOW_MATMUL_ALLOW_HALF_PRECISION_ACCUMULATION"] = "1" os.environ["ONEFLOW_LINEAR_EMBEDDING_SKIP_INIT"] = "1" - os.environ["ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL"] = "0" + # os.environ["ONEFLOW_KERNEL_GLU_ENABLE_DUAL_GEMM_IMPL"] = "0" os.environ["ONEFLOW_MLIR_GROUP_MATMUL_QUANT"] = "1" # TODO: enable this will cause the failure of multi resolution warmup # os.environ["ONEFLOW_MLIR_FUSE_KERNEL_LAUNCH"] = "1"