File tree Expand file tree Collapse file tree 2 files changed +8
-0
lines changed Expand file tree Collapse file tree 2 files changed +8
-0
lines changed Original file line number Diff line number Diff line change 113113 VLLM_ROCM_USE_AITER_FP8BMM : bool = True
114114 VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION : bool = False
115115 VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS : bool = True
116+ VLLM_ROCM_USE_AITER_TRITON_GEMM : bool = True
116117 VLLM_ROCM_USE_SKINNY_GEMM : bool = True
117118 VLLM_ROCM_FP8_PADDING : bool = True
118119 VLLM_ROCM_MOE_PADDING : bool = True
@@ -944,6 +945,12 @@ def get_vllm_port() -> int | None:
944945 os .getenv ("VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS" , "True" ).lower ()
945946 in ("true" , "1" )
946947 ),
948+ # Whether to use aiter triton kernels for gemm ops.
949+ # Enabled by default.
950+ "VLLM_ROCM_USE_AITER_TRITON_GEMM" : lambda : (
951+ os .getenv ("VLLM_ROCM_USE_AITER_TRITON_GEMM" , "True" ).lower ()
952+ in ("true" , "1" )
953+ ),
947954 # use rocm skinny gemms
948955 "VLLM_ROCM_USE_SKINNY_GEMM" : lambda : (
949956 os .getenv ("VLLM_ROCM_USE_SKINNY_GEMM" , "True" ).lower () in ("true" , "1" )
Original file line number Diff line number Diff line change @@ -106,6 +106,7 @@ def default_unquantized_gemm(
106106def use_aiter_triton_gemm (n , m , k , dtype ):
107107 if (
108108 envs .VLLM_ROCM_USE_AITER == 0
109+ or envs .VLLM_ROCM_USE_AITER_TRITON_GEMM == 0
109110 # MI300's - fp8nuz=True
110111 or current_platform .is_fp8_fnuz ()
111112 or dtype not in [torch .float16 , torch .bfloat16 ]
You can’t perform that action at this time.
0 commit comments