4 files changed: +6 -24 lines changed
Original file line number Diff line number Diff line change
2424
2525#include " attention_dtypes.h"
2626#include " attention_utils.cuh"
27+ #include " cuda_compat.h"
2728
2829#ifdef USE_ROCM
2930 #include < hip/hip_bf16.h>
@@ -33,12 +34,6 @@ typedef __hip_bfloat16 __nv_bfloat16;
3334 #include " ../quantization/fp8/nvidia/quant_utils.cuh"
3435#endif
3536
36- #ifndef USE_ROCM
37- #define WARP_SIZE 32
38- #else
39- #define WARP_SIZE warpSize
40- #endif
41-
4237#define MAX (a, b ) ((a) > (b) ? (a) : (b))
4338#define MIN (a, b ) ((a) < (b) ? (a) : (b))
4439#define DIVIDE_ROUND_UP (a, b ) (((a) + (b) - 1 ) / (b))
@@ -670,7 +665,6 @@ __global__ void paged_attention_v2_reduce_kernel(
670665
671666} // namespace vllm
672667
673- #undef WARP_SIZE
674668#undef MAX
675669#undef MIN
676670#undef DIVIDE_ROUND_UP
Original file line number Diff line number Diff line change
1818 */
1919
2020#include " attention_kernels.cuh"
21-
22- #ifndef USE_ROCM
23- #define WARP_SIZE 32
24- #else
25- #define WARP_SIZE warpSize
26- #endif
21+ #include " cuda_compat.h"
2722
2823#define MAX (a, b ) ((a) > (b) ? (a) : (b))
2924#define MIN (a, b ) ((a) < (b) ? (a) : (b))
@@ -187,7 +182,6 @@ void paged_attention_v1(
187182 CALL_V1_LAUNCHER_BLOCK_SIZE)
188183}
189184
190- #undef WARP_SIZE
191185#undef MAX
192186#undef MIN
193187#undef DIVIDE_ROUND_UP
Original file line number Diff line number Diff line change
1818 */
1919
2020#include " attention_kernels.cuh"
21-
22- #ifndef USE_ROCM
23- #define WARP_SIZE 32
24- #else
25- #define WARP_SIZE warpSize
26- #endif
21+ #include " cuda_compat.h"
2722
2823#define MAX (a, b ) ((a) > (b) ? (a) : (b))
2924#define MIN (a, b ) ((a) < (b) ? (a) : (b))
@@ -197,7 +192,6 @@ void paged_attention_v2(
197192 CALL_V2_LAUNCHER_BLOCK_SIZE)
198193}
199194
200- #undef WARP_SIZE
201195#undef MAX
202196#undef MIN
203197#undef DIVIDE_ROUND_UP
Original file line number Diff line number Diff line change
44 #include <hip/hip_runtime.h>
55#endif
66
7- #ifndef USE_ROCM
8- #define WARP_SIZE 32
7+ #if defined( USE_ROCM ) && defined( __GFX9__ )
8+ #define WARP_SIZE 64
99#else
10- #define WARP_SIZE warpSize
10+ #define WARP_SIZE 32
1111#endif
1212
1313#ifndef USE_ROCM
You can’t perform that action at this time.
0 commit comments