Skip to content

Commit 7e9802f

Browse files
[Headers][NFC] Steps to allow sharing code between gpu intrin.h headers (llvm#131134)
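Adds macro guards that raise an error if the implementation headers are included directly, as part of dropping the need for them to be standalone. Lifts the bool macro into gpuintrin.h. Moves __gpu_shuffle_idx_u64 into gpuintrin.h in passing, since it was nearly the same implementation in each architecture file (the shared version keeps the NVPTX form, which narrows the lane mask to 32 bits before calling __gpu_shuffle_idx_u32).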
1 parent 94c8fa6 commit 7e9802f

File tree

3 files changed

+25
-50
lines changed

3 files changed

+25
-50
lines changed

clang/lib/Headers/amdgpuintrin.h

+2-20
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,8 @@
1313
#error "This file is intended for AMDGPU targets or offloading to AMDGPU"
1414
#endif
1515

16-
#include <stdint.h>
17-
18-
#if !defined(__cplusplus)
19-
_Pragma("push_macro(\"bool\")");
20-
#define bool _Bool
16+
#ifndef __GPUINTRIN_H
17+
#error "Never use <amdgpuintrin.h> directly; include <gpuintrin.h> instead"
2118
#endif
2219

2320
_Pragma("omp begin declare target device_type(nohost)");
@@ -146,17 +143,6 @@ __gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x,
146143
return __builtin_amdgcn_ds_bpermute(__lane << 2, __x);
147144
}
148145

149-
// Shuffles the lanes inside the wavefront according to the given index.
150-
_DEFAULT_FN_ATTRS static __inline__ uint64_t
151-
__gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x,
152-
uint32_t __width) {
153-
uint32_t __hi = (uint32_t)(__x >> 32ull);
154-
uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF);
155-
return ((uint64_t)__gpu_shuffle_idx_u32(__lane_mask, __idx, __hi, __width)
156-
<< 32ull) |
157-
((uint64_t)__gpu_shuffle_idx_u32(__lane_mask, __idx, __lo, __width));
158-
}
159-
160146
// Returns a bitmask marking all lanes that have the same value of __x.
161147
_DEFAULT_FN_ATTRS static __inline__ uint64_t
162148
__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
@@ -238,8 +224,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {
238224
_Pragma("omp end declare variant");
239225
_Pragma("omp end declare target");
240226

241-
#if !defined(__cplusplus)
242-
_Pragma("pop_macro(\"bool\")");
243-
#endif
244-
245227
#endif // __AMDGPUINTRIN_H

clang/lib/Headers/gpuintrin.h

+19-7
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,13 @@
2525
#endif
2626
#endif
2727

28+
#include <stdint.h>
29+
30+
#if !defined(__cplusplus)
31+
_Pragma("push_macro(\"bool\")");
32+
#define bool _Bool
33+
#endif
34+
2835
#if defined(__NVPTX__)
2936
#include <nvptxintrin.h>
3037
#elif defined(__AMDGPU__)
@@ -33,13 +40,6 @@
3340
#error "This header is only meant to be used on GPU architectures."
3441
#endif
3542

36-
#include <stdint.h>
37-
38-
#if !defined(__cplusplus)
39-
_Pragma("push_macro(\"bool\")");
40-
#define bool _Bool
41-
#endif
42-
4343
_Pragma("omp begin declare target device_type(nohost)");
4444
_Pragma("omp begin declare variant match(device = {kind(gpu)})");
4545

@@ -141,6 +141,18 @@ __gpu_read_first_lane_f64(uint64_t __lane_mask, double __x) {
141141
__builtin_bit_cast(uint64_t, __x)));
142142
}
143143

144+
// Shuffles the lanes according to the given index.
145+
_DEFAULT_FN_ATTRS static __inline__ uint64_t
146+
__gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x,
147+
uint32_t __width) {
148+
uint32_t __hi = (uint32_t)(__x >> 32ull);
149+
uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF);
150+
uint32_t __mask = (uint32_t)__lane_mask;
151+
return ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __hi, __width)
152+
<< 32ull) |
153+
((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __lo, __width));
154+
}
155+
144156
// Shuffles the lanes according to the given index.
145157
_DEFAULT_FN_ATTRS static __inline__ float
146158
__gpu_shuffle_idx_f32(uint64_t __lane_mask, uint32_t __idx, float __x,

clang/lib/Headers/nvptxintrin.h

+4-23
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,12 @@
1313
#error "This file is intended for NVPTX targets or offloading to NVPTX"
1414
#endif
1515

16-
#ifndef __CUDA_ARCH__
17-
#define __CUDA_ARCH__ 0
16+
#ifndef __GPUINTRIN_H
17+
#error "Never use <nvptxintrin.h> directly; include <gpuintrin.h> instead"
1818
#endif
1919

20-
#include <stdint.h>
21-
22-
#if !defined(__cplusplus)
23-
_Pragma("push_macro(\"bool\")");
24-
#define bool _Bool
20+
#ifndef __CUDA_ARCH__
21+
#define __CUDA_ARCH__ 0
2522
#endif
2623

2724
_Pragma("omp begin declare target device_type(nohost)");
@@ -153,18 +150,6 @@ __gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x,
153150
((__gpu_num_lanes() - __width) << 8u) | 0x1f);
154151
}
155152

156-
// Shuffles the lanes inside the warp according to the given index.
157-
_DEFAULT_FN_ATTRS static __inline__ uint64_t
158-
__gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x,
159-
uint32_t __width) {
160-
uint32_t __hi = (uint32_t)(__x >> 32ull);
161-
uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF);
162-
uint32_t __mask = (uint32_t)__lane_mask;
163-
return ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __hi, __width)
164-
<< 32ull) |
165-
((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __lo, __width));
166-
}
167-
168153
// Returns a bitmask marking all lanes that have the same value of __x.
169154
_DEFAULT_FN_ATTRS static __inline__ uint64_t
170155
__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
@@ -263,8 +248,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {
263248
_Pragma("omp end declare variant");
264249
_Pragma("omp end declare target");
265250

266-
#if !defined(__cplusplus)
267-
_Pragma("pop_macro(\"bool\")");
268-
#endif
269-
270251
#endif // __NVPTXINTRIN_H

0 commit comments

Comments
 (0)