-
Notifications
You must be signed in to change notification settings - Fork 13.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Headers][NFC] Steps to allow sharing code between gpu intrin.h headers #131134
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-clang Author: Jon Chesterfield (JonChesterfield) ChangesAdds macro guards to warn if the implementation headers are included directly as part of dropping the need for them to be standalone. I'd like to declare functions before the include but it might be be viable with the openmp pragma annotation to do so. Full diff: https://github.com/llvm/llvm-project/pull/131134.diff 3 Files Affected:
diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h
index 839a05175cf3e..7b1d16f8ca88d 100644
--- a/clang/lib/Headers/amdgpuintrin.h
+++ b/clang/lib/Headers/amdgpuintrin.h
@@ -13,11 +13,8 @@
#error "This file is intended for AMDGPU targets or offloading to AMDGPU"
#endif
-#include <stdint.h>
-
-#if !defined(__cplusplus)
-_Pragma("push_macro(\"bool\")");
-#define bool _Bool
+#ifndef __GPUINTRIN_H
+#warning "This file is intended as an implementation detail of gpuintrin.h"
#endif
_Pragma("omp begin declare target device_type(nohost)");
@@ -33,10 +30,6 @@ _Pragma("omp begin declare variant match(device = {arch(amdgcn)})");
// Attribute to declare a function as a kernel.
#define __gpu_kernel __attribute__((amdgpu_kernel, visibility("protected")))
-// Defined in gpuintrin.h, used later in this file.
-_DEFAULT_FN_ATTRS static __inline__ uint64_t
-__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x);
-
// Returns the number of workgroups in the 'x' dimension of the grid.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) {
return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x();
@@ -238,8 +231,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {
_Pragma("omp end declare variant");
_Pragma("omp end declare target");
-#if !defined(__cplusplus)
-_Pragma("pop_macro(\"bool\")");
-#endif
-
#endif // __AMDGPUINTRIN_H
diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h
index 4181628d18048..8d300b5b9acb8 100644
--- a/clang/lib/Headers/gpuintrin.h
+++ b/clang/lib/Headers/gpuintrin.h
@@ -25,6 +25,20 @@
#endif
#endif
+#include <stdint.h>
+
+#if !defined(__cplusplus)
+_Pragma("push_macro(\"bool\")");
+#define bool _Bool
+#endif
+
+// Declare functions that can be called by the implementation headers
+
+// Returns the number of workgroups in the 'x' dimension of the grid.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x);
+
+
#if defined(__NVPTX__)
#include <nvptxintrin.h>
#elif defined(__AMDGPU__)
@@ -33,12 +47,6 @@
#error "This header is only meant to be used on GPU architectures."
#endif
-#include <stdint.h>
-
-#if !defined(__cplusplus)
-_Pragma("push_macro(\"bool\")");
-#define bool _Bool
-#endif
_Pragma("omp begin declare target device_type(nohost)");
_Pragma("omp begin declare variant match(device = {kind(gpu)})");
diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h
index d00a5f6de3950..170c943fe63a2 100644
--- a/clang/lib/Headers/nvptxintrin.h
+++ b/clang/lib/Headers/nvptxintrin.h
@@ -13,15 +13,12 @@
#error "This file is intended for NVPTX targets or offloading to NVPTX"
#endif
-#ifndef __CUDA_ARCH__
-#define __CUDA_ARCH__ 0
+#ifndef __GPUINTRIN_H
+#warning "This file is intended as an implementation detail of gpuintrin.h"
#endif
-#include <stdint.h>
-
-#if !defined(__cplusplus)
-_Pragma("push_macro(\"bool\")");
-#define bool _Bool
+#ifndef __CUDA_ARCH__
+#define __CUDA_ARCH__ 0
#endif
_Pragma("omp begin declare target device_type(nohost)");
@@ -37,10 +34,6 @@ _Pragma("omp begin declare variant match(device = {arch(nvptx64)})");
// Attribute to declare a function as a kernel.
#define __gpu_kernel __attribute__((nvptx_kernel, visibility("protected")))
-// Defined in gpuintrin.h, used later in this file.
-_DEFAULT_FN_ATTRS static __inline__ uint64_t
-__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x);
-
// Returns the number of CUDA blocks in the 'x' dimension.
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) {
return __nvvm_read_ptx_sreg_nctaid_x();
@@ -263,8 +256,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {
_Pragma("omp end declare variant");
_Pragma("omp end declare target");
-#if !defined(__cplusplus)
-_Pragma("pop_macro(\"bool\")");
-#endif
-
#endif // __NVPTXINTRIN_H
|
clang/lib/Headers/amdgpuintrin.h
Outdated
_Pragma("push_macro(\"bool\")"); | ||
#define bool _Bool | ||
#ifndef __GPUINTRIN_H | ||
#warning "This file is intended as an implementation detail of gpuintrin.h" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should do this like the SSE headers and do something like
#warning "This file is intended as an implementation detail of gpuintrin.h" | |
#error "Never use <amdgpuintrin.h> directly; include <gpuintrin.h> instead." |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yep, done
@@ -263,8 +256,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) { | |||
_Pragma("omp end declare variant"); | |||
_Pragma("omp end declare target"); | |||
|
|||
#if !defined(__cplusplus) | |||
_Pragma("pop_macro(\"bool\")"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where did this go?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Up into gpuintrin.h
✅ With the latest revision this PR passed the C/C++ code formatter. |
f0149fd
to
0466c31
Compare
0466c31
to
7347ebc
Compare
4c04f69
to
7347ebc
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
…rs (llvm#131134) Adds macro guards to error if the implementation headers are included directly as part of dropping the need for them to be standalone. Lifts the bool macro into gpuintrin.h. Moves shuffle_idx_u64 into gpuintrin in passing, was the same implementation in each architecture file.
Adds macro guards to error if the implementation headers are included directly as part of dropping the need for them to be standalone. Lifts the bool macro into gpuintrin.h.
Moves shuffle_idx_u64 into gpuintrin in passing, was the same implementation in each architecture file.