Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Headers][NFC] Steps to allow sharing code between gpu intrin.h headers #131134

Merged
merged 1 commit into from
Mar 13, 2025

Conversation

JonChesterfield
Copy link
Collaborator

@JonChesterfield JonChesterfield commented Mar 13, 2025

Adds macro guards to error if the implementation headers are included directly as part of dropping the need for them to be standalone. Lifts the bool macro into gpuintrin.h.

Moves shuffle_idx_u64 into gpuintrin in passing, was the same implementation in each architecture file.

@JonChesterfield JonChesterfield requested a review from jhuber6 March 13, 2025 12:52
@llvmbot llvmbot added clang Clang issues not falling into any other category backend:AMDGPU backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics labels Mar 13, 2025
@llvmbot
Copy link
Member

llvmbot commented Mar 13, 2025

@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-clang

Author: Jon Chesterfield (JonChesterfield)

Changes

Adds macro guards to warn if the implementation headers are included directly as part of dropping the need for them to be standalone.

I'd like to declare functions before the include but it might be be viable with the openmp pragma annotation to do so.


Full diff: https://github.com/llvm/llvm-project/pull/131134.diff

3 Files Affected:

  • (modified) clang/lib/Headers/amdgpuintrin.h (+2-13)
  • (modified) clang/lib/Headers/gpuintrin.h (+14-6)
  • (modified) clang/lib/Headers/nvptxintrin.h (+4-15)
diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h
index 839a05175cf3e..7b1d16f8ca88d 100644
--- a/clang/lib/Headers/amdgpuintrin.h
+++ b/clang/lib/Headers/amdgpuintrin.h
@@ -13,11 +13,8 @@
 #error "This file is intended for AMDGPU targets or offloading to AMDGPU"
 #endif
 
-#include <stdint.h>
-
-#if !defined(__cplusplus)
-_Pragma("push_macro(\"bool\")");
-#define bool _Bool
+#ifndef __GPUINTRIN_H
+#warning "This file is intended as an implementation detail of gpuintrin.h"
 #endif
 
 _Pragma("omp begin declare target device_type(nohost)");
@@ -33,10 +30,6 @@ _Pragma("omp begin declare variant match(device = {arch(amdgcn)})");
 // Attribute to declare a function as a kernel.
 #define __gpu_kernel __attribute__((amdgpu_kernel, visibility("protected")))
 
-// Defined in gpuintrin.h, used later in this file.
-_DEFAULT_FN_ATTRS static __inline__ uint64_t
-__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x);
-
 // Returns the number of workgroups in the 'x' dimension of the grid.
 _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) {
   return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x();
@@ -238,8 +231,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {
 _Pragma("omp end declare variant");
 _Pragma("omp end declare target");
 
-#if !defined(__cplusplus)
-_Pragma("pop_macro(\"bool\")");
-#endif
-
 #endif // __AMDGPUINTRIN_H
diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h
index 4181628d18048..8d300b5b9acb8 100644
--- a/clang/lib/Headers/gpuintrin.h
+++ b/clang/lib/Headers/gpuintrin.h
@@ -25,6 +25,20 @@
 #endif
 #endif
 
+#include <stdint.h>
+
+#if !defined(__cplusplus)
+_Pragma("push_macro(\"bool\")");
+#define bool _Bool
+#endif
+
+// Declare functions that can be called by the implementation headers
+
+// Returns the number of workgroups in the 'x' dimension of the grid.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x);
+
+
 #if defined(__NVPTX__)
 #include <nvptxintrin.h>
 #elif defined(__AMDGPU__)
@@ -33,12 +47,6 @@
 #error "This header is only meant to be used on GPU architectures."
 #endif
 
-#include <stdint.h>
-
-#if !defined(__cplusplus)
-_Pragma("push_macro(\"bool\")");
-#define bool _Bool
-#endif
 
 _Pragma("omp begin declare target device_type(nohost)");
 _Pragma("omp begin declare variant match(device = {kind(gpu)})");
diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h
index d00a5f6de3950..170c943fe63a2 100644
--- a/clang/lib/Headers/nvptxintrin.h
+++ b/clang/lib/Headers/nvptxintrin.h
@@ -13,15 +13,12 @@
 #error "This file is intended for NVPTX targets or offloading to NVPTX"
 #endif
 
-#ifndef __CUDA_ARCH__
-#define __CUDA_ARCH__ 0
+#ifndef __GPUINTRIN_H
+#warning "This file is intended as an implementation detail of gpuintrin.h"
 #endif
 
-#include <stdint.h>
-
-#if !defined(__cplusplus)
-_Pragma("push_macro(\"bool\")");
-#define bool _Bool
+#ifndef __CUDA_ARCH__
+#define __CUDA_ARCH__ 0
 #endif
 
 _Pragma("omp begin declare target device_type(nohost)");
@@ -37,10 +34,6 @@ _Pragma("omp begin declare variant match(device = {arch(nvptx64)})");
 // Attribute to declare a function as a kernel.
 #define __gpu_kernel __attribute__((nvptx_kernel, visibility("protected")))
 
-// Defined in gpuintrin.h, used later in this file.
-_DEFAULT_FN_ATTRS static __inline__ uint64_t
-__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x);
-
 // Returns the number of CUDA blocks in the 'x' dimension.
 _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) {
   return __nvvm_read_ptx_sreg_nctaid_x();
@@ -263,8 +256,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {
 _Pragma("omp end declare variant");
 _Pragma("omp end declare target");
 
-#if !defined(__cplusplus)
-_Pragma("pop_macro(\"bool\")");
-#endif
-
 #endif // __NVPTXINTRIN_H

_Pragma("push_macro(\"bool\")");
#define bool _Bool
#ifndef __GPUINTRIN_H
#warning "This file is intended as an implementation detail of gpuintrin.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should do this like the SSE headers and do something like

Suggested change
#warning "This file is intended as an implementation detail of gpuintrin.h"
#error "Never use <amdgpuintrin.h> directly; include <gpuintrin.h> instead."

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, done

@@ -263,8 +256,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {
_Pragma("omp end declare variant");
_Pragma("omp end declare target");

#if !defined(__cplusplus)
_Pragma("pop_macro(\"bool\")");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where did this go?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Up into gpuintrin.h

Copy link

github-actions bot commented Mar 13, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@jhuber6 jhuber6 changed the title [libc][nfc] Steps to allow sharing code between gpu intrin.h headers [Headers][NFC] Steps to allow sharing code between gpu intrin.h headers Mar 13, 2025
@JonChesterfield JonChesterfield force-pushed the jc_header_refactor branch 2 times, most recently from 4c04f69 to 7347ebc Compare March 13, 2025 13:15
Copy link
Contributor

@jhuber6 jhuber6 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LG

@JonChesterfield JonChesterfield merged commit 7e9802f into llvm:main Mar 13, 2025
10 of 16 checks passed
@JonChesterfield JonChesterfield deleted the jc_header_refactor branch March 13, 2025 13:24
frederik-h pushed a commit to frederik-h/llvm-project that referenced this pull request Mar 18, 2025
…rs (llvm#131134)

Adds macro guards to error if the implementation headers are included
directly as part of dropping the need for them to be standalone. Lifts
the bool macro into gpuintrin.h.

Moves shuffle_idx_u64 into gpuintrin in passing, was the same
implementation in each architecture file.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants