-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[OpenMP] Introduce the initial support for OpenMP kernel language #66844
Conversation
@llvm/pr-subscribers-flang-semantics @llvm/pr-subscribers-clang ChangesThis patch starts the support for OpenMP kernel language, basically to write Patch is 232.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66844.diff 19 Files Affected:
diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index 31ae3d42e232fca..eeeca1998f9fa9c 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -9220,6 +9220,27 @@ class OMPXAttributeClause
}
};
+/// This represents 'ompx_bare' clause in the '#pragma omp target teams ...'
+/// directive.
+///
+/// \code
+/// #pragma omp target teams ompx_bare
+/// \endcode
+/// In this example directive '#pragma omp target teams' has a 'ompx_bare'
+/// clause.
+class OMPXBareClause : public OMPNoChildClause<llvm::omp::OMPC_ompx_bare> {
+public:
+ /// Build 'ompx_bare' clause.
+ ///
+ /// \param StartLoc Starting location of the clause.
+ /// \param EndLoc Ending location of the clause.
+ OMPXBareClause(SourceLocation StartLoc, SourceLocation EndLoc)
+ : OMPNoChildClause(StartLoc, EndLoc) {}
+
+ /// Build an empty clause.
+ OMPXBareClause() = default;
+};
+
} // namespace clang
#endif // LLVM_CLANG_AST_OPENMPCLAUSE_H
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index d4146d52893ffb1..298489e7d4fc413 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -3890,6 +3890,11 @@ bool RecursiveASTVisitor<Derived>::VisitOMPXAttributeClause(
return true;
}
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPXBareClause(OMPXBareClause *C) {
+ return true;
+}
+
// FIXME: look at the following tricky-seeming exprs to see if we
// need to recurse on anything. These are ones that have methods
// returning decls or qualtypes or nestednamespecifier -- though I'm
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 178761bdcf4d5e3..43a5f000eda6cb9 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1360,6 +1360,8 @@ def warn_clause_expected_string : Warning<
"expected string literal in 'clause %0' - ignoring">, InGroup<IgnoredPragmas>;
def err_omp_unexpected_clause : Error<
"unexpected OpenMP clause '%0' in directive '#pragma omp %1'">;
+def err_omp_unexpected_clause_extension_only : Error<
+ "OpenMP clause '%0' is only available as extension, use '-fopenmp-extensions'">;
def err_omp_immediate_directive : Error<
"'#pragma omp %0' %select{|with '%2' clause }1cannot be an immediate substatement">;
def err_omp_expected_identifier_for_critical : Error<
@@ -1452,6 +1454,8 @@ def warn_unknown_declare_variant_isa_trait
"spelling or consider restricting the context selector with the "
"'arch' selector further">,
InGroup<SourceUsesOpenMP>;
+def note_ompx_bare_clause : Note<
+ "OpenMP extension clause '%0' only allowed with '#pragma omp %1'">;
def note_omp_declare_variant_ctx_options
: Note<"context %select{set|selector|property}0 options are: %1">;
def warn_omp_declare_variant_expected
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 47379e00a7445e3..64939c8b0c4be0e 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -12445,6 +12445,10 @@ class Sema final {
SourceLocation LParenLoc,
SourceLocation EndLoc);
+ /// Called on a well-formed 'ompx_bare' clause.
+ OMPClause *ActOnOpenMPXBareClause(SourceLocation StartLoc,
+ SourceLocation EndLoc);
+
/// The kind of conversion being performed.
enum CheckedConversionKind {
/// An implicit conversion.
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index f5ad75028a641e5..b95b4fce180e736 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -170,6 +170,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) {
case OMPC_affinity:
case OMPC_when:
case OMPC_bind:
+ case OMPC_ompx_bare:
break;
default:
break;
@@ -2546,6 +2547,10 @@ void OMPClausePrinter::VisitOMPXAttributeClause(OMPXAttributeClause *Node) {
OS << ")";
}
+void OMPClausePrinter::VisitOMPXBareClause(OMPXBareClause *Node) {
+ OS << "ompx_bare";
+}
+
void OMPTraitInfo::getAsVariantMatchInfo(ASTContext &ASTCtx,
VariantMatchInfo &VMI) const {
for (const OMPTraitSet &Set : Sets) {
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 27f71edd6f99b32..24fa2bf06f72af8 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -930,6 +930,7 @@ void OMPClauseProfiler::VisitOMPDoacrossClause(const OMPDoacrossClause *C) {
}
void OMPClauseProfiler::VisitOMPXAttributeClause(const OMPXAttributeClause *C) {
}
+void OMPClauseProfiler::VisitOMPXBareClause(const OMPXBareClause *C) {}
} // namespace
void
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 93819ab815add08..aaf22bc8583f3ee 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -551,10 +551,9 @@ CGOpenMPRuntimeGPU::getExecutionMode() const {
return CurrentExecutionMode;
}
-static CGOpenMPRuntimeGPU::DataSharingMode
-getDataSharingMode(CodeGenModule &CGM) {
- return CGM.getLangOpts().OpenMPCUDAMode ? CGOpenMPRuntimeGPU::CUDA
- : CGOpenMPRuntimeGPU::Generic;
+CGOpenMPRuntimeGPU::DataSharingMode
+CGOpenMPRuntimeGPU::getDataSharingMode() const {
+ return CurrentDataSharingMode;
}
/// Check for inner (nested) SPMD construct, if any
@@ -752,27 +751,41 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
EntryFunctionState EST;
WrapperFunctionsMap.clear();
+ bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
+
// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
+ CGOpenMPRuntimeGPU &RT;
CGOpenMPRuntimeGPU::EntryFunctionState &EST;
+ bool IsBareKernel;
+ DataSharingMode Mode;
public:
- NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST)
- : EST(EST) {}
+ NVPTXPrePostActionTy(CGOpenMPRuntimeGPU &RT,
+ CGOpenMPRuntimeGPU::EntryFunctionState &EST,
+ bool IsBareKernel)
+ : RT(RT), EST(EST), IsBareKernel(IsBareKernel),
+ Mode(RT.CurrentDataSharingMode) {}
void Enter(CodeGenFunction &CGF) override {
- auto &RT =
- static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
+ if (IsBareKernel) {
+ RT.CurrentDataSharingMode = DataSharingMode::DS_CUDA;
+ return;
+ }
+
RT.emitKernelInit(CGF, EST, /* IsSPMD */ false);
// Skip target region initialization.
RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
}
void Exit(CodeGenFunction &CGF) override {
- auto &RT =
- static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
+ if (IsBareKernel) {
+ RT.CurrentDataSharingMode = Mode;
+ return;
+ }
+
RT.clearLocThreadIdInsertPt(CGF);
RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ false);
}
- } Action(EST);
+ } Action(*this, EST, IsBareKernel);
CodeGen.setAction(Action);
IsInTTDRegion = true;
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
@@ -807,25 +820,39 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD);
EntryFunctionState EST;
+ bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
+
// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
CGOpenMPRuntimeGPU &RT;
CGOpenMPRuntimeGPU::EntryFunctionState &EST;
+ bool IsBareKernel;
+ DataSharingMode Mode;
public:
NVPTXPrePostActionTy(CGOpenMPRuntimeGPU &RT,
- CGOpenMPRuntimeGPU::EntryFunctionState &EST)
- : RT(RT), EST(EST) {}
+ CGOpenMPRuntimeGPU::EntryFunctionState &EST,
+ bool IsBareKernel)
+ : RT(RT), EST(EST), IsBareKernel(IsBareKernel),
+ Mode(RT.CurrentDataSharingMode) {}
void Enter(CodeGenFunction &CGF) override {
+ if (IsBareKernel) {
+ RT.CurrentDataSharingMode = DataSharingMode::DS_CUDA;
+ return;
+ }
RT.emitKernelInit(CGF, EST, /* IsSPMD */ true);
// Skip target region initialization.
RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
}
void Exit(CodeGenFunction &CGF) override {
+ if (IsBareKernel) {
+ RT.CurrentDataSharingMode = Mode;
+ return;
+ }
RT.clearLocThreadIdInsertPt(CGF);
RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ true);
}
- } Action(*this, EST);
+ } Action(*this, EST, IsBareKernel);
CodeGen.setAction(Action);
IsInTTDRegion = true;
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
@@ -867,6 +894,9 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
if (CGM.getLangOpts().NoGPULib || CGM.getLangOpts().OMPHostIRFile.empty())
return;
+ if (CGM.getLangOpts().OpenMPCUDAMode)
+ CurrentDataSharingMode = CGOpenMPRuntimeGPU::DS_CUDA;
+
OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTargetDebug,
"__omp_rtl_debug_kind");
OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTeamSubscription,
@@ -1030,7 +1060,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
SourceLocation Loc,
bool WithSPMDCheck) {
- if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
+ if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic &&
getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
return;
@@ -1142,7 +1172,7 @@ void CGOpenMPRuntimeGPU::getKmpcFreeShared(
void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF,
bool WithSPMDCheck) {
- if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
+ if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic &&
getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
return;
@@ -1178,11 +1208,16 @@ void CGOpenMPRuntimeGPU::emitTeamsCall(CodeGenFunction &CGF,
if (!CGF.HaveInsertPoint())
return;
+ bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
+
Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
/*Name=*/".zero.addr");
CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
- OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
+ if (IsBareKernel)
+ OutlinedFnArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
+ else
+ OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -3273,7 +3308,7 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
const Decl *D) {
- if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
+ if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
return;
assert(D && "Expected function or captured|block decl.");
@@ -3382,7 +3417,7 @@ Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF,
VarTy, Align);
}
- if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
+ if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
return Address::invalid();
VD = VD->getCanonicalDecl();
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
index dddfe5a94dccb8c..86871dfce418fde 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -32,6 +32,18 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
/// Unknown execution mode (orphaned directive).
EM_Unknown,
};
+
+ /// Target codegen is specialized based on two data-sharing modes: CUDA, in
+ /// which the local variables are actually global threadlocal, and Generic, in
+ /// which the local variables are placed in global memory if they may escape
+ /// their declaration context.
+ enum DataSharingMode {
+ /// CUDA data sharing mode.
+ DS_CUDA,
+ /// Generic data-sharing mode.
+ DS_Generic,
+ };
+
private:
/// Parallel outlined function work for workers to execute.
llvm::SmallVector<llvm::Function *, 16> Work;
@@ -42,6 +54,8 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
ExecutionMode getExecutionMode() const;
+ DataSharingMode getDataSharingMode() const;
+
/// Get barrier to synchronize all threads in a block.
void syncCTAThreads(CodeGenFunction &CGF);
@@ -297,17 +311,6 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
Address getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD) override;
- /// Target codegen is specialized based on two data-sharing modes: CUDA, in
- /// which the local variables are actually global threadlocal, and Generic, in
- /// which the local variables are placed in global memory if they may escape
- /// their declaration context.
- enum DataSharingMode {
- /// CUDA data sharing mode.
- CUDA,
- /// Generic data-sharing mode.
- Generic,
- };
-
/// Cleans up references to the objects in finished function.
///
void functionFinished(CodeGenFunction &CGF) override;
@@ -343,6 +346,10 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
/// to emit optimized code.
ExecutionMode CurrentExecutionMode = EM_Unknown;
+ /// Track the data sharing mode when codegening directives within a target
+ /// region.
+ DataSharingMode CurrentDataSharingMode = DataSharingMode::DS_Generic;
+
/// true if currently emitting code for target/teams/distribute region, false
/// - otherwise.
bool IsInTTDRegion = false;
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 605b97617432ed3..dd77d7c79f07655 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -3416,6 +3416,17 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
case OMPC_ompx_attribute:
Clause = ParseOpenMPOMPXAttributesClause(WrongDirective);
break;
+ case OMPC_ompx_bare:
+ if (WrongDirective)
+ Diag(Tok, diag::note_ompx_bare_clause)
+ << getOpenMPClauseName(CKind) << "target";
+ if (!ErrorFound && !getLangOpts().OpenMPExtensions) {
+ Diag(Tok, diag::err_omp_unexpected_clause_extension_only)
+ << getOpenMPClauseName(CKind) << getOpenMPDirectiveName(DKind);
+ ErrorFound = true;
+ }
+ Clause = ParseOpenMPClause(CKind, WrongDirective);
+ break;
default:
break;
}
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 46eae3596d2a8fe..07c0823596acbec 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -17555,6 +17555,9 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind,
case OMPC_partial:
Res = ActOnOpenMPPartialClause(nullptr, StartLoc, /*LParenLoc=*/{}, EndLoc);
break;
+ case OMPC_ompx_bare:
+ Res = ActOnOpenMPXBareClause(StartLoc, EndLoc);
+ break;
case OMPC_if:
case OMPC_final:
case OMPC_num_threads:
@@ -24281,3 +24284,8 @@ OMPClause *Sema::ActOnOpenMPXAttributeClause(ArrayRef<const Attr *> Attrs,
SourceLocation EndLoc) {
return new (Context) OMPXAttributeClause(Attrs, StartLoc, LParenLoc, EndLoc);
}
+
+OMPClause *Sema::ActOnOpenMPXBareClause(SourceLocation StartLoc,
+ SourceLocation EndLoc) {
+ return new (Context) OMPXBareClause(StartLoc, EndLoc);
+}
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 603a23275889f21..095e2dad32e40d5 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -2390,6 +2390,15 @@ class TreeTransform {
EndLoc);
}
+ /// Build a new OpenMP 'ompx_bare' clause.
+ ///
+ /// By default, performs semantic analysis to build the new OpenMP clause.
+ /// Subclasses may override this routine to provide different behavior.
+ OMPClause *RebuildOMPXBareClause(SourceLocation StartLoc,
+ SourceLocation EndLoc) {
+ return getSema().ActOnOpenMPXBareClause(StartLoc, EndLoc);
+ }
+
/// Build a new OpenMP 'align' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
@@ -10800,6 +10809,11 @@ TreeTransform<Derived>::TransformOMPXAttributeClause(OMPXAttributeClause *C) {
NewAttrs, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
+template <typename Derived>
+OMPClause *TreeTransform<Derived>::TransformOMPXBareClause(OMPXBareClause *C) {
+ return getDerived().RebuildOMPXBareClause(C->getBeginLoc(), C->getEndLoc());
+}
+
//===----------------------------------------------------------------------===//
// Expression transformation
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..4d15e3cb534cde0 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -10444,6 +10444,9 @@ OMPClause *OMPClauseReader::readClause() {
case llvm::omp::OMPC_ompx_attribute:
C = new (Context) OMPXAttributeClause();
break;
+ case llvm::omp::OMPC_ompx_bare:
+ C = new (Context) OMPXBareClause();
+ break;
#define OMP_CLAUSE_NO_CLASS(Enum, Str) \
case llvm::omp::Enum: \
break;
@@ -11545,6 +11548,8 @@ void OMPClauseReader::VisitOMPXAttributeClause(OMPXAttributeClause *C) {
C->setLocEnd(Record.readSourceLocation());
}
+void OMPClauseReader::VisitOMPXBareClause(OMPXBareClause *C) {}
+
OMPTraitInfo *ASTRecordReader::readOMPTraitInfo() {
OMPTraitInfo &TI = getContext().getNewOMPTraitInfo();
TI.Sets.resize(readUInt32());
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 65bee806d2c5571..c085313025353ac 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -7255,6 +7255,8 @@ void OMPClauseWriter::VisitOMPXAttributeClause(OMPXAttributeClause *C) {
Record.AddSourceLocation(C->getEndLoc());
}
+void OMPClauseWriter::VisitOMPXBareClause(OMPXBareClause *C) {}
+
void ASTRecordWriter::writeOMPTraitInfo(const OMPTraitInfo *TI) {
writeUInt32(TI->Sets.size());
for (const auto &Set : TI->Sets) {
diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
index 01eab5ff971ec77..20c540943682ded 100644
--- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
@@ -30,6 +30,11 @@ tx ftemplate(int n) {
aa = 1;
}
+ #pragma omp target teams ompx_bare
+ {
+ aa = 2;
+ }
+
#pragma omp target teams
{
#pragma omp parallel
@@ -132,8 +137,39 @@ int bar(int n){
// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: store i16 [[TMP1]], ptr [[AA_CASTED]], align 2
+// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[AA...
[truncated]
|
99b10f0
to
d66ac6e
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where is the code that prevents the init/deinit calls from being created?
It's in the early exit. |
d66ac6e
to
20b7120
Compare
This patch starts the support for OpenMP kernel language, basically to write OpenMP target region in SIMT style, similar to kernel languages such as CUDA. What included in this first patch is the `ompx_bare` clause for `target teams` directive. When `ompx_bare` exists, globalization is disabled such that local variables will not be globalized. The runtime init/deinit function calls will not be emitted. That being said, almost all OpenMP executable directives are not supported in the region, such as parallel, task. This patch doesn't include the Sema checks for that, so the use of them is UB. Simple directives, such as atomic, can be used. We provide a set of APIs (for C, they are prefix with `ompx_`; for C++, they are in `ompx` namespace) to get thread id, block id, etc. For more details, you can refer to https://tianshilei.me/wp-content/uploads/llvm-hpc-2022.pdf?swcfpc=1.
20b7120
to
c1bf88b
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG, we need runtime tests in a follow up.
It seems that this broke the AMDGPU OpenMP buildbot https://lab.llvm.org/buildbot/#/builders/193/builds/39393 |
This patch starts the support for OpenMP kernel language, basically to write OpenMP target region in SIMT style, similar to kernel languages such as CUDA. What included in this first patch is the `ompx_bare` clause for `target teams` directive. When `ompx_bare` exists, globalization is disabled such that local variables will not be globalized. The runtime init/deinit function calls will not be emitted. That being said, almost all OpenMP executable directives are not supported in the region, such as parallel, task. This patch doesn't include the Sema checks for that, so the use of them is UB. Simple directives, such as atomic, can be used. We provide a set of APIs (for C, they are prefix with `ompx_`; for C++, they are in `ompx` namespace) to get thread id, block id, etc. Please refer to https://tianshilei.me/wp-content/uploads/llvm-hpc-2023.pdf for more details.
Local branch amd-gfx a1fa683 Merged main:dc129d6f715c into amd-gfx:bedba19995b8 Remote branch main d6254e1 Introduce the initial support for OpenMP kernel language (llvm#66844)
This patch starts the support for OpenMP kernel language, basically to write
OpenMP target region in SIMT style, similar to kernel languages such as CUDA.
What included in this first patch is the
ompx_bare
clause fortarget teams
directive. When
ompx_bare
exists, globalization is disabled such that localvariables will not be globalized. The runtime init/deinit function calls will
not be emitted. That being said, almost all OpenMP executable directives are
not supported in the region, such as parallel, task. This patch doesn't include
the Sema checks for that, so the use of them is UB. Simple directives, such as
atomic, can be used. We provide a set of APIs (for C, they are prefix with
ompx_
; for C++, they are inompx
namespace) to get thread id, block id, etc.For more details, you can refer to
https://tianshilei.me/wp-content/uploads/llvm-hpc-2023.pdf.