Skip to content

Commit

Permalink
Merged main:d6254e1b2e6d into amd-gfx:a1fa6830c554
Browse files Browse the repository at this point in the history
Local branch amd-gfx a1fa683 Merged main:dc129d6f715c into amd-gfx:bedba19995b8
Remote branch main d6254e1 Introduce the initial support for OpenMP kernel language (llvm#66844)
  • Loading branch information
SC llvm team authored and SC llvm team committed Oct 5, 2023
2 parents a1fa683 + d6254e1 commit 61f415d
Show file tree
Hide file tree
Showing 32 changed files with 1,324 additions and 711 deletions.
21 changes: 21 additions & 0 deletions clang/include/clang/AST/OpenMPClause.h
Original file line number Diff line number Diff line change
Expand Up @@ -9220,6 +9220,27 @@ class OMPXAttributeClause
}
};

/// This represents 'ompx_bare' clause in the '#pragma omp target teams ...'
/// directive.
///
/// \code
/// #pragma omp target teams ompx_bare
/// \endcode
/// In this example directive '#pragma omp target teams' has a 'ompx_bare'
/// clause.
class OMPXBareClause : public OMPNoChildClause<llvm::omp::OMPC_ompx_bare> {
public:
/// Build 'ompx_bare' clause.
///
/// \param StartLoc Starting location of the clause.
/// \param EndLoc Ending location of the clause.
OMPXBareClause(SourceLocation StartLoc, SourceLocation EndLoc)
: OMPNoChildClause(StartLoc, EndLoc) {}

/// Build an empty clause.
OMPXBareClause() = default;
};

} // namespace clang

#endif // LLVM_CLANG_AST_OPENMPCLAUSE_H
5 changes: 5 additions & 0 deletions clang/include/clang/AST/RecursiveASTVisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -3890,6 +3890,11 @@ bool RecursiveASTVisitor<Derived>::VisitOMPXAttributeClause(
return true;
}

template <typename Derived>
bool RecursiveASTVisitor<Derived>::VisitOMPXBareClause(OMPXBareClause *C) {
return true;
}

// FIXME: look at the following tricky-seeming exprs to see if we
// need to recurse on anything. These are ones that have methods
// returning decls or qualtypes or nestednamespecifier -- though I'm
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Basic/DiagnosticParseKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -1360,6 +1360,8 @@ def warn_clause_expected_string : Warning<
"expected string literal in 'clause %0' - ignoring">, InGroup<IgnoredPragmas>;
def err_omp_unexpected_clause : Error<
"unexpected OpenMP clause '%0' in directive '#pragma omp %1'">;
def err_omp_unexpected_clause_extension_only : Error<
"OpenMP clause '%0' is only available as extension, use '-fopenmp-extensions'">;
def err_omp_immediate_directive : Error<
"'#pragma omp %0' %select{|with '%2' clause }1cannot be an immediate substatement">;
def err_omp_expected_identifier_for_critical : Error<
Expand Down Expand Up @@ -1452,6 +1454,8 @@ def warn_unknown_declare_variant_isa_trait
"spelling or consider restricting the context selector with the "
"'arch' selector further">,
InGroup<SourceUsesOpenMP>;
def note_ompx_bare_clause : Note<
"OpenMP extension clause '%0' only allowed with '#pragma omp %1'">;
def note_omp_declare_variant_ctx_options
: Note<"context %select{set|selector|property}0 options are: %1">;
def warn_omp_declare_variant_expected
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -12486,6 +12486,10 @@ class Sema final {
SourceLocation LParenLoc,
SourceLocation EndLoc);

/// Called on a well-formed 'ompx_bare' clause.
OMPClause *ActOnOpenMPXBareClause(SourceLocation StartLoc,
SourceLocation EndLoc);

/// The kind of conversion being performed.
enum CheckedConversionKind {
/// An implicit conversion.
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Serialization/ASTWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,7 @@ class ASTWriter : public ASTDeserializationListener,
/// the module but currently is merely a random 32-bit number.
ASTFileSignature WriteAST(Sema &SemaRef, StringRef OutputFile,
Module *WritingModule, StringRef isysroot,
bool hasErrors = false,
bool ShouldCacheASTInMemory = false);

/// Emit a token.
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/AST/OpenMPClause.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) {
case OMPC_affinity:
case OMPC_when:
case OMPC_bind:
case OMPC_ompx_bare:
break;
default:
break;
Expand Down Expand Up @@ -2546,6 +2547,10 @@ void OMPClausePrinter::VisitOMPXAttributeClause(OMPXAttributeClause *Node) {
OS << ")";
}

void OMPClausePrinter::VisitOMPXBareClause(OMPXBareClause *Node) {
OS << "ompx_bare";
}

void OMPTraitInfo::getAsVariantMatchInfo(ASTContext &ASTCtx,
VariantMatchInfo &VMI) const {
for (const OMPTraitSet &Set : Sets) {
Expand Down
1 change: 1 addition & 0 deletions clang/lib/AST/StmtProfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,7 @@ void OMPClauseProfiler::VisitOMPDoacrossClause(const OMPDoacrossClause *C) {
}
void OMPClauseProfiler::VisitOMPXAttributeClause(const OMPXAttributeClause *C) {
}
void OMPClauseProfiler::VisitOMPXBareClause(const OMPXBareClause *C) {}
} // namespace

void
Expand Down
59 changes: 42 additions & 17 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -551,10 +551,9 @@ CGOpenMPRuntimeGPU::getExecutionMode() const {
return CurrentExecutionMode;
}

static CGOpenMPRuntimeGPU::DataSharingMode
getDataSharingMode(CodeGenModule &CGM) {
return CGM.getLangOpts().OpenMPCUDAMode ? CGOpenMPRuntimeGPU::CUDA
: CGOpenMPRuntimeGPU::Generic;
CGOpenMPRuntimeGPU::DataSharingMode
CGOpenMPRuntimeGPU::getDataSharingMode() const {
return CurrentDataSharingMode;
}

/// Check for inner (nested) SPMD construct, if any
Expand Down Expand Up @@ -752,6 +751,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
EntryFunctionState EST;
WrapperFunctionsMap.clear();

[[maybe_unused]] bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
assert(!IsBareKernel && "bare kernel should not be at generic mode");

// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
CGOpenMPRuntimeGPU::EntryFunctionState &EST;
Expand All @@ -760,15 +762,13 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST)
: EST(EST) {}
void Enter(CodeGenFunction &CGF) override {
auto &RT =
static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
RT.emitKernelInit(CGF, EST, /* IsSPMD */ false);
// Skip target region initialization.
RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
}
void Exit(CodeGenFunction &CGF) override {
auto &RT =
static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
RT.clearLocThreadIdInsertPt(CGF);
RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ false);
}
Expand Down Expand Up @@ -807,25 +807,39 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD);
EntryFunctionState EST;

bool IsBareKernel = D.getSingleClause<OMPXBareClause>();

// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
CGOpenMPRuntimeGPU &RT;
CGOpenMPRuntimeGPU::EntryFunctionState &EST;
bool IsBareKernel;
DataSharingMode Mode;

public:
NVPTXPrePostActionTy(CGOpenMPRuntimeGPU &RT,
CGOpenMPRuntimeGPU::EntryFunctionState &EST)
: RT(RT), EST(EST) {}
CGOpenMPRuntimeGPU::EntryFunctionState &EST,
bool IsBareKernel)
: RT(RT), EST(EST), IsBareKernel(IsBareKernel),
Mode(RT.CurrentDataSharingMode) {}
void Enter(CodeGenFunction &CGF) override {
if (IsBareKernel) {
RT.CurrentDataSharingMode = DataSharingMode::DS_CUDA;
return;
}
RT.emitKernelInit(CGF, EST, /* IsSPMD */ true);
// Skip target region initialization.
RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
}
void Exit(CodeGenFunction &CGF) override {
if (IsBareKernel) {
RT.CurrentDataSharingMode = Mode;
return;
}
RT.clearLocThreadIdInsertPt(CGF);
RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ true);
}
} Action(*this, EST);
} Action(*this, EST, IsBareKernel);
CodeGen.setAction(Action);
IsInTTDRegion = true;
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
Expand All @@ -843,7 +857,8 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
assert(!ParentName.empty() && "Invalid target region parent name!");

bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
if (Mode)
bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
if (Mode || IsBareKernel)
emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
CodeGen);
else
Expand All @@ -863,6 +878,9 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
if (!CGM.getLangOpts().OpenMPIsTargetDevice)
llvm_unreachable("OpenMP can only handle device code.");

if (CGM.getLangOpts().OpenMPCUDAMode)
CurrentDataSharingMode = CGOpenMPRuntimeGPU::DS_CUDA;

llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder();
if (CGM.getLangOpts().NoGPULib || CGM.getLangOpts().OMPHostIRFile.empty())
return;
Expand Down Expand Up @@ -1030,7 +1048,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
SourceLocation Loc,
bool WithSPMDCheck) {
if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic &&
getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
return;

Expand Down Expand Up @@ -1142,7 +1160,7 @@ void CGOpenMPRuntimeGPU::getKmpcFreeShared(

void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF,
bool WithSPMDCheck) {
if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic &&
getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
return;

Expand Down Expand Up @@ -1178,11 +1196,18 @@ void CGOpenMPRuntimeGPU::emitTeamsCall(CodeGenFunction &CGF,
if (!CGF.HaveInsertPoint())
return;

bool IsBareKernel = D.getSingleClause<OMPXBareClause>();

Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
/*Name=*/".zero.addr");
CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
// We don't emit any thread id function call in bare kernel, but because the
// outlined function has a pointer argument, we emit a nullptr here.
if (IsBareKernel)
OutlinedFnArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
else
OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
Expand Down Expand Up @@ -3273,7 +3298,7 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(

void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
const Decl *D) {
if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
return;

assert(D && "Expected function or captured|block decl.");
Expand Down Expand Up @@ -3382,7 +3407,7 @@ Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF,
VarTy, Align);
}

if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
return Address::invalid();

VD = VD->getCanonicalDecl();
Expand Down
29 changes: 18 additions & 11 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,18 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
/// Unknown execution mode (orphaned directive).
EM_Unknown,
};

/// Target codegen is specialized based on two data-sharing modes: CUDA, in
/// which the local variables are actually global threadlocal, and Generic, in
/// which the local variables are placed in global memory if they may escape
/// their declaration context.
enum DataSharingMode {
/// CUDA data sharing mode.
DS_CUDA,
/// Generic data-sharing mode.
DS_Generic,
};

private:
/// Parallel outlined function work for workers to execute.
llvm::SmallVector<llvm::Function *, 16> Work;
Expand All @@ -42,6 +54,8 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {

ExecutionMode getExecutionMode() const;

DataSharingMode getDataSharingMode() const;

/// Get barrier to synchronize all threads in a block.
void syncCTAThreads(CodeGenFunction &CGF);

Expand Down Expand Up @@ -297,17 +311,6 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
Address getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD) override;

/// Target codegen is specialized based on two data-sharing modes: CUDA, in
/// which the local variables are actually global threadlocal, and Generic, in
/// which the local variables are placed in global memory if they may escape
/// their declaration context.
enum DataSharingMode {
/// CUDA data sharing mode.
CUDA,
/// Generic data-sharing mode.
Generic,
};

/// Cleans up references to the objects in finished function.
///
void functionFinished(CodeGenFunction &CGF) override;
Expand Down Expand Up @@ -343,6 +346,10 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
/// to emit optimized code.
ExecutionMode CurrentExecutionMode = EM_Unknown;

/// Track the data sharing mode when codegening directives within a target
/// region.
DataSharingMode CurrentDataSharingMode = DataSharingMode::DS_Generic;

/// true if currently emitting code for target/teams/distribute region, false
/// - otherwise.
bool IsInTTDRegion = false;
Expand Down
17 changes: 12 additions & 5 deletions clang/lib/Frontend/ASTUnit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2341,9 +2341,12 @@ bool ASTUnit::Save(StringRef File) {
return false;
}

static bool serializeUnit(ASTWriter &Writer, SmallVectorImpl<char> &Buffer,
Sema &S, raw_ostream &OS) {
Writer.WriteAST(S, std::string(), nullptr, "");
static bool serializeUnit(ASTWriter &Writer,
SmallVectorImpl<char> &Buffer,
Sema &S,
bool hasErrors,
raw_ostream &OS) {
Writer.WriteAST(S, std::string(), nullptr, "", hasErrors);

// Write the generated bitstream to "Out".
if (!Buffer.empty())
Expand All @@ -2353,14 +2356,18 @@ static bool serializeUnit(ASTWriter &Writer, SmallVectorImpl<char> &Buffer,
}

bool ASTUnit::serialize(raw_ostream &OS) {
// For serialization we are lenient if the errors were only warn-as-error kind.
bool hasErrors = getDiagnostics().hasUncompilableErrorOccurred();

if (WriterData)
return serializeUnit(WriterData->Writer, WriterData->Buffer, getSema(), OS);
return serializeUnit(WriterData->Writer, WriterData->Buffer,
getSema(), hasErrors, OS);

SmallString<128> Buffer;
llvm::BitstreamWriter Stream(Buffer);
InMemoryModuleCache ModuleCache;
ASTWriter Writer(Stream, Buffer, ModuleCache, {});
return serializeUnit(Writer, Buffer, getSema(), OS);
return serializeUnit(Writer, Buffer, getSema(), hasErrors, OS);
}

using SLocRemap = ContinuousRangeMap<unsigned, int, 2>;
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/Parse/ParseOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3416,6 +3416,17 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
case OMPC_ompx_attribute:
Clause = ParseOpenMPOMPXAttributesClause(WrongDirective);
break;
case OMPC_ompx_bare:
if (WrongDirective)
Diag(Tok, diag::note_ompx_bare_clause)
<< getOpenMPClauseName(CKind) << "target teams";
if (!ErrorFound && !getLangOpts().OpenMPExtensions) {
Diag(Tok, diag::err_omp_unexpected_clause_extension_only)
<< getOpenMPClauseName(CKind) << getOpenMPDirectiveName(DKind);
ErrorFound = true;
}
Clause = ParseOpenMPClause(CKind, WrongDirective);
break;
default:
break;
}
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/Sema/SemaOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17553,6 +17553,9 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind,
case OMPC_partial:
Res = ActOnOpenMPPartialClause(nullptr, StartLoc, /*LParenLoc=*/{}, EndLoc);
break;
case OMPC_ompx_bare:
Res = ActOnOpenMPXBareClause(StartLoc, EndLoc);
break;
case OMPC_if:
case OMPC_final:
case OMPC_num_threads:
Expand Down Expand Up @@ -24279,3 +24282,8 @@ OMPClause *Sema::ActOnOpenMPXAttributeClause(ArrayRef<const Attr *> Attrs,
SourceLocation EndLoc) {
return new (Context) OMPXAttributeClause(Attrs, StartLoc, LParenLoc, EndLoc);
}

OMPClause *Sema::ActOnOpenMPXBareClause(SourceLocation StartLoc,
SourceLocation EndLoc) {
return new (Context) OMPXBareClause(StartLoc, EndLoc);
}
Loading

0 comments on commit 61f415d

Please sign in to comment.