From 41f2b97bdd1c6334eb22b6898d95cf13726a387e Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Tue, 26 Oct 2021 14:36:46 +0100 Subject: [PATCH] [WIP] Introduce disjoint agents fn attribute/intrinsic property --- llvm/include/llvm/AsmParser/LLToken.h | 1 + llvm/include/llvm/Bitcode/LLVMBitCodes.h | 1 + llvm/include/llvm/IR/Attributes.td | 3 +++ llvm/include/llvm/IR/Function.h | 11 +++++++++++ llvm/include/llvm/IR/Intrinsics.td | 9 +++++++++ llvm/include/llvm/IR/IntrinsicsNVVM.td | 2 +- llvm/include/llvm/Target/Target.td | 2 ++ llvm/lib/Analysis/GlobalsModRef.cpp | 18 +++++++----------- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 3 +++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 2 ++ llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1 + llvm/utils/TableGen/CodeGenInstruction.cpp | 1 + llvm/utils/TableGen/CodeGenInstruction.h | 1 + llvm/utils/TableGen/CodeGenIntrinsics.h | 3 +++ llvm/utils/TableGen/CodeGenTarget.cpp | 4 +++- llvm/utils/TableGen/InstrInfoEmitter.cpp | 1 + llvm/utils/TableGen/IntrinsicEmitter.cpp | 7 ++++++- 17 files changed, 56 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index f8ca054863ac5..97d24341eb6c8 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -188,6 +188,7 @@ enum Kind { kw_inalloca, kw_cold, kw_convergent, + kw_disjoint_agents, kw_dereferenceable, kw_dereferenceable_or_null, kw_disable_sanitizer_instrumentation, diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 04eb2739cbd5a..eb1359380b1c6 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -672,6 +672,7 @@ enum AttributeKindCodes { ATTR_KIND_NO_SANITIZE_COVERAGE = 76, ATTR_KIND_ELEMENTTYPE = 77, ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION = 78, + ATTR_KIND_DISJOINT_AGENTS = 79, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/IR/Attributes.td 
b/llvm/include/llvm/IR/Attributes.td index de25b51a62927..62720505a432b 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -76,6 +76,9 @@ def Cold : EnumAttr<"cold", [FnAttr]>; /// Can only be moved to control-equivalent blocks. def Convergent : EnumAttr<"convergent", [FnAttr]>; +/// Marks function as interfering with disjoint agents. +def DisjointAgents : EnumAttr<"disjoint_agents", [FnAttr]>; + /// Marks function as being in a hot path and frequently called. def Hot: EnumAttr<"hot", [FnAttr]>; diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index e5c675a64af00..82f3b19d12e11 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -590,6 +590,17 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject, removeFnAttr(Attribute::Convergent); } + /// Determine if the function interferes with disjoint agents. + bool isDisjointAgents() const { + return hasFnAttribute(Attribute::DisjointAgents); + } + void setDisjointAgents() { + addFnAttr(Attribute::DisjointAgents); + } + void setNotDisjointAgents() { + removeFnAttr(Attribute::DisjointAgents); + } + /// Determine if the call has sideeffects. bool isSpeculatable() const { return hasFnAttribute(Attribute::Speculatable); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 9c51a2f2b7ea3..c37029d5117a2 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -159,6 +159,15 @@ def IntrSpeculatable : IntrinsicProperty; // defined by the hasSideEffects property of the TableGen Instruction class. def IntrHasSideEffects : IntrinsicProperty; +// IntrDisjointAgents - Calls to this intrinsic will interfere with disjoint +// agents.
+// Depending on the target's execution model it might be the case that multiple +// agents will be executing instructions generated by the compiler. A call to +// an IntrDisjointAgents intrinsic will alter the sequence of execution of those +// agents. An example of such an intrinsic is NVIDIA's barrier0, which causes +// all the threads to stop their execution at the barrier's point. +def IntrDisjointAgents : IntrinsicProperty; + //===----------------------------------------------------------------------===// // Types used by intrinsics. //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index b4b40de603f37..4d4698cc45f2e 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -1251,7 +1251,7 @@ let TargetPrefix = "nvvm" in { // The builtin for "bar.sync 0" is called __syncthreads. Unlike most of the // intrinsics in this file, this one is a user-facing API. def int_nvvm_barrier0 : GCCBuiltin<"__syncthreads">, - Intrinsic<[], [], [IntrConvergent]>; + Intrinsic<[], [], [IntrConvergent, IntrDisjointAgents]>; // Synchronize all threads in the CTA at barrier 'n'. def int_nvvm_barrier_n : GCCBuiltin<"__nvvm_bar_n">, Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>; diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td index 36b9531a17929..6dfec2da32feb 100644 --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -550,6 +550,8 @@ class Instruction : InstructionEncoding { bit hasCtrlDep = false; // Does this instruction r/w ctrl-flow chains? bit isNotDuplicable = false; // Is it unsafe to duplicate this instruction? bit isConvergent = false; // Is this instruction convergent? + bit isDisjointAgents = false; // Does this instruction interact with + // disjoint agents? bit isAuthenticated = false; // Does this instruction authenticate a pointer?
bit isAsCheapAsAMove = false; // As cheap (or cheaper) than a move instruction. bit hasExtraSrcRegAllocReq = false; // Sources have special regalloc requirement? diff --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp index f4d66f05e66f8..b202b8362a7b0 100644 --- a/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/llvm/lib/Analysis/GlobalsModRef.cpp @@ -533,18 +533,14 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { FI.addModRefInfo(ModRefInfo::ModRef); if (!F->onlyAccessesArgMemory()) FI.setMayReadAnyGlobal(); - if (!F->isIntrinsic()) { - KnowNothing = true; - break; - } else if (F->getName().contains("nvvm.barrier") or - F->getName().contains("nvvm.membar")) { + if (!F->isIntrinsic() || F->isDisjointAgents()) { // Even if it is an intrinsic, consider that nothing is known for - // NVVM barrier itrinsics to prevent illegal optimizations. - // This is a workaround for the bug on PTX target: barrier - // intrinsics are implemented as llvm intrinsics, as result there - // are cases when globals alias analysis can produce a result that - // barrier doesn't modify internal global which causes illegal - // reordering of memory accesses. + // calls that interact with disjoint agents, such as NVVM barrier + // intrinsics, to prevent illegal optimizations. + // For context: in PTX, barriers are implemented as LLVM intrinsics; + // as a result there are cases when globals alias analysis can + // produce a result that a barrier doesn't modify an internal global, + // which causes illegal reordering of memory accesses.
KnowNothing = true; break; } diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index b3df3a759d971..4e35b2f6da0cc 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1231,6 +1231,7 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) { case Attribute::Naked: return 1 << 24; case Attribute::InlineHint: return 1 << 25; case Attribute::StackAlignment: return 7 << 26; + case Attribute::DisjointAgents: return 1ULL << 28; case Attribute::ReturnsTwice: return 1 << 29; case Attribute::UWTable: return 1 << 30; case Attribute::NonLazyBind: return 1U << 31; @@ -1396,6 +1397,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::Cold; case bitc::ATTR_KIND_CONVERGENT: return Attribute::Convergent; + case bitc::ATTR_KIND_DISJOINT_AGENTS: + return Attribute::DisjointAgents; case bitc::ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION: return Attribute::DisableSanitizerInstrumentation; case bitc::ATTR_KIND_ELEMENTTYPE: diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index fd77a8c80bb41..0425c98dcf040 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -622,6 +622,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_BY_VAL; case Attribute::Convergent: return bitc::ATTR_KIND_CONVERGENT; + case Attribute::DisjointAgents: + return bitc::ATTR_KIND_DISJOINT_AGENTS; case Attribute::InAlloca: return bitc::ATTR_KIND_IN_ALLOCA; case Attribute::Cold: diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 8bd09198ee745..1b61867a7b842 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -901,6 +901,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::Builtin: case 
Attribute::ByVal: case Attribute::Convergent: + case Attribute::DisjointAgents: case Attribute::Dereferenceable: case Attribute::DereferenceableOrNull: case Attribute::ElementType: diff --git a/llvm/utils/TableGen/CodeGenInstruction.cpp b/llvm/utils/TableGen/CodeGenInstruction.cpp index 3933ce6e1106c..23c4db2624cda 100644 --- a/llvm/utils/TableGen/CodeGenInstruction.cpp +++ b/llvm/utils/TableGen/CodeGenInstruction.cpp @@ -397,6 +397,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R) isExtractSubreg = R->getValueAsBit("isExtractSubreg"); isInsertSubreg = R->getValueAsBit("isInsertSubreg"); isConvergent = R->getValueAsBit("isConvergent"); + isDisjointAgents = R->getValueAsBit("isDisjointAgents"); hasNoSchedulingInfo = R->getValueAsBit("hasNoSchedulingInfo"); FastISelShouldIgnore = R->getValueAsBit("FastISelShouldIgnore"); variadicOpsAreDefs = R->getValueAsBit("variadicOpsAreDefs"); diff --git a/llvm/utils/TableGen/CodeGenInstruction.h b/llvm/utils/TableGen/CodeGenInstruction.h index e35d2191ea458..a51599788fb4c 100644 --- a/llvm/utils/TableGen/CodeGenInstruction.h +++ b/llvm/utils/TableGen/CodeGenInstruction.h @@ -274,6 +274,7 @@ template class ArrayRef; bool isExtractSubreg : 1; bool isInsertSubreg : 1; bool isConvergent : 1; + bool isDisjointAgents : 1; bool hasNoSchedulingInfo : 1; bool FastISelShouldIgnore : 1; bool hasChain : 1; diff --git a/llvm/utils/TableGen/CodeGenIntrinsics.h b/llvm/utils/TableGen/CodeGenIntrinsics.h index dbfad3bf6b178..86c1e708aa1be 100644 --- a/llvm/utils/TableGen/CodeGenIntrinsics.h +++ b/llvm/utils/TableGen/CodeGenIntrinsics.h @@ -148,6 +148,9 @@ struct CodeGenIntrinsic { // True if the intrinsic is marked as speculatable. bool isSpeculatable; + // True if the intrinsic is marked as interacting with disjoint agents. 
+ bool isDisjointAgents; + enum ArgAttrKind { NoCapture, NoAlias, diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp index 137f99078faf6..8100e682822f2 100644 --- a/llvm/utils/TableGen/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/CodeGenTarget.cpp @@ -907,7 +907,9 @@ void CodeGenIntrinsic::setProperty(Record *R) { unsigned ArgNo = R->getValueAsInt("ArgNo"); uint64_t Align = R->getValueAsInt("Align"); ArgumentAttributes.emplace_back(ArgNo, Alignment, Align); - } else + } else if (R->getName() == "IntrDisjointAgents") + isDisjointAgents = true; + else llvm_unreachable("Unknown property!"); } diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index aee887a906e54..fd7a0490d08a6 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -981,6 +981,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num, if (Inst.isExtractSubreg) OS << "|(1ULL<hasSideEffects != R->hasSideEffects) return R->hasSideEffects; + if (L->isDisjointAgents != R->isDisjointAgents) + return R->isDisjointAgents; + // Try to order by readonly/readnone attribute. 
CodeGenIntrinsic::ModRefBehavior LK = L->ModRef; CodeGenIntrinsic::ModRefBehavior RK = R->ModRef; @@ -742,7 +745,7 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, Intrinsic.isNoReturn || Intrinsic.isNoSync || Intrinsic.isNoFree || Intrinsic.isWillReturn || Intrinsic.isCold || Intrinsic.isNoDuplicate || Intrinsic.isNoMerge || Intrinsic.isConvergent || - Intrinsic.isSpeculatable) { + Intrinsic.isSpeculatable || Intrinsic.isDisjointAgents) { OS << " const Attribute::AttrKind Atts[] = {"; ListSeparator LS(","); if (!Intrinsic.canThrow) @@ -765,6 +768,8 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, OS << LS << "Attribute::Convergent"; if (Intrinsic.isSpeculatable) OS << LS << "Attribute::Speculatable"; + if (Intrinsic.isDisjointAgents) + OS << LS << "Attribute::DisjointAgents"; switch (Intrinsic.ModRef) { case CodeGenIntrinsic::NoMem: