diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index 10bc6ebd6fe506..ece51ebecdd9c1 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -86,6 +86,10 @@ class MCExpr {
              bool InParens = false) const;
   void dump() const;
 
+  /// Returns whether the given symbol is used anywhere in the expression or
+  /// subexpressions.
+  bool isSymbolUsedInExpression(const MCSymbol *Sym) const;
+
   /// @}
   /// \name Expression Evaluation
   /// @{
@@ -663,6 +667,9 @@ class MCTargetExpr : public MCExpr {
                                          const MCFixup *Fixup) const = 0;
   // allow Target Expressions to be checked for equality
   virtual bool isEqualTo(const MCExpr *x) const { return false; }
+  virtual bool isSymbolUsedInExpression(const MCSymbol *Sym) const {
+    return false;
+  }
   // This should be set when assigned expressions are not valid ".set"
   // expressions, e.g. registers, and must be inlined.
   virtual bool inlineAssignedExpr() const { return false; }
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index c9d5f6580fda4c..ede7655733f253 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -177,6 +177,35 @@ LLVM_DUMP_METHOD void MCExpr::dump() const {
 }
 #endif
 
+bool MCExpr::isSymbolUsedInExpression(const MCSymbol *Sym) const {
+  switch (getKind()) {
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(this);
+    return BE->getLHS()->isSymbolUsedInExpression(Sym) ||
+           BE->getRHS()->isSymbolUsedInExpression(Sym);
+  }
+  case MCExpr::Target: {
+    const MCTargetExpr *TE = static_cast<const MCTargetExpr *>(this);
+    return TE->isSymbolUsedInExpression(Sym);
+  }
+  case MCExpr::Constant:
+    return false;
+  case MCExpr::SymbolRef: {
+    const MCSymbol &S = static_cast<const MCSymbolRefExpr *>(this)->getSymbol();
+    if (S.isVariable() && !S.isWeakExternal())
+      return S.getVariableValue()->isSymbolUsedInExpression(Sym);
+    return &S == Sym;
+  }
+  case MCExpr::Unary: {
+    const MCExpr *SubExpr =
+        static_cast<const MCUnaryExpr *>(this)->getSubExpr();
+    return SubExpr->isSymbolUsedInExpression(Sym);
+  }
+  }
+
+  llvm_unreachable("Unknown expr kind!");
+}
+
 /* *** */
 
 const MCBinaryExpr *MCBinaryExpr::create(Opcode Opc, const MCExpr *LHS,
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index de11b46c069bc9..3ce45f7d5d67e1 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -6417,33 +6417,6 @@ bool HLASMAsmParser::parseStatement(ParseStatementInfo &Info,
 namespace llvm {
 namespace MCParserUtils {
 
-/// Returns whether the given symbol is used anywhere in the given expression,
-/// or subexpressions.
-static bool isSymbolUsedInExpression(const MCSymbol *Sym, const MCExpr *Value) {
-  switch (Value->getKind()) {
-  case MCExpr::Binary: {
-    const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Value);
-    return isSymbolUsedInExpression(Sym, BE->getLHS()) ||
-           isSymbolUsedInExpression(Sym, BE->getRHS());
-  }
-  case MCExpr::Target:
-  case MCExpr::Constant:
-    return false;
-  case MCExpr::SymbolRef: {
-    const MCSymbol &S =
-        static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
-    if (S.isVariable() && !S.isWeakExternal())
-      return isSymbolUsedInExpression(Sym, S.getVariableValue());
-    return &S == Sym;
-  }
-  case MCExpr::Unary:
-    return isSymbolUsedInExpression(
-        Sym, static_cast<const MCUnaryExpr *>(Value)->getSubExpr());
-  }
-
-  llvm_unreachable("Unknown expr kind!");
-}
-
 bool parseAssignmentExpression(StringRef Name, bool allow_redef,
                                MCAsmParser &Parser, MCSymbol *&Sym,
                                const MCExpr *&Value) {
@@ -6468,7 +6441,7 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
     //
     // FIXME: Diagnostics. Note the location of the definition as a label.
    // FIXME: Diagnose assignment to protected identifier (e.g., register name).
-    if (isSymbolUsedInExpression(Sym, Value))
+    if (Value->isSymbolUsedInExpression(Sym))
       return Parser.Error(EqualLoc, "Recursive use of '" + Name + "'");
     else if (Sym->isUndefined(/*SetUsed*/ false) && !Sym->isUsed() &&
              !Sym->isVariable())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index 413f2b1cdb7530..9511b6bb7de062 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -100,25 +100,50 @@ void MCResourceInfo::assignResourceInfoExpr(
   const MCConstantExpr *LocalConstExpr =
       MCConstantExpr::create(LocalValue, OutContext);
   const MCExpr *SymVal = LocalConstExpr;
+  MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
   if (!Callees.empty()) {
     SmallVector<const MCExpr *, 8> ArgExprs;
-    // Avoid recursive symbol assignment.
     SmallPtrSet<const Function *, 8> Seen;
     ArgExprs.push_back(LocalConstExpr);
-    const Function &F = MF.getFunction();
-    Seen.insert(&F);
 
     for (const Function *Callee : Callees) {
       if (!Seen.insert(Callee).second)
         continue;
+
       MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
       MCSymbol *CalleeValSym =
           getSymbol(CalleeFnSym->getName(), RIK, OutContext);
-      ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
+
+      // Avoid constructing recursive definitions by detecting whether `Sym` is
+      // found transitively within any of its `CalleeValSym`.
+      if (!CalleeValSym->isVariable() ||
+          !CalleeValSym->getVariableValue(/*isUsed=*/false)
+               ->isSymbolUsedInExpression(Sym)) {
+        ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
+      } else {
+        // In case of recursion: make sure to use conservative register counts
+        // (i.e., specifically for VGPR/SGPR/AGPR).
+        switch (RIK) {
+        default:
+          break;
+        case RIK_NumVGPR:
+          ArgExprs.push_back(MCSymbolRefExpr::create(
+              getMaxVGPRSymbol(OutContext), OutContext));
+          break;
+        case RIK_NumSGPR:
+          ArgExprs.push_back(MCSymbolRefExpr::create(
+              getMaxSGPRSymbol(OutContext), OutContext));
+          break;
+        case RIK_NumAGPR:
+          ArgExprs.push_back(MCSymbolRefExpr::create(
+              getMaxAGPRSymbol(OutContext), OutContext));
+          break;
+        }
+      }
     }
-    SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
+    if (ArgExprs.size() > 1)
+      SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
   }
-  MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
   Sym->setVariableValue(SymVal);
 }
 
@@ -162,6 +187,7 @@ void MCResourceInfo::gatherResourceInfo(
     // The expression for private segment size should be: FRI.PrivateSegmentSize
     // + max(FRI.Callees, FRI.CalleeSegmentSize)
     SmallVector<const MCExpr *, 8> ArgExprs;
+    MCSymbol *Sym = getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext);
     if (FRI.CalleeSegmentSize)
       ArgExprs.push_back(
           MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext));
@@ -173,9 +199,16 @@
         continue;
       if (!Callee->isDeclaration()) {
         MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
-        MCSymbol *calleeValSym =
+        MCSymbol *CalleeValSym =
             getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext);
-        ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));
+
+        // Avoid constructing recursive definitions by detecting whether `Sym`
+        // is found transitively within any of its `CalleeValSym`.
+        if (!CalleeValSym->isVariable() ||
+            !CalleeValSym->getVariableValue(/*isUsed=*/false)
+                 ->isSymbolUsedInExpression(Sym)) {
+          ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
+        }
       }
     }
     const MCExpr *localConstExpr =
@@ -186,8 +219,7 @@
       localConstExpr =
           MCBinaryExpr::createAdd(localConstExpr, transitiveExpr, OutContext);
     }
-    getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext)
-        ->setVariableValue(localConstExpr);
+    Sym->setVariableValue(localConstExpr);
   }
 
   auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
index 5fd1295177e14a..fd2d6109ac23bc 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
@@ -305,6 +305,14 @@ const AMDGPUMCExpr *AMDGPUMCExpr::createOccupancy(unsigned InitOcc,
                       Ctx);
 }
 
+bool AMDGPUMCExpr::isSymbolUsedInExpression(const MCSymbol *Sym) const {
+  for (const MCExpr *E : getArgs()) {
+    if (E->isSymbolUsedInExpression(Sym))
+      return true;
+  }
+  return false;
+}
+
 static KnownBits fromOptionalToKnownBits(std::optional<bool> CompareResult) {
   static constexpr unsigned BitWidth = 64;
   const APInt True(BitWidth, 1);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
index a16843f404b8f6..75e676bb7d5081 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
@@ -97,6 +97,7 @@ class AMDGPUMCExpr : public MCTargetExpr {
   void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
   bool evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
                                  const MCFixup *Fixup) const override;
+  bool isSymbolUsedInExpression(const MCSymbol *Sym) const override;
   void visitUsedExpr(MCStreamer &Streamer) const override;
   MCFragment *findAssociatedFragment() const override;
   void fixELFSymbolsInTLSFixups(MCAssembler &) const override{};
diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
index eda1e33cc4b9eb..278a2a0170ff33 100644
--- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
@@ -481,6 +481,136 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
   ret void
 }
 
+; GCN-LABEL: {{^}}multi_stage_recurse2:
+; GCN: .set multi_stage_recurse2.num_vgpr, max(43, multi_stage_recurse1.num_vgpr)
+; GCN: .set multi_stage_recurse2.num_agpr, max(0, multi_stage_recurse1.num_agpr)
+; GCN: .set multi_stage_recurse2.numbered_sgpr, max(34, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set multi_stage_recurse2.private_seg_size, 16+(max(multi_stage_recurse1.private_seg_size))
+; GCN: .set multi_stage_recurse2.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
+; GCN: .set multi_stage_recurse2.uses_flat_scratch, or(0, multi_stage_recurse1.uses_flat_scratch)
+; GCN: .set multi_stage_recurse2.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
+; GCN: .set multi_stage_recurse2.has_recursion, or(1, multi_stage_recurse1.has_recursion)
+; GCN: .set multi_stage_recurse2.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
+; GCN: TotalNumSgprs: multi_stage_recurse2.numbered_sgpr+(extrasgprs(multi_stage_recurse2.uses_vcc, multi_stage_recurse2.uses_flat_scratch, 1))
+; GCN: NumVgprs: max(43, multi_stage_recurse1.num_vgpr)
+; GCN: ScratchSize: 16+(max(multi_stage_recurse1.private_seg_size))
+; GCN-LABEL: {{^}}multi_stage_recurse1:
+; GCN: .set multi_stage_recurse1.num_vgpr, max(48, amdgpu.max_num_vgpr)
+; GCN: .set multi_stage_recurse1.num_agpr, max(0, amdgpu.max_num_agpr)
+; GCN: .set multi_stage_recurse1.numbered_sgpr, max(34, amdgpu.max_num_sgpr)
+; GCN: .set multi_stage_recurse1.private_seg_size, 16
+; GCN: .set multi_stage_recurse1.uses_vcc, 1
+; GCN: .set multi_stage_recurse1.uses_flat_scratch, 0
+; GCN: .set multi_stage_recurse1.has_dyn_sized_stack, 0
+; GCN: .set multi_stage_recurse1.has_recursion, 1
+; GCN: .set multi_stage_recurse1.has_indirect_call, 0
+; GCN: TotalNumSgprs: multi_stage_recurse1.numbered_sgpr+4
+; GCN: NumVgprs: max(48, amdgpu.max_num_vgpr)
+; GCN: ScratchSize: 16
+define void @multi_stage_recurse1(i32 %val) #2 {
+  call void @multi_stage_recurse2(i32 %val)
+  call void asm sideeffect "", "~{v47}"() #0
+  ret void
+}
+define void @multi_stage_recurse2(i32 %val) #2 {
+  call void @multi_stage_recurse1(i32 %val)
+  call void asm sideeffect "", "~{v42}"() #0
+  ret void
+}
+
+; GCN-LABEL: {{^}}usage_multi_stage_recurse:
+; GCN: .set usage_multi_stage_recurse.num_vgpr, max(32, multi_stage_recurse1.num_vgpr)
+; GCN: .set usage_multi_stage_recurse.num_agpr, max(0, multi_stage_recurse1.num_agpr)
+; GCN: .set usage_multi_stage_recurse.numbered_sgpr, max(33, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set usage_multi_stage_recurse.private_seg_size, 0+(max(multi_stage_recurse1.private_seg_size))
+; GCN: .set usage_multi_stage_recurse.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
+; GCN: .set usage_multi_stage_recurse.uses_flat_scratch, or(1, multi_stage_recurse1.uses_flat_scratch)
+; GCN: .set usage_multi_stage_recurse.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
+; GCN: .set usage_multi_stage_recurse.has_recursion, or(1, multi_stage_recurse1.has_recursion)
+; GCN: .set usage_multi_stage_recurse.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
+; GCN: TotalNumSgprs: usage_multi_stage_recurse.numbered_sgpr+6
+; GCN: NumVgprs: usage_multi_stage_recurse.num_vgpr
+; GCN: ScratchSize: 16
+define amdgpu_kernel void @usage_multi_stage_recurse(i32 %n) #0 {
+  call void @multi_stage_recurse1(i32 %n)
+  ret void
+}
+
+; GCN-LABEL: {{^}}multi_stage_recurse_noattr2:
+; GCN: .set multi_stage_recurse_noattr2.num_vgpr, max(41, multi_stage_recurse_noattr1.num_vgpr)
+; GCN: .set multi_stage_recurse_noattr2.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
+; GCN: .set multi_stage_recurse_noattr2.numbered_sgpr, max(54, multi_stage_recurse_noattr1.numbered_sgpr)
+; GCN: .set multi_stage_recurse_noattr2.private_seg_size, 16+(max(multi_stage_recurse_noattr1.private_seg_size))
+; GCN: .set multi_stage_recurse_noattr2.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
+; GCN: .set multi_stage_recurse_noattr2.uses_flat_scratch, or(0, multi_stage_recurse_noattr1.uses_flat_scratch)
+; GCN: .set multi_stage_recurse_noattr2.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
+; GCN: .set multi_stage_recurse_noattr2.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
+; GCN: .set multi_stage_recurse_noattr2.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
+; GCN: TotalNumSgprs: multi_stage_recurse_noattr2.numbered_sgpr+(extrasgprs(multi_stage_recurse_noattr2.uses_vcc, multi_stage_recurse_noattr2.uses_flat_scratch, 1))
+; GCN: NumVgprs: max(41, multi_stage_recurse_noattr1.num_vgpr)
+; GCN: ScratchSize: 16+(max(multi_stage_recurse_noattr1.private_seg_size))
+; GCN-LABEL: {{^}}multi_stage_recurse_noattr1:
+; GCN: .set multi_stage_recurse_noattr1.num_vgpr, max(41, amdgpu.max_num_vgpr)
+; GCN: .set multi_stage_recurse_noattr1.num_agpr, max(0, amdgpu.max_num_agpr)
+; GCN: .set multi_stage_recurse_noattr1.numbered_sgpr, max(57, amdgpu.max_num_sgpr)
+; GCN: .set multi_stage_recurse_noattr1.private_seg_size, 16
+; GCN: .set multi_stage_recurse_noattr1.uses_vcc, 1
+; GCN: .set multi_stage_recurse_noattr1.uses_flat_scratch, 0
+; GCN: .set multi_stage_recurse_noattr1.has_dyn_sized_stack, 0
+; GCN: .set multi_stage_recurse_noattr1.has_recursion, 0
+; GCN: .set multi_stage_recurse_noattr1.has_indirect_call, 0
+; GCN: TotalNumSgprs: multi_stage_recurse_noattr1.numbered_sgpr+4
+; GCN: NumVgprs: max(41, amdgpu.max_num_vgpr)
+; GCN: ScratchSize: 16
+define void @multi_stage_recurse_noattr1(i32 %val) #0 {
+  call void @multi_stage_recurse_noattr2(i32 %val)
+  call void asm sideeffect "", "~{s56}"() #0
+  ret void
+}
+define void @multi_stage_recurse_noattr2(i32 %val) #0 {
+  call void @multi_stage_recurse_noattr1(i32 %val)
+  call void asm sideeffect "", "~{s53}"() #0
+  ret void
+}
+
+; GCN-LABEL: {{^}}usage_multi_stage_recurse_noattrs:
+; GCN: .set usage_multi_stage_recurse_noattrs.num_vgpr, max(32, multi_stage_recurse_noattr1.num_vgpr)
+; GCN: .set usage_multi_stage_recurse_noattrs.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
+; GCN: .set usage_multi_stage_recurse_noattrs.numbered_sgpr, max(33, multi_stage_recurse_noattr1.numbered_sgpr)
+; GCN: .set usage_multi_stage_recurse_noattrs.private_seg_size, 0+(max(multi_stage_recurse_noattr1.private_seg_size))
+; GCN: .set usage_multi_stage_recurse_noattrs.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
+; GCN: .set usage_multi_stage_recurse_noattrs.uses_flat_scratch, or(1, multi_stage_recurse_noattr1.uses_flat_scratch)
+; GCN: .set usage_multi_stage_recurse_noattrs.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
+; GCN: .set usage_multi_stage_recurse_noattrs.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
+; GCN: .set usage_multi_stage_recurse_noattrs.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
+; GCN: TotalNumSgprs: usage_multi_stage_recurse_noattrs.numbered_sgpr+6
+; GCN: NumVgprs: usage_multi_stage_recurse_noattrs.num_vgpr
+; GCN: ScratchSize: 16
+define amdgpu_kernel void @usage_multi_stage_recurse_noattrs(i32 %n) #0 {
+  call void @multi_stage_recurse_noattr1(i32 %n)
+  ret void
+}
+
+; GCN-LABEL: {{^}}multi_call_with_multi_stage_recurse:
+; GCN: .set multi_call_with_multi_stage_recurse.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr, multi_stage_recurse1.num_vgpr)
+; GCN: .set multi_call_with_multi_stage_recurse.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr, multi_stage_recurse1.num_agpr)
+; GCN: .set multi_call_with_multi_stage_recurse.numbered_sgpr, max(43, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set multi_call_with_multi_stage_recurse.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size, multi_stage_recurse1.private_seg_size))
+; GCN: .set multi_call_with_multi_stage_recurse.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc, multi_stage_recurse1.uses_vcc)
+; GCN: .set multi_call_with_multi_stage_recurse.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch, multi_stage_recurse1.uses_flat_scratch)
+; GCN: .set multi_call_with_multi_stage_recurse.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack, multi_stage_recurse1.has_dyn_sized_stack)
+; GCN: .set multi_call_with_multi_stage_recurse.has_recursion, or(1, use_stack0.has_recursion, use_stack1.has_recursion, multi_stage_recurse1.has_recursion)
+; GCN: .set multi_call_with_multi_stage_recurse.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call, multi_stage_recurse1.has_indirect_call)
+; GCN: TotalNumSgprs: multi_call_with_multi_stage_recurse.numbered_sgpr+6
+; GCN: NumVgprs: multi_call_with_multi_stage_recurse.num_vgpr
+; GCN: ScratchSize: 2052
+define amdgpu_kernel void @multi_call_with_multi_stage_recurse(i32 %n) #0 {
+  call void @use_stack0()
+  call void @use_stack1()
+  call void @multi_stage_recurse1(i32 %n)
+  ret void
+}
+
 ; Make sure there's no assert when a sgpr96 is used.
 ; GCN-LABEL: {{^}}count_use_sgpr96_external_call
 ; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
diff --git a/llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll b/llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll
new file mode 100644
index 00000000000000..e150231e3d9e1a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll
@@ -0,0 +1,86 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}qux
+; CHECK: .set qux.num_vgpr, 13
+; CHECK: .set qux.num_agpr, 0
+; CHECK: .set qux.numbered_sgpr, 32
+; CHECK: .set qux.private_seg_size, 0
+; CHECK: .set qux.uses_vcc, 0
+; CHECK: .set qux.uses_flat_scratch, 0
+; CHECK: .set qux.has_dyn_sized_stack, 0
+; CHECK: .set qux.has_recursion, 0
+; CHECK: .set qux.has_indirect_call, 0
+define void @qux() {
+entry:
+  call void asm sideeffect "", "~{v12}"()
+  ret void
+}
+
+; CHECK-LABEL: {{^}}baz
+; CHECK: .set baz.num_vgpr, max(49, qux.num_vgpr)
+; CHECK: .set baz.num_agpr, max(0, qux.num_agpr)
+; CHECK: .set baz.numbered_sgpr, max(34, qux.numbered_sgpr)
+; CHECK: .set baz.private_seg_size, 16+(max(qux.private_seg_size))
+; CHECK: .set baz.uses_vcc, or(0, qux.uses_vcc)
+; CHECK: .set baz.uses_flat_scratch, or(0, qux.uses_flat_scratch)
+; CHECK: .set baz.has_dyn_sized_stack, or(0, qux.has_dyn_sized_stack)
+; CHECK: .set baz.has_recursion, or(1, qux.has_recursion)
+; CHECK: .set baz.has_indirect_call, or(0, qux.has_indirect_call)
+define void @baz() {
+entry:
+  call void @qux()
+  call void asm sideeffect "", "~{v48}"()
+  ret void
+}
+
+; CHECK-LABEL: {{^}}bar
+; CHECK: .set bar.num_vgpr, max(65, baz.num_vgpr, qux.num_vgpr)
+; CHECK: .set bar.num_agpr, max(0, baz.num_agpr, qux.num_agpr)
+; CHECK: .set bar.numbered_sgpr, max(34, baz.numbered_sgpr, qux.numbered_sgpr)
+; CHECK: .set bar.private_seg_size, 16+(max(baz.private_seg_size, qux.private_seg_size))
+; CHECK: .set bar.uses_vcc, or(0, baz.uses_vcc, qux.uses_vcc)
+; CHECK: .set bar.uses_flat_scratch, or(0, baz.uses_flat_scratch, qux.uses_flat_scratch)
+; CHECK: .set bar.has_dyn_sized_stack, or(0, baz.has_dyn_sized_stack, qux.has_dyn_sized_stack)
+; CHECK: .set bar.has_recursion, or(1, baz.has_recursion, qux.has_recursion)
+; CHECK: .set bar.has_indirect_call, or(0, baz.has_indirect_call, qux.has_indirect_call)
+define void @bar() {
+entry:
+  call void @baz()
+  call void @qux()
+  call void @baz()
+  call void asm sideeffect "", "~{v64}"()
+  ret void
+}
+
+; CHECK-LABEL: {{^}}foo
+; CHECK: .set foo.num_vgpr, max(38, bar.num_vgpr)
+; CHECK: .set foo.num_agpr, max(0, bar.num_agpr)
+; CHECK: .set foo.numbered_sgpr, max(34, bar.numbered_sgpr)
+; CHECK: .set foo.private_seg_size, 16+(max(bar.private_seg_size))
+; CHECK: .set foo.uses_vcc, or(0, bar.uses_vcc)
+; CHECK: .set foo.uses_flat_scratch, or(0, bar.uses_flat_scratch)
+; CHECK: .set foo.has_dyn_sized_stack, or(0, bar.has_dyn_sized_stack)
+; CHECK: .set foo.has_recursion, or(1, bar.has_recursion)
+; CHECK: .set foo.has_indirect_call, or(0, bar.has_indirect_call)
+define void @foo() {
+entry:
+  call void @bar()
+  call void asm sideeffect "", "~{v37}"()
+  ret void
+}
+
+; CHECK-LABEL: {{^}}usefoo
+; CHECK: .set usefoo.num_vgpr, max(32, foo.num_vgpr)
+; CHECK: .set usefoo.num_agpr, max(0, foo.num_agpr)
+; CHECK: .set usefoo.numbered_sgpr, max(33, foo.numbered_sgpr)
+; CHECK: .set usefoo.private_seg_size, 0+(max(foo.private_seg_size))
+; CHECK: .set usefoo.uses_vcc, or(0, foo.uses_vcc)
+; CHECK: .set usefoo.uses_flat_scratch, or(1, foo.uses_flat_scratch)
+; CHECK: .set usefoo.has_dyn_sized_stack, or(0, foo.has_dyn_sized_stack)
+; CHECK: .set usefoo.has_recursion, or(1, foo.has_recursion)
+; CHECK: .set usefoo.has_indirect_call, or(0, foo.has_indirect_call)
+define amdgpu_kernel void @usefoo() {
+  call void @foo()
+  ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll b/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll
new file mode 100644
index 00000000000000..ac6bd9a4ae8a6e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll
@@ -0,0 +1,93 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}qux
+; CHECK: .set qux.num_vgpr, max(71, foo.num_vgpr)
+; CHECK: .set qux.num_agpr, max(0, foo.num_agpr)
+; CHECK: .set qux.numbered_sgpr, max(46, foo.numbered_sgpr)
+; CHECK: .set qux.private_seg_size, 16
+; CHECK: .set qux.uses_vcc, or(1, foo.uses_vcc)
+; CHECK: .set qux.uses_flat_scratch, or(0, foo.uses_flat_scratch)
+; CHECK: .set qux.has_dyn_sized_stack, or(0, foo.has_dyn_sized_stack)
+; CHECK: .set qux.has_recursion, or(1, foo.has_recursion)
+; CHECK: .set qux.has_indirect_call, or(0, foo.has_indirect_call)
+
+; CHECK-LABEL: {{^}}baz
+; CHECK: .set baz.num_vgpr, max(61, qux.num_vgpr)
+; CHECK: .set baz.num_agpr, max(0, qux.num_agpr)
+; CHECK: .set baz.numbered_sgpr, max(51, qux.numbered_sgpr)
+; CHECK: .set baz.private_seg_size, 16+(max(qux.private_seg_size))
+; CHECK: .set baz.uses_vcc, or(1, qux.uses_vcc)
+; CHECK: .set baz.uses_flat_scratch, or(0, qux.uses_flat_scratch)
+; CHECK: .set baz.has_dyn_sized_stack, or(0, qux.has_dyn_sized_stack)
+; CHECK: .set baz.has_recursion, or(1, qux.has_recursion)
+; CHECK: .set baz.has_indirect_call, or(0, qux.has_indirect_call)
+
+; CHECK-LABEL: {{^}}bar
+; CHECK: .set bar.num_vgpr, max(51, baz.num_vgpr)
+; CHECK: .set bar.num_agpr, max(0, baz.num_agpr)
+; CHECK: .set bar.numbered_sgpr, max(61, baz.numbered_sgpr)
+; CHECK: .set bar.private_seg_size, 16+(max(baz.private_seg_size))
+; CHECK: .set bar.uses_vcc, or(1, baz.uses_vcc)
+; CHECK: .set bar.uses_flat_scratch, or(0, baz.uses_flat_scratch)
+; CHECK: .set bar.has_dyn_sized_stack, or(0, baz.has_dyn_sized_stack)
+; CHECK: .set bar.has_recursion, or(1, baz.has_recursion)
+; CHECK: .set bar.has_indirect_call, or(0, baz.has_indirect_call)
+
+; CHECK-LABEL: {{^}}foo
+; CHECK: .set foo.num_vgpr, max(46, amdgpu.max_num_vgpr)
+; CHECK: .set foo.num_agpr, max(0, amdgpu.max_num_agpr)
+; CHECK: .set foo.numbered_sgpr, max(71, amdgpu.max_num_sgpr)
+; CHECK: .set foo.private_seg_size, 16
+; CHECK: .set foo.uses_vcc, 1
+; CHECK: .set foo.uses_flat_scratch, 0
+; CHECK: .set foo.has_dyn_sized_stack, 0
+; CHECK: .set foo.has_recursion, 1
+; CHECK: .set foo.has_indirect_call, 0
+
+define void @foo() {
+entry:
+  call void @bar()
+  call void asm sideeffect "", "~{v45}"()
+  call void asm sideeffect "", "~{s70}"()
+  ret void
+}
+
+define void @bar() {
+entry:
+  call void @baz()
+  call void asm sideeffect "", "~{v50}"()
+  call void asm sideeffect "", "~{s60}"()
+  ret void
+}
+
+define void @baz() {
+entry:
+  call void @qux()
+  call void asm sideeffect "", "~{v60}"()
+  call void asm sideeffect "", "~{s50}"()
+  ret void
+}
+
+define void @qux() {
+entry:
+  call void @foo()
+  call void asm sideeffect "", "~{v70}"()
+  call void asm sideeffect "", "~{s45}"()
+  ret void
+}
+
+; CHECK-LABEL: {{^}}usefoo
+; CHECK: .set usefoo.num_vgpr, max(32, foo.num_vgpr)
+; CHECK: .set usefoo.num_agpr, max(0, foo.num_agpr)
+; CHECK: .set usefoo.numbered_sgpr, max(33, foo.numbered_sgpr)
+; CHECK: .set usefoo.private_seg_size, 0+(max(foo.private_seg_size))
+; CHECK: .set usefoo.uses_vcc, or(1, foo.uses_vcc)
+; CHECK: .set usefoo.uses_flat_scratch, or(1, foo.uses_flat_scratch)
+; CHECK: .set usefoo.has_dyn_sized_stack, or(0, foo.has_dyn_sized_stack)
+; CHECK: .set usefoo.has_recursion, or(1, foo.has_recursion)
+; CHECK: .set usefoo.has_indirect_call, or(0, foo.has_indirect_call)
+define amdgpu_kernel void @usefoo() {
+  call void @foo()
+  ret void
+}
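
Note (not part of the patch): a minimal sketch of how a client of the new MCExpr::isSymbolUsedInExpression hook might guard a symbol assignment against the recursive definitions this change detects. The helper name trySetVariable and its surrounding setup are hypothetical; MCSymbol::setVariableValue and the hook added above are the only APIs assumed to exist.

#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"

using namespace llvm;

// Sketch only: refuse to bind `Sym` to `Value` when `Value` already refers to
// `Sym`, either directly or transitively through other variable symbols,
// which would otherwise produce a cyclic ".set" definition.
static bool trySetVariable(MCSymbol *Sym, const MCExpr *Value) {
  if (Value->isSymbolUsedInExpression(Sym))
    return false; // caller should fall back to a conservative value instead
  Sym->setVariableValue(Value);
  return true;
}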