@@ -670,27 +670,42 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
670670 // Build the argument list.
671671 bool NeedWrapperFunction =
672672 getDebugInfo () && CGM.getCodeGenOpts ().hasReducedDebugInfo ();
673- FunctionArgList Args;
674- llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
675- llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
673+ FunctionArgList Args, WrapperArgs;
674+ llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs,
675+ WrapperLocalAddrs;
676+ llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes,
677+ WrapperVLASizes;
676678 SmallString<256 > Buffer;
677679 llvm::raw_svector_ostream Out (Buffer);
678680 Out << CapturedStmtInfo->getHelperName ();
679- if (NeedWrapperFunction)
681+
682+ CodeGenFunction WrapperCGF (CGM, /* suppressNewContext=*/ true );
683+ llvm::Function *WrapperF = nullptr ;
684+ if (NeedWrapperFunction) {
685+ // Emit the final kernel early to allow attributes to be added by the
686+ // OpenMP-IR-Builder.
687+ FunctionOptions WrapperFO (&S, /* UIntPtrCastRequired=*/ true ,
688+ /* RegisterCastedArgsOnly=*/ true ,
689+ CapturedStmtInfo->getHelperName (), Loc);
690+ WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
691+ WrapperF =
692+ emitOutlinedFunctionPrologue (WrapperCGF, Args, LocalAddrs, VLASizes,
693+ WrapperCGF.CXXThisValue , WrapperFO);
680694 Out << " _debug__" ;
695+ }
681696 FunctionOptions FO (&S, !NeedWrapperFunction, /* RegisterCastedArgsOnly=*/ false ,
682697 Out.str (), Loc);
683- llvm::Function *F = emitOutlinedFunctionPrologue (* this , Args, LocalAddrs,
684- VLASizes , CXXThisValue, FO);
698+ llvm::Function *F = emitOutlinedFunctionPrologue (
699+ * this , WrapperArgs, WrapperLocalAddrs, WrapperVLASizes , CXXThisValue, FO);
685700 CodeGenFunction::OMPPrivateScope LocalScope (*this );
686- for (const auto &LocalAddrPair : LocalAddrs ) {
701+ for (const auto &LocalAddrPair : WrapperLocalAddrs ) {
687702 if (LocalAddrPair.second .first ) {
688703 LocalScope.addPrivate (LocalAddrPair.second .first ,
689704 LocalAddrPair.second .second );
690705 }
691706 }
692707 (void )LocalScope.Privatize ();
693- for (const auto &VLASizePair : VLASizes )
708+ for (const auto &VLASizePair : WrapperVLASizes )
694709 VLASizeMap[VLASizePair.second .first ] = VLASizePair.second .second ;
695710 PGO.assignRegionCounters (GlobalDecl (CD), F);
696711 CapturedStmtInfo->EmitBody (*this , CD->getBody ());
@@ -699,17 +714,10 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
699714 if (!NeedWrapperFunction)
700715 return F;
701716
702- FunctionOptions WrapperFO (&S, /* UIntPtrCastRequired=*/ true ,
703- /* RegisterCastedArgsOnly=*/ true ,
704- CapturedStmtInfo->getHelperName (), Loc);
705- CodeGenFunction WrapperCGF (CGM, /* suppressNewContext=*/ true );
706- WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
707- Args.clear ();
708- LocalAddrs.clear ();
709- VLASizes.clear ();
710- llvm::Function *WrapperF =
711- emitOutlinedFunctionPrologue (WrapperCGF, Args, LocalAddrs, VLASizes,
712- WrapperCGF.CXXThisValue , WrapperFO);
717+ // Reverse the order: re-insert the wrapper right after F in the function list.
718+ WrapperF->removeFromParent ();
719+ F->getParent ()->getFunctionList ().insertAfter (F->getIterator (), WrapperF);
720+
713721 llvm::SmallVector<llvm::Value *, 4 > CallArgs;
714722 auto *PI = F->arg_begin ();
715723 for (const auto *Arg : Args) {
0 commit comments