@@ -141,6 +141,12 @@ static cl::opt<bool> GreedyReverseLocalAssignment(
141141 " shorter local live ranges will tend to be allocated first" ),
142142 cl::Hidden);
143143
144+ static cl::opt<unsigned > SplitThresholdForRegWithHint (
145+ " split-threshold-for-reg-with-hint" ,
146+ cl::desc (" The threshold for splitting a virtual register with a hint, in "
147+ " percentate" ),
148+ cl::init(75 ), cl::Hidden);
149+
144150static RegisterRegAlloc greedyRegAlloc (" greedy" , " greedy register allocator" ,
145151 createGreedyRegisterAllocator);
146152
@@ -422,6 +428,11 @@ MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
422428 evictInterference (VirtReg, PhysHint, NewVRegs);
423429 return PhysHint;
424430 }
431+
432+ // We can also split the virtual register in cold blocks.
433+ if (trySplitAroundHintReg (PhysHint, VirtReg, NewVRegs, Order))
434+ return 0 ;
435+
425436 // Record the missed hint, we may be able to recover
426437 // at the end if the surrounding allocation changed.
427438 SetOfBrokenHints.insert (&VirtReg);
@@ -1064,86 +1075,98 @@ MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
10641075 return doRegionSplit (VirtReg, BestCand, HasCompact, NewVRegs);
10651076}
10661077
1067- unsigned RAGreedy::calculateRegionSplitCost (const LiveInterval &VirtReg,
1078+ unsigned
1079+ RAGreedy::calculateRegionSplitCostAroundReg (MCPhysReg PhysReg,
10681080 AllocationOrder &Order,
10691081 BlockFrequency &BestCost,
10701082 unsigned &NumCands,
1071- bool IgnoreCSR) {
1072- unsigned BestCand = NoCand;
1073- for (MCPhysReg PhysReg : Order) {
1074- assert (PhysReg);
1075- if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg (PhysReg))
1076- continue ;
1077-
1078- // Discard bad candidates before we run out of interference cache cursors.
1079- // This will only affect register classes with a lot of registers (>32).
1080- if (NumCands == IntfCache.getMaxCursors ()) {
1081- unsigned WorstCount = ~0u ;
1082- unsigned Worst = 0 ;
1083- for (unsigned CandIndex = 0 ; CandIndex != NumCands; ++CandIndex) {
1084- if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg )
1085- continue ;
1086- unsigned Count = GlobalCand[CandIndex].LiveBundles .count ();
1087- if (Count < WorstCount) {
1088- Worst = CandIndex;
1089- WorstCount = Count;
1090- }
1083+ unsigned &BestCand) {
1084+ // Discard bad candidates before we run out of interference cache cursors.
1085+ // This will only affect register classes with a lot of registers (>32).
1086+ if (NumCands == IntfCache.getMaxCursors ()) {
1087+ unsigned WorstCount = ~0u ;
1088+ unsigned Worst = 0 ;
1089+ for (unsigned CandIndex = 0 ; CandIndex != NumCands; ++CandIndex) {
1090+ if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg )
1091+ continue ;
1092+ unsigned Count = GlobalCand[CandIndex].LiveBundles .count ();
1093+ if (Count < WorstCount) {
1094+ Worst = CandIndex;
1095+ WorstCount = Count;
10911096 }
1092- --NumCands;
1093- GlobalCand[Worst] = GlobalCand[NumCands];
1094- if (BestCand == NumCands)
1095- BestCand = Worst;
10961097 }
1098+ --NumCands;
1099+ GlobalCand[Worst] = GlobalCand[NumCands];
1100+ if (BestCand == NumCands)
1101+ BestCand = Worst;
1102+ }
10971103
1098- if (GlobalCand.size () <= NumCands)
1099- GlobalCand.resize (NumCands+1 );
1100- GlobalSplitCandidate &Cand = GlobalCand[NumCands];
1101- Cand.reset (IntfCache, PhysReg);
1104+ if (GlobalCand.size () <= NumCands)
1105+ GlobalCand.resize (NumCands+1 );
1106+ GlobalSplitCandidate &Cand = GlobalCand[NumCands];
1107+ Cand.reset (IntfCache, PhysReg);
11021108
1103- SpillPlacer->prepare (Cand.LiveBundles );
1104- BlockFrequency Cost;
1105- if (!addSplitConstraints (Cand.Intf , Cost)) {
1106- LLVM_DEBUG (dbgs () << printReg (PhysReg, TRI) << " \t no positive bundles\n " );
1107- continue ;
1108- }
1109- LLVM_DEBUG (dbgs () << printReg (PhysReg, TRI) << " \t static = " ;
1110- MBFI->printBlockFreq (dbgs (), Cost));
1111- if (Cost >= BestCost) {
1112- LLVM_DEBUG ({
1113- if (BestCand == NoCand)
1114- dbgs () << " worse than no bundles\n " ;
1115- else
1116- dbgs () << " worse than "
1117- << printReg (GlobalCand[BestCand].PhysReg , TRI) << ' \n ' ;
1118- });
1119- continue ;
1120- }
1121- if (!growRegion (Cand)) {
1122- LLVM_DEBUG (dbgs () << " , cannot spill all interferences.\n " );
1123- continue ;
1124- }
1109+ SpillPlacer->prepare (Cand.LiveBundles );
1110+ BlockFrequency Cost;
1111+ if (!addSplitConstraints (Cand.Intf , Cost)) {
1112+ LLVM_DEBUG (dbgs () << printReg (PhysReg, TRI) << " \t no positive bundles\n " );
1113+ return BestCand;
1114+ }
1115+ LLVM_DEBUG (dbgs () << printReg (PhysReg, TRI) << " \t static = " ;
1116+ MBFI->printBlockFreq (dbgs (), Cost));
1117+ if (Cost >= BestCost) {
1118+ LLVM_DEBUG ({
1119+ if (BestCand == NoCand)
1120+ dbgs () << " worse than no bundles\n " ;
1121+ else
1122+ dbgs () << " worse than "
1123+ << printReg (GlobalCand[BestCand].PhysReg , TRI) << ' \n ' ;
1124+ });
1125+ return BestCand;
1126+ }
1127+ if (!growRegion (Cand)) {
1128+ LLVM_DEBUG (dbgs () << " , cannot spill all interferences.\n " );
1129+ return BestCand;
1130+ }
1131+
1132+ SpillPlacer->finish ();
1133+
1134+ // No live bundles, defer to splitSingleBlocks().
1135+ if (!Cand.LiveBundles .any ()) {
1136+ LLVM_DEBUG (dbgs () << " no bundles.\n " );
1137+ return BestCand;
1138+ }
1139+
1140+ Cost += calcGlobalSplitCost (Cand, Order);
1141+ LLVM_DEBUG ({
1142+ dbgs () << " , total = " ;
1143+ MBFI->printBlockFreq (dbgs (), Cost) << " with bundles" ;
1144+ for (int I : Cand.LiveBundles .set_bits ())
1145+ dbgs () << " EB#" << I;
1146+ dbgs () << " .\n " ;
1147+ });
1148+ if (Cost < BestCost) {
1149+ BestCand = NumCands;
1150+ BestCost = Cost;
1151+ }
1152+ ++NumCands;
11251153
1126- SpillPlacer->finish ();
1154+ return BestCand;
1155+ }
11271156
1128- // No live bundles, defer to splitSingleBlocks().
1129- if (!Cand.LiveBundles .any ()) {
1130- LLVM_DEBUG (dbgs () << " no bundles.\n " );
1157+ unsigned RAGreedy::calculateRegionSplitCost (const LiveInterval &VirtReg,
1158+ AllocationOrder &Order,
1159+ BlockFrequency &BestCost,
1160+ unsigned &NumCands,
1161+ bool IgnoreCSR) {
1162+ unsigned BestCand = NoCand;
1163+ for (MCPhysReg PhysReg : Order) {
1164+ assert (PhysReg);
1165+ if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg (PhysReg))
11311166 continue ;
1132- }
11331167
1134- Cost += calcGlobalSplitCost (Cand, Order);
1135- LLVM_DEBUG ({
1136- dbgs () << " , total = " ;
1137- MBFI->printBlockFreq (dbgs (), Cost) << " with bundles" ;
1138- for (int I : Cand.LiveBundles .set_bits ())
1139- dbgs () << " EB#" << I;
1140- dbgs () << " .\n " ;
1141- });
1142- if (Cost < BestCost) {
1143- BestCand = NumCands;
1144- BestCost = Cost;
1145- }
1146- ++NumCands;
1168+ calculateRegionSplitCostAroundReg (PhysReg, Order, BestCost, NumCands,
1169+ BestCand);
11471170 }
11481171
11491172 return BestCand;
@@ -1189,6 +1212,53 @@ unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
11891212 return 0 ;
11901213}
11911214
1215+ // VirtReg has a physical Hint, this function tries to split VirtReg around
1216+ // Hint if we can place new COPY instructions in cold blocks.
1217+ bool RAGreedy::trySplitAroundHintReg (MCPhysReg Hint,
1218+ const LiveInterval &VirtReg,
1219+ SmallVectorImpl<Register> &NewVRegs,
1220+ AllocationOrder &Order) {
1221+ BlockFrequency Cost = 0 ;
1222+ Register Reg = VirtReg.reg ();
1223+
1224+ // Compute the cost of assigning a non Hint physical register to VirtReg.
1225+ // We define it as the total frequency of broken COPY instructions to/from
1226+ // Hint register, and after split, they can be deleted.
1227+ for (const MachineInstr &Instr : MRI->reg_nodbg_instructions (Reg)) {
1228+ if (!TII->isFullCopyInstr (Instr))
1229+ continue ;
1230+ Register OtherReg = Instr.getOperand (1 ).getReg ();
1231+ if (OtherReg == Reg) {
1232+ OtherReg = Instr.getOperand (0 ).getReg ();
1233+ if (OtherReg == Reg)
1234+ continue ;
1235+ // Check if VirtReg interferes with OtherReg after this COPY instruction.
1236+ if (VirtReg.liveAt (LIS->getInstructionIndex (Instr).getRegSlot ()))
1237+ continue ;
1238+ }
1239+ MCRegister OtherPhysReg =
1240+ OtherReg.isPhysical () ? OtherReg.asMCReg () : VRM->getPhys (OtherReg);
1241+ if (OtherPhysReg == Hint)
1242+ Cost += MBFI->getBlockFreq (Instr.getParent ());
1243+ }
1244+
1245+ // Decrease the cost so it will be split in colder blocks.
1246+ BranchProbability Threshold (SplitThresholdForRegWithHint, 100 );
1247+ Cost *= Threshold;
1248+ if (Cost == 0 )
1249+ return false ;
1250+
1251+ unsigned NumCands = 0 ;
1252+ unsigned BestCand = NoCand;
1253+ SA->analyze (&VirtReg);
1254+ calculateRegionSplitCostAroundReg (Hint, Order, Cost, NumCands, BestCand);
1255+ if (BestCand == NoCand)
1256+ return false ;
1257+
1258+ doRegionSplit (VirtReg, BestCand, false /* HasCompact*/ , NewVRegs);
1259+ return true ;
1260+ }
1261+
11921262// ===----------------------------------------------------------------------===//
11931263// Per-Block Splitting
11941264// ===----------------------------------------------------------------------===//
@@ -2329,6 +2399,9 @@ MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
23292399 } else
23302400 return PhysReg;
23312401 }
2402+ // Non-empty NewVRegs means VirtReg has been split.
2403+ if (!NewVRegs.empty ())
2404+ return 0 ;
23322405
23332406 LiveRangeStage Stage = ExtraInfo->getStage (VirtReg);
23342407 LLVM_DEBUG (dbgs () << StageName[Stage] << " Cascade "
0 commit comments