// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
/*****************************************************************************/
#ifndef _LSRA_H_
#define _LSRA_H_
#include "arraylist.h"
#include "smallhash.h"
// Minor and forward-reference types
class Interval;
class RefPosition;
class LinearScan;
class RegRecord;
template <class T>
class ArrayStack;
// LsraLocation tracks the linearized order of the nodes.
// Each node is assigned two LsraLocations - one for all the uses and all but the last
// def, and a second location for the last def (if any)
typedef unsigned int LsraLocation;
const unsigned int MinLocation = 0;
const unsigned int MaxLocation = UINT_MAX;
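// A minimal sketch of how these locations are assigned (illustrative only; the stride of 2
// per node and the helper name are assumptions, not taken from buildIntervals itself):
//
//   LsraLocation loc = MinLocation;
//   for (GenTree* node : blockRange) // nodes in linear (LIR) order
//   {
//       buildRefPositionsForNode(node, loc); // uses and all but the last def at 'loc',
//                                            // the last def (if any) at 'loc + 1'
//       loc += 2;
//   }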
// max number of registers an operation could require internally (in addition to uses and defs)
const unsigned int MaxInternalRegisters = 8;
const unsigned int RegisterTypeCount = 2;
/*****************************************************************************
* Register types
*****************************************************************************/
typedef var_types RegisterType;
#define IntRegisterType TYP_INT
#define FloatRegisterType TYP_FLOAT
#define MaskRegisterType TYP_MASK
//------------------------------------------------------------------------
// regType: Return the RegisterType to use for a given type
//
// Arguments:
// type - the type of interest
//
template <class T>
RegisterType regType(T type)
{
if (varTypeUsesIntReg(type))
{
return IntRegisterType;
}
#if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
else if (varTypeUsesMaskReg(type))
{
return MaskRegisterType;
}
#endif // TARGET_XARCH && FEATURE_SIMD
else
{
assert(varTypeUsesFloatReg(type));
return FloatRegisterType;
}
}
//------------------------------------------------------------------------
// useFloatReg: Check if the given var_type should be allocated to a FloatRegisterType
//
inline bool useFloatReg(var_types type)
{
return (regType(type) == FloatRegisterType);
}
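// For example (illustrative only): regType(TYP_INT) and regType(TYP_REF) return
// IntRegisterType, while regType(TYP_FLOAT) and regType(TYP_DOUBLE) return
// FloatRegisterType, so useFloatReg(TYP_DOUBLE) is true and useFloatReg(TYP_INT) is false.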
//------------------------------------------------------------------------
// RefInfo: Captures the necessary information for a definition that is "in-flight"
// during `buildIntervals` (i.e. a tree-node definition has been encountered,
// but not its use). This includes the RefPosition and its associated
// GenTree node.
//
struct RefInfo
{
RefPosition* ref;
GenTree* treeNode;
RefInfo(RefPosition* r, GenTree* t) : ref(r), treeNode(t)
{
}
// default constructor for data structures
RefInfo()
{
}
};
//------------------------------------------------------------------------
// RefInfoListNode: used to store a single `RefInfo` value for a
// node during `buildIntervals`.
//
// This is the node type for `RefInfoList` below.
//
class RefInfoListNode final : public RefInfo
{
friend class RefInfoList;
friend class RefInfoListNodePool;
RefInfoListNode* m_next; // The next node in the list
public:
RefInfoListNode(RefPosition* r, GenTree* t) : RefInfo(r, t)
{
}
//------------------------------------------------------------------------
// RefInfoListNode::Next: Returns the next node in the list.
RefInfoListNode* Next() const
{
return m_next;
}
};
//------------------------------------------------------------------------
// RefInfoList: used to store a list of `RefInfo` values for a
// node during `buildIntervals`.
//
// This list of 'RefInfoListNode's contains the source nodes consumed by
// a node, and is created by 'BuildNode'.
//
class RefInfoList final
{
friend class RefInfoListNodePool;
RefInfoListNode* m_head; // The head of the list
RefInfoListNode* m_tail; // The tail of the list
public:
RefInfoList() : m_head(nullptr), m_tail(nullptr)
{
}
RefInfoList(RefInfoListNode* node) : m_head(node), m_tail(node)
{
assert(m_head->m_next == nullptr);
}
//------------------------------------------------------------------------
// RefInfoList::IsEmpty: Returns true if the list is empty.
//
bool IsEmpty() const
{
return m_head == nullptr;
}
//------------------------------------------------------------------------
// RefInfoList::Begin: Returns the first node in the list.
//
RefInfoListNode* Begin() const
{
return m_head;
}
//------------------------------------------------------------------------
// RefInfoList::End: Returns the position after the last node in the
// list. The returned value is suitable for use as
// a sentinel for iteration.
//
RefInfoListNode* End() const
{
return nullptr;
}
//------------------------------------------------------------------------
// RefInfoList::Last: Returns the last node in the list, or nullptr if
//                    the list is empty.
//
RefInfoListNode* Last() const
{
return m_tail;
}
//------------------------------------------------------------------------
// RefInfoList::Append: Appends a node to the list.
//
// Arguments:
// node - The node to append. Must not be part of an existing list.
//
void Append(RefInfoListNode* node)
{
assert(node->m_next == nullptr);
if (m_tail == nullptr)
{
assert(m_head == nullptr);
m_head = node;
}
else
{
m_tail->m_next = node;
}
m_tail = node;
}
//------------------------------------------------------------------------
// RefInfoList::Append: Appends another list to this list.
//
// Arguments:
// other - The list to append.
//
void Append(RefInfoList other)
{
if (m_tail == nullptr)
{
assert(m_head == nullptr);
m_head = other.m_head;
}
else
{
m_tail->m_next = other.m_head;
}
m_tail = other.m_tail;
}
//------------------------------------------------------------------------
// RefInfoList::Prepend: Prepends a node to the list.
//
// Arguments:
// node - The node to prepend. Must not be part of an existing list.
//
void Prepend(RefInfoListNode* node)
{
assert(node->m_next == nullptr);
if (m_head == nullptr)
{
assert(m_tail == nullptr);
m_tail = node;
}
else
{
node->m_next = m_head;
}
m_head = node;
}
//------------------------------------------------------------------------
// RefInfoList::Add: Adds a node to the list.
//
// Arguments:
// node - The node to add. Must not be part of an existing list.
// prepend - True if it should be prepended (otherwise is appended)
//
void Add(RefInfoListNode* node, bool prepend)
{
if (prepend)
{
Prepend(node);
}
else
{
Append(node);
}
}
//------------------------------------------------------------------------
// removeListNode - unlink the given listNode from the list and return it
//
// Notes:
// The BuildNode methods use this helper to retrieve the RefInfo for child nodes
// from the useList being constructed.
//
RefInfoListNode* removeListNode(RefInfoListNode* listNode, RefInfoListNode* prevListNode)
{
RefInfoListNode* nextNode = listNode->Next();
if (prevListNode == nullptr)
{
m_head = nextNode;
}
else
{
prevListNode->m_next = nextNode;
}
if (nextNode == nullptr)
{
m_tail = prevListNode;
}
listNode->m_next = nullptr;
return listNode;
}
// removeListNode - remove the RefInfoListNode for the given GenTree node from the defList
RefInfoListNode* removeListNode(GenTree* node);
// Same as above but takes a multiRegIdx to support multi-reg nodes.
RefInfoListNode* removeListNode(GenTree* node, unsigned multiRegIdx);
//------------------------------------------------------------------------
// GetRefPosition - retrieve the RefPosition for the given node
//
// Notes:
// The Build methods use this helper to retrieve the RefPosition for child nodes
// from the useList being constructed. Note that, if the user knows the order of the operands,
// it is expected that they should just retrieve them directly.
RefPosition* GetRefPosition(GenTree* node)
{
for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
{
if (listNode->treeNode == node)
{
return listNode->ref;
}
}
assert(!"GetRefPosition didn't find the node");
unreached();
}
//------------------------------------------------------------------------
// RefInfoList::GetSecond: Gets the second node in the list.
//
// Arguments:
// (DEBUG ONLY) treeNode - The GenTree* we expect to be in the second node.
//
RefInfoListNode* GetSecond(INDEBUG(GenTree* treeNode))
{
noway_assert((Begin() != nullptr) && (Begin()->Next() != nullptr));
RefInfoListNode* second = Begin()->Next();
assert(second->treeNode == treeNode);
return second;
}
#ifdef DEBUG
// Count - return the number of nodes in the list (DEBUG only)
int Count()
{
int count = 0;
for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
{
count++;
}
return count;
}
#endif // DEBUG
};
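// Usage sketch for RefInfoList (illustrative only; 'list' and 'node' are hypothetical
// locals, not code from the Build methods):
//
//   RefInfoList list;
//   list.Append(node); // 'node' is a RefInfoListNode* not already on another list
//   for (RefInfoListNode* cur = list.Begin(); cur != list.End(); cur = cur->Next())
//   {
//       // 'cur->ref' is the in-flight RefPosition, 'cur->treeNode' the defining GenTree
//   }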
//------------------------------------------------------------------------
// RefInfoListNodePool: manages a pool of `RefInfoListNode`
// values to decrease overall memory usage
// during `buildIntervals`.
//
// `buildIntervals` involves creating a list of RefInfo items per
// node that either directly produces a set of registers or that is a
// contained node with register-producing sources. However, these lists
// are short-lived: they are destroyed once the use of the corresponding
// node is processed. As such, there is typically only a small number of
// `RefInfoListNode` values in use at any given time. Pooling these
// values avoids otherwise frequent allocations.
class RefInfoListNodePool final
{
RefInfoListNode* m_freeList;
Compiler* m_compiler;
static const unsigned defaultPreallocation = 8;
public:
RefInfoListNodePool(Compiler* compiler, unsigned preallocate = defaultPreallocation);
RefInfoListNode* GetNode(RefPosition* r, GenTree* t);
void ReturnNode(RefInfoListNode* listNode);
};
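// Usage sketch for the pool (illustrative only; 'compiler', 'ref', 'tree' and 'defList'
// are hypothetical locals, not code from buildIntervals itself):
//
//   RefInfoListNodePool pool(compiler);
//   RefInfoListNode* node = pool.GetNode(ref, tree); // reuses a pooled node when available
//   defList.Append(node);
//   ...
//   pool.ReturnNode(defList.removeListNode(tree)); // recycle the node once its use is built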
#if TRACK_LSRA_STATS
enum LsraStat
{
#define LSRA_STAT_DEF(enum_name, enum_str) enum_name,
#include "lsra_stats.h"
#undef LSRA_STAT_DEF
#define REG_SEL_DEF(enum_name, value, short_str, orderSeqId) STAT_##enum_name,
#define BUSY_REG_SEL_DEF(enum_name, value, short_str, orderSeqId) REG_SEL_DEF(enum_name, value, short_str, orderSeqId)
#include "lsra_score.h"
COUNT
};
#endif // TRACK_LSRA_STATS
struct LsraBlockInfo
{
// bbNum of the predecessor to use for the register location of live-in variables.
// 0 for fgFirstBB.
unsigned int predBBNum;
weight_t weight;
bool hasCriticalInEdge : 1;
bool hasCriticalOutEdge : 1;
bool hasEHBoundaryIn : 1;
bool hasEHBoundaryOut : 1;
bool hasEHPred : 1;
#if TRACK_LSRA_STATS
// Per block maintained LSRA statistics.
unsigned stats[LsraStat::COUNT];
#endif // TRACK_LSRA_STATS
};
enum RegisterScore
{
#define REG_SEL_DEF(enum_name, value, short_str, orderSeqId) enum_name = value,
#define BUSY_REG_SEL_DEF(enum_name, value, short_str, orderSeqId) REG_SEL_DEF(enum_name, value, short_str, orderSeqId)
#include "lsra_score.h"
NONE = 0
};
// This is effectively a bit mask.
// The low-order 2 bits will be 1 for defs, and 2 for uses.
enum RefType : unsigned char
{
#define DEF_REFTYPE(memberName, memberValue, shortName) memberName = memberValue,
#include "lsra_reftypes.h"
#undef DEF_REFTYPE
};
// position in a block (for resolution)
enum BlockStartOrEnd
{
BlockPositionStart = 0,
BlockPositionEnd = 1,
PositionCount = 2
};
inline bool RefTypeIsUse(RefType refType)
{
return ((refType & RefTypeUse) == RefTypeUse);
}
inline bool RefTypeIsDef(RefType refType)
{
return ((refType & RefTypeDef) == RefTypeDef);
}
typedef regNumberSmall* VarToRegMap;
typedef jitstd::list<Interval> IntervalList;
typedef jitstd::list<RefPosition> RefPositionList;
typedef jitstd::list<RefPosition>::iterator RefPositionIterator;
typedef jitstd::list<RefPosition>::reverse_iterator RefPositionReverseIterator;
class Referenceable
{
public:
Referenceable()
{
firstRefPosition = nullptr;
recentRefPosition = nullptr;
lastRefPosition = nullptr;
}
// A linked list of RefPositions. These are only traversed in the forward
// direction, and are not moved, so they don't need to be doubly linked
// (see RefPosition).
RefPosition* firstRefPosition;
RefPosition* recentRefPosition;
RefPosition* lastRefPosition;
// Get the position of the next reference which is at or greater than
// the current location (relies upon recentRefPosition being updated
// during traversal).
RefPosition* getNextRefPosition();
LsraLocation getNextRefLocation();
};
class RegRecord : public Referenceable
{
public:
RegRecord()
{
assignedInterval = nullptr;
previousInterval = nullptr;
regNum = REG_NA;
isCalleeSave = false;
registerType = IntRegisterType;
}
void init(regNumber reg)
{
#ifdef TARGET_ARM64
// The Zero register, or the SP
if ((reg == REG_ZR) || (reg == REG_SP))
{
// IsGeneralRegister returns false for REG_ZR and REG_SP
regNum = reg;
registerType = IntRegisterType;
}
else
#endif
if (emitter::isGeneralRegister(reg))
{
assert(registerType == IntRegisterType);
}
else if (emitter::isFloatReg(reg))
{
registerType = FloatRegisterType;
}
#if defined(TARGET_XARCH) && defined(FEATURE_SIMD)
else
{
assert(emitter::isMaskReg(reg));
registerType = MaskRegisterType;
}
#endif
regNum = reg;
isCalleeSave = ((RBM_CALLEE_SAVED & genRegMask(reg)) != 0);
}
#ifdef DEBUG
// print out representation
void dump();
// concise representation for embedding
void tinyDump();
#endif // DEBUG
// DATA
// interval to which this register is currently allocated.
// If the interval is inactive (isActive == false) then it is not currently live,
// and the register can be unassigned (i.e. setting assignedInterval to nullptr)
// without spilling the register.
Interval* assignedInterval;
// Interval to which this register was previously allocated, and which was unassigned
// because it was inactive. This register will be reassigned to this Interval when
// assignedInterval becomes inactive.
Interval* previousInterval;
regNumber regNum;
bool isCalleeSave;
RegisterType registerType;
unsigned char regOrder;
};
inline bool leafInRange(GenTree* leaf, int lower, int upper)
{
if (!leaf->IsIntCnsFitsInI32())
{
return false;
}
if (leaf->AsIntCon()->gtIconVal < lower)
{
return false;
}
if (leaf->AsIntCon()->gtIconVal > upper)
{
return false;
}
return true;
}
inline bool leafInRange(GenTree* leaf, int lower, int upper, int multiple)
{
if (!leafInRange(leaf, lower, upper))
{
return false;
}
if (leaf->AsIntCon()->gtIconVal % multiple)
{
return false;
}
return true;
}
inline bool leafAddInRange(GenTree* leaf, int lower, int upper, int multiple = 1)
{
if (leaf->OperGet() != GT_ADD)
{
return false;
}
return leafInRange(leaf->gtGetOp2(), lower, upper, multiple);
}
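// For example (illustrative only): for a GT_ADD whose second operand is the integer
// constant 12, leafAddInRange(add, 0, 255, 4) returns true (12 is in [0, 255] and a
// multiple of 4), while leafAddInRange(add, 0, 255, 8) returns false (12 % 8 != 0).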
inline bool isCandidateVar(const LclVarDsc* varDsc)
{
return varDsc->lvLRACandidate;
}
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX XX
XX LinearScan XX
XX XX
XX This is the container for the Linear Scan data structures and methods. XX
XX XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
// OPTION 1: The algorithm as described in "Optimized Interval Splitting in a
// Linear Scan Register Allocator". It is driven by iterating over the Interval
// lists. In this case, we need multiple IntervalLists, and Intervals will be
// moved between them so they must be easily updated.
// OPTION 2: The algorithm is driven by iterating over the RefPositions. In this
// case, we only need a single IntervalList, and it won't be updated.
// The RefPosition must refer to its Interval, and we need to be able to traverse
// to the next RefPosition in code order
// THIS IS THE OPTION CURRENTLY BEING PURSUED
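// A rough sketch of the RefPosition-driven approach (illustrative only; this is not the
// actual allocateRegisters loop, and the accessor names are assumptions):
//
//   for (RefPosition& refPosition : refPositions) // in linearized code order
//   {
//       Interval* interval = refPosition.getInterval(); // each RefPosition refers to its Interval
//       // choose (or keep) a register for 'interval' at this reference, spilling some other
//       // interval's register when nothing suitable is free
//   }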
class LinearScan : public LinearScanInterface
{
friend class RefPosition;
friend class Interval;
friend class Lowering;
public:
// This could use further abstraction. From Compiler we need the tree,
// the flowgraph and the allocator.
LinearScan(Compiler* theCompiler);
// This is the main driver
virtual PhaseStatus doLinearScan();
static bool isSingleRegister(regMaskTP regMask)
{
return (genExactlyOneBit(regMask));
}
// Initialize the block traversal for LSRA.
// This resets the bbVisitedSet, and on the first invocation sets the blockSequence array,
// which determines the order in which blocks will be allocated (currently called during Lowering).
BasicBlock* startBlockSequence();
// Move to the next block in sequence, updating the current block information.
BasicBlock* moveToNextBlock();
// Get the next block to be scheduled without changing the current block,
// but updating the blockSequence during the first iteration if it is not fully computed.
BasicBlock* getNextBlock();
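// Typical traversal sketch (illustrative only, based on the declarations above):
//
//   for (BasicBlock* block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
//   {
//       // process 'block' in the computed allocation order
//   }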
// This is called during code generation to update the location of variables
virtual void recordVarLocationsAtStartOfBB(BasicBlock* bb);
// This does the dataflow analysis and builds the intervals
template <bool localVarsEnregistered>
void buildIntervals();
// This is where the actual assignment is done
#ifdef TARGET_ARM64
template <bool hasConsecutiveRegister = false>
#endif
void allocateRegisters();
// This is the resolution phase, where cross-block mismatches are fixed up
template <bool localVarsEnregistered>
void resolveRegisters();
void writeRegisters(RefPosition* currentRefPosition, GenTree* tree);
// Insert a copy in the case where a tree node value must be moved to a different
// register at the point of use, or it is reloaded to a different register
// than the one it was spilled from
void insertCopyOrReload(BasicBlock* block, GenTree* tree, unsigned multiRegIdx, RefPosition* refPosition);
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
void makeUpperVectorInterval(unsigned varIndex);
Interval* getUpperVectorInterval(unsigned varIndex);
// Save the upper half of a vector that lives in a callee-save register at the point of a call.
void insertUpperVectorSave(GenTree* tree,
RefPosition* refPosition,
Interval* upperVectorInterval,
BasicBlock* block);
// Restore the upper half of a vector that's been partially spilled prior to a use in 'tree'.
void insertUpperVectorRestore(GenTree* tree,
RefPosition* refPosition,
Interval* upperVectorInterval,
BasicBlock* block);
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
// resolve along one block-block edge
enum ResolveType
{
ResolveSplit,
ResolveJoin,
ResolveCritical,
ResolveSharedCritical,
ResolveTypeCount
};
#ifdef DEBUG
static const char* resolveTypeName[ResolveTypeCount];
#endif
enum WhereToInsert
{
InsertAtTop,
InsertAtBottom
};
#ifdef TARGET_ARM
void addResolutionForDouble(BasicBlock* block,
GenTree* insertionPoint,
Interval** sourceIntervals,
regNumberSmall* location,
regNumber toReg,
regNumber fromReg,
ResolveType resolveType DEBUG_ARG(BasicBlock* fromBlock)
DEBUG_ARG(BasicBlock* toBlock));
#endif
void addResolution(BasicBlock* block,
GenTree* insertionPoint,
Interval* interval,
regNumber outReg,
regNumber inReg DEBUG_ARG(BasicBlock* fromBlock) DEBUG_ARG(BasicBlock* toBlock)
DEBUG_ARG(const char* reason));
void handleOutgoingCriticalEdges(BasicBlock* block);
void resolveEdge(BasicBlock* fromBlock,
BasicBlock* toBlock,
ResolveType resolveType,
VARSET_VALARG_TP liveSet,
regMaskTP terminatorConsumedRegs);
void resolveEdges();
// Keep track of how many temp locations we'll need for spill
void initMaxSpill();
void updateMaxSpill(RefPosition* refPosition);
void recordMaxSpill();
// max simultaneous spill locations used of every type
unsigned int maxSpill[TYP_COUNT];
unsigned int currentSpill[TYP_COUNT];
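// Conceptual sketch of the bookkeeping (an assumed reading of the fields above, not the
// actual updateMaxSpill implementation): a spill of a value of type 'type' increments
// currentSpill[type] and raises maxSpill[type] to at least that value; the matching
// reload decrements currentSpill[type].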
bool needFloatTmpForFPCall;
bool needDoubleTmpForFPCall;
bool needNonIntegerRegisters;
#ifdef DEBUG
private:
//------------------------------------------------------------------------
// Should we stress lsra? This uses the DOTNET_JitStressRegs variable.
//
// The mask bits are currently divided into fields in which each non-zero value
// is a distinct stress option (e.g. 0x3 is not a combination of 0x1 and 0x2).
// However, subject to possible constraints (to be determined), the different
// fields can be combined (e.g. 0x7 is a combination of 0x3 and 0x4).
// Note that the field values are declared in a public enum, but the actual bits are
// only accessed via accessors.
unsigned lsraStressMask;
// This controls the registers available for allocation
enum LsraStressLimitRegs
{
LSRA_LIMIT_NONE = 0,
LSRA_LIMIT_CALLEE = 0x1,
LSRA_LIMIT_CALLER = 0x2,
LSRA_LIMIT_SMALL_SET = 0x3,
#if defined(TARGET_AMD64)
LSRA_LIMIT_UPPER_SIMD_SET = 0x2000,
LSRA_LIMIT_MASK = 0x2003
#else
LSRA_LIMIT_MASK = 0x3
#endif
};
// When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save
// registers, so as to get different coverage than limiting to callee or caller.
// At least for x86 and AMD64, and potentially other architectures that will support SIMD,
// we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.
// Hence the "SmallFPSet" has 5 elements.
CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(TARGET_AMD64)
#ifdef UNIX_AMD64_ABI
// On System V, RDI and RSI are not callee-saved. Use R12 and R13 as callee-saved registers.
static const regMaskTP LsraLimitSmallIntSet =
(RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13);
#else // !UNIX_AMD64_ABI
// On Windows AMD64, use RDI and RSI as callee-saved registers.
static const regMaskTP LsraLimitSmallIntSet =
(RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI);
#endif // !UNIX_AMD64_ABI
static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
static const regMaskTP LsraLimitUpperSimdSet =
(RBM_XMM16 | RBM_XMM17 | RBM_XMM18 | RBM_XMM19 | RBM_XMM20 | RBM_XMM21 | RBM_XMM22 | RBM_XMM23 | RBM_XMM24 |
RBM_XMM25 | RBM_XMM26 | RBM_XMM27 | RBM_XMM28 | RBM_XMM29 | RBM_XMM30 | RBM_XMM31);
#elif defined(TARGET_ARM)
// On ARM, we may need two registers to set up the target register for a virtual call, so we need
// to have at least the maximum number of arg registers, plus 2.
static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5);
static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);
#elif defined(TARGET_ARM64)
static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
#elif defined(TARGET_X86)
static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
#elif defined(TARGET_LOONGARCH64)
static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0);
static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9);
#elif defined(TARGET_RISCV64)
static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0);
static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9);
#else
#error Unsupported or unset target architecture
#endif // target
LsraStressLimitRegs getStressLimitRegs()
{
return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);
}
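// For example (illustrative only): with DOTNET_JitStressRegs=0x103, the limit field is 0x3
// (LSRA_LIMIT_SMALL_SET) and the block-boundary field is 0x100 (LSRA_BLOCK_BOUNDARY_LAYOUT);
// getStressLimitRegs() extracts just the limit field via LSRA_LIMIT_MASK.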
regMaskTP getConstrainedRegMask(RefPosition* refPosition,
regMaskTP regMaskActual,
regMaskTP regMaskConstrain,
unsigned minRegCount);
regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask);
// This controls the heuristics used to select registers
// These can be combined.
enum LsraSelect
{
LSRA_SELECT_DEFAULT = 0,
LSRA_SELECT_REVERSE_HEURISTICS = 0x04,
LSRA_SELECT_REVERSE_CALLER_CALLEE = 0x08,
LSRA_SELECT_NEAREST = 0x10,
LSRA_SELECT_MASK = 0x1c
};
LsraSelect getSelectionHeuristics()
{
return (LsraSelect)(lsraStressMask & LSRA_SELECT_MASK);
}
bool doReverseSelect()
{
return ((lsraStressMask & LSRA_SELECT_REVERSE_HEURISTICS) != 0);
}
bool doReverseCallerCallee()
{
return ((lsraStressMask & LSRA_SELECT_REVERSE_CALLER_CALLEE) != 0);
}
bool doSelectNearest()
{
return ((lsraStressMask & LSRA_SELECT_NEAREST) != 0);
}
// This controls the order in which basic blocks are visited during allocation
enum LsraTraversalOrder
{
LSRA_TRAVERSE_LAYOUT = 0x20,
LSRA_TRAVERSE_PRED_FIRST = 0x40,
LSRA_TRAVERSE_RANDOM = 0x60, // NYI
LSRA_TRAVERSE_DEFAULT = LSRA_TRAVERSE_PRED_FIRST,
LSRA_TRAVERSE_MASK = 0x60
};
LsraTraversalOrder getLsraTraversalOrder()
{
if ((lsraStressMask & LSRA_TRAVERSE_MASK) == 0)
{
return LSRA_TRAVERSE_DEFAULT;
}
return (LsraTraversalOrder)(lsraStressMask & LSRA_TRAVERSE_MASK);
}
bool isTraversalLayoutOrder()
{
return getLsraTraversalOrder() == LSRA_TRAVERSE_LAYOUT;
}
bool isTraversalPredFirstOrder()
{
return getLsraTraversalOrder() == LSRA_TRAVERSE_PRED_FIRST;
}
// This controls whether lifetimes should be extended to the entire method.
// Note that this has no effect under MinOpts
enum LsraExtendLifetimes
{
LSRA_DONT_EXTEND = 0,
LSRA_EXTEND_LIFETIMES = 0x80,
LSRA_EXTEND_LIFETIMES_MASK = 0x80
};
LsraExtendLifetimes getLsraExtendLifeTimes()
{
return (LsraExtendLifetimes)(lsraStressMask & LSRA_EXTEND_LIFETIMES_MASK);
}
bool extendLifetimes()
{
return getLsraExtendLifeTimes() == LSRA_EXTEND_LIFETIMES;
}
// This controls whether variable locations should be set to those of the previous block in layout order
// (LSRA_BLOCK_BOUNDARY_LAYOUT), or to those of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED -
// the default), or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).
enum LsraBlockBoundaryLocations
{
LSRA_BLOCK_BOUNDARY_PRED = 0,
LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100,
LSRA_BLOCK_BOUNDARY_ROTATE = 0x200,
LSRA_BLOCK_BOUNDARY_MASK = 0x300
};
LsraBlockBoundaryLocations getLsraBlockBoundaryLocations()
{
return (LsraBlockBoundaryLocations)(lsraStressMask & LSRA_BLOCK_BOUNDARY_MASK);
}
regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs);
// This controls whether we always insert a GT_RELOAD instruction after a spill
// Note that this can be combined with LSRA_SPILL_ALWAYS (or not)
enum LsraReload
{
LSRA_NO_RELOAD_IF_SAME = 0,
LSRA_ALWAYS_INSERT_RELOAD = 0x400,
LSRA_RELOAD_MASK = 0x400
};
LsraReload getLsraReload()
{
return (LsraReload)(lsraStressMask & LSRA_RELOAD_MASK);
}
bool alwaysInsertReload()
{
return getLsraReload() == LSRA_ALWAYS_INSERT_RELOAD;
}
// This controls whether we spill everywhere
enum LsraSpill
{
LSRA_DONT_SPILL_ALWAYS = 0,
LSRA_SPILL_ALWAYS = 0x800,
LSRA_SPILL_MASK = 0x800
};
LsraSpill getLsraSpill()
{
return (LsraSpill)(lsraStressMask & LSRA_SPILL_MASK);
}
bool spillAlways()
{
return getLsraSpill() == LSRA_SPILL_ALWAYS;
}
// This controls whether RefPositions that lower/codegen marked as reg-optional are
// allocated a register at all.
enum LsraRegOptionalControl
{
LSRA_REG_OPTIONAL_DEFAULT = 0,
LSRA_REG_OPTIONAL_NO_ALLOC = 0x1000,
LSRA_REG_OPTIONAL_MASK = 0x1000
};
LsraRegOptionalControl getLsraRegOptionalControl()
{
return (LsraRegOptionalControl)(lsraStressMask & LSRA_REG_OPTIONAL_MASK);
}
bool regOptionalNoAlloc()
{
return getLsraRegOptionalControl() == LSRA_REG_OPTIONAL_NO_ALLOC;
}
bool candidatesAreStressLimited()
{
return ((lsraStressMask & (LSRA_LIMIT_MASK | LSRA_SELECT_MASK)) != 0);
}
// Dump support
void dumpDefList();
void lsraDumpIntervals(const char* msg);
void dumpRefPositions(const char* msg);
void dumpVarRefPositions(const char* msg);
// Checking code
static bool IsLsraAdded(GenTree* node)
{
return ((node->gtDebugFlags & GTF_DEBUG_NODE_LSRA_ADDED) != 0);
}
static void SetLsraAdded(GenTree* node)
{
node->gtDebugFlags |= GTF_DEBUG_NODE_LSRA_ADDED;
}
static bool IsResolutionMove(GenTree* node);
static bool IsResolutionNode(LIR::Range& containingRange, GenTree* node);
void verifyFinalAllocation();
void verifyResolutionMove(GenTree* resolutionNode, LsraLocation currentLocation);
#else // !DEBUG
bool doSelectNearest()
{
return false;
}
bool extendLifetimes()
{
return false;
}
bool spillAlways()
{
return false;
}
// In a retail build we support only the default traversal order
bool isTraversalLayoutOrder()
{
return false;
}
bool isTraversalPredFirstOrder()
{
return true;
}
bool getLsraExtendLifeTimes()
{
return false;
}
static void SetLsraAdded(GenTree* node)
{
// do nothing; checked only in DEBUG builds
}
bool candidatesAreStressLimited()
{
return false;
}
#endif // !DEBUG
public:
// Used by Lowering when considering whether to split Longs, as well as by identifyCandidates().
bool isRegCandidate(LclVarDsc* varDsc);
bool isContainableMemoryOp(GenTree* node);
private:
// Determine which locals are candidates for allocation
template <bool localVarsEnregistered>
void identifyCandidates();
// determine which locals are used in EH constructs we don't want to deal with
void identifyCandidatesExceptionDataflow();
void buildPhysRegRecords();
#ifdef DEBUG
void checkLastUses(BasicBlock* block);
int ComputeOperandDstCount(GenTree* operand);
int ComputeAvailableSrcCount(GenTree* node);
#endif // DEBUG
void setFrameType();