diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 68b1894fe0537e..84e393d4ef25e0 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -167,6 +167,13 @@ inline unsigned genCountBits(uint64_t bits) return BitOperations::PopCount(bits); } +#ifdef TARGET_ARM64 +inline unsigned genCountBits(regMaskTP mask) +{ + return regMaskTP::PopCountRegMask(mask); +} +#endif + /***************************************************************************** * * A rather simple routine that counts the number of bits in a given number. @@ -874,9 +881,13 @@ inline regNumber genRegNumFromMask(regMaskTP mask) { assert(mask != 0); // Must have one bit set, so can't have a mask of zero - /* Convert the mask to a register number */ +/* Convert the mask to a register number */ - regNumber regNum = (regNumber)genLog2(mask); +#ifdef TARGET_ARM64 + regNumber regNum = (regNumber)regMaskTP::BitScanForwardRegMask(mask); +#else + regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); +#endif /* Make sure we got it right */ @@ -900,9 +911,13 @@ inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask) { assert(mask != 0); // Must have one bit set, so can't have a mask of zero - /* Convert the mask to a register number */ +/* Convert the mask to a register number */ +#ifdef TARGET_ARM64 + regNumber regNum = (regNumber)regMaskTP::BitScanForwardRegMask(mask); +#else regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); +#endif mask ^= genRegMask(regNum); return regNum; @@ -922,9 +937,13 @@ inline regNumber genFirstRegNumFromMask(regMaskTP mask) { assert(mask != 0); // Must have one bit set, so can't have a mask of zero - /* Convert the mask to a register number */ +/* Convert the mask to a register number */ +#ifdef TARGET_ARM64 + regNumber regNum = (regNumber)regMaskTP::BitScanForwardRegMask(mask); +#else regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); +#endif return regNum; } @@ -4619,7 +4638,11 
@@ inline void* operator new[](size_t sz, Compiler* compiler, CompMemKind cmk) inline void printRegMask(regMaskTP mask) { +#ifdef TARGET_ARM64 + printf(REG_MASK_ALL_FMT, mask.low); +#else printf(REG_MASK_ALL_FMT, mask); +#endif } inline char* regMaskToString(regMaskTP mask, Compiler* context) @@ -4627,14 +4650,22 @@ inline char* regMaskToString(regMaskTP mask, Compiler* context) const size_t cchRegMask = 24; char* regmask = new (context, CMK_Unknown) char[cchRegMask]; +#ifdef TARGET_ARM64 + sprintf_s(regmask, cchRegMask, REG_MASK_ALL_FMT, mask.low); +#else sprintf_s(regmask, cchRegMask, REG_MASK_ALL_FMT, mask); +#endif return regmask; } inline void printRegMaskInt(regMaskTP mask) { +#ifdef TARGET_ARM64 + printf(REG_MASK_INT_FMT, (mask & RBM_ALLINT).low); +#else printf(REG_MASK_INT_FMT, (mask & RBM_ALLINT)); +#endif } inline char* regMaskIntToString(regMaskTP mask, Compiler* context) @@ -4642,7 +4673,11 @@ inline char* regMaskIntToString(regMaskTP mask, Compiler* context) const size_t cchRegMask = 24; char* regmask = new (context, CMK_Unknown) char[cchRegMask]; +#ifdef TARGET_ARM64 + sprintf_s(regmask, cchRegMask, REG_MASK_INT_FMT, (mask & RBM_ALLINT).low); +#else sprintf_s(regmask, cchRegMask, REG_MASK_INT_FMT, (mask & RBM_ALLINT)); +#endif return regmask; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 178a6a4d45cf38..0f47fd47198e23 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -966,7 +966,7 @@ struct GenTree regMaskSmall gtRsvdRegs; // set of fixed trashed registers - unsigned AvailableTempRegCount(regMaskTP mask = (regMaskTP)-1) const; + unsigned AvailableTempRegCount(regMaskTP mask = static_cast<regMaskTP>(-1)) const; regNumber GetSingleTempReg(regMaskTP mask = (regMaskTP)-1); regNumber ExtractTempReg(regMaskTP mask = (regMaskTP)-1); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 37d85e23b91504..720643e4766dba 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -12595,7 
+12595,11 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, &overallLimitCandidates); assert(limitConsecutiveResult != RBM_NONE); +#ifdef TARGET_ARM64 + unsigned startRegister = regMaskTP::BitScanForwardRegMask(limitConsecutiveResult); +#else unsigned startRegister = BitOperations::BitScanForward(limitConsecutiveResult); +#endif regMaskTP registersNeededMask = (1ULL << refPosition->regCount) - 1; candidates |= (registersNeededMask << startRegister); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index a708662882a06c..1de0dc5959e941 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -785,8 +785,8 @@ class LinearScan : public LinearScanInterface static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); #elif defined(TARGET_ARM64) - static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); - static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); + const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); + const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); #elif defined(TARGET_X86) static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 991513cfa33cbd..262125202d8a25 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -180,7 +180,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, unsigned int registersNeeded, regMaskTP* allConsecutiveCandidates) { - if (BitOperations::PopCount(candidates) < registersNeeded) + if (regMaskTP::PopCountRegMask(candidates) < registersNeeded) { // There is no way 
the register demanded can be satisfied for this RefPosition // based on the candidates from which it can allocate a register. @@ -205,14 +205,14 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, do { // From LSB, find the first available register (bit `1`) - regAvailableStartIndex = BitOperations::BitScanForward(static_cast(currAvailableRegs)); + regAvailableStartIndex = regMaskTP::BitScanForwardRegMask(currAvailableRegs); regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. regMaskTP maskProcessed = ~(currAvailableRegs | startMask); // From regAvailableStart, find the first unavailable register (bit `0`). - if (maskProcessed == RBM_NONE) + if (maskProcessed.low == RBM_NONE) { regAvailableEndIndex = 64; if ((regAvailableEndIndex - regAvailableStartIndex) >= registersNeeded) @@ -223,7 +223,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, } else { - regAvailableEndIndex = BitOperations::BitScanForward(static_cast(maskProcessed)); + regAvailableEndIndex = regMaskTP::BitScanForwardRegMask(maskProcessed); } regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1; @@ -234,7 +234,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, AppendConsecutiveMask(regAvailableStartIndex, regAvailableEndIndex, (endMask & ~startMask)); } currAvailableRegs &= ~endMask; - } while (currAvailableRegs != RBM_NONE); + } while (currAvailableRegs.low != RBM_NONE); regMaskTP v0_v31_mask = RBM_V0 | RBM_V31; if ((candidates & v0_v31_mask) == v0_v31_mask) @@ -335,7 +335,7 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC do { // From LSB, find the first available register (bit `1`) - regAvailableStartIndex = BitOperations::BitScanForward(static_cast(unprocessedRegs)); + regAvailableStartIndex = regMaskTP::BitScanForwardRegMask(unprocessedRegs); // For the current range, find how 
many registers are free vs. busy regMaskTP maskForCurRange = RBM_NONE; @@ -365,12 +365,12 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC maskForCurRange |= (registersNeededMask << regAvailableStartIndex); maskForCurRange &= m_AvailableRegs; - if (maskForCurRange != RBM_NONE) + if (maskForCurRange.low != RBM_NONE) { // In the given range, there are some free registers available. Calculate how many registers // will need spilling if this range is picked. - int curSpillRegs = registersNeeded - BitOperations::PopCount(maskForCurRange); + int curSpillRegs = registersNeeded - regMaskTP::PopCountRegMask(maskForCurRange); if (curSpillRegs < maxSpillRegs) { consecutiveResultForBusy = 1ULL << regAvailableStartIndex; @@ -382,7 +382,7 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC } } unprocessedRegs &= ~(1ULL << regAvailableStartIndex); - } while (unprocessedRegs != RBM_NONE); + } while (unprocessedRegs.low != RBM_NONE); // consecutiveResultForBusy should always be a subset of consecutiveCandidates. 
assert((consecutiveCandidates & consecutiveResultForBusy) == consecutiveResultForBusy); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index f3aaba0847f689..033258abafbe48 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2805,6 +2805,13 @@ void LinearScan::buildIntervals() availableRegCount = REG_INT_COUNT; } +#ifdef TARGET_ARM64 + if ((sizeof(regMaskTP) * 8) > 64) + { + actualRegistersMask = ~RBM_NONE; + } + else +#endif if (availableRegCount < (sizeof(regMaskTP) * 8)) { // Mask out the bits that are between 64 ~ availableRegCount diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 1e32a1d88946cc..7993395116cb76 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -209,10 +209,180 @@ enum _regMask_enum : unsigned // In any case, we believe that is OK to freely cast between these types; no information will // be lost. -#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) typedef unsigned __int64 regMaskTP; +#elif defined(TARGET_ARM64) +typedef struct __regMaskTP regMaskTP; +typedef unsigned __int64 regMaskSmall; + +struct __regMaskTP +{ + unsigned __int64 low; + unsigned __int64 high; + + __regMaskTP() : low(0), high(0) + { + } + __regMaskTP(unsigned __int64 _low, unsigned __int64 _high = 0) : low(_low), high(_high) + { + } + // BitScanForwardRegMask returns the index of the lowest set bit of the mask. + // It scans `low` first; only when `low` is zero does it scan `high`. + // `low` is 64 bits wide, so an index found in `high` is offset by 64. + // PopCountRegMask returns the number of set bits in `low` and `high` combined. + // NOTE(review): an all-zero mask is not special-cased; presumably callers rule it out. + + FORCEINLINE static uint32_t BitScanForwardRegMask(regMaskTP mask) + { + if (mask.low == 0) + { + return 64 + BitOperations::BitScanForward(mask.high); + } + return BitOperations::BitScanForward(mask.low); + } + + FORCEINLINE static unsigned PopCountRegMask(regMaskTP mask) + { + return BitOperations::PopCount(mask.low) + BitOperations::PopCount(mask.high); + } + + FORCEINLINE 
operator regMaskTP() const + { + return regMaskTP{low, high}; + } + + FORCEINLINE explicit operator bool() const + { + return low != 0 || high != 0; + } + + FORCEINLINE explicit operator regMaskSmall() const + { + //assert(high == 0); + return (regMaskSmall)low; + } + + FORCEINLINE explicit operator unsigned int() const + { + //assert(high == 0); + return (unsigned int)low; + } +}; +FORCEINLINE regMaskTP operator-(const regMaskTP& a, const regMaskTP& b) +{ + regMaskTP result(a.low - b.low, a.high - b.high); + return result; +} + +FORCEINLINE regMaskTP operator&(const regMaskTP& a, const regMaskTP& b) +{ + regMaskTP result(a.low & b.low, a.high & b.high); + return result; +} + +FORCEINLINE regMaskTP operator|(const regMaskTP& a, const regMaskTP& b) +{ + regMaskTP result(a.low | b.low, a.high | b.high); + return result; +} + +FORCEINLINE regMaskTP operator<<(const regMaskTP& a, const regMaskTP& b) +{ + regMaskTP result(a.low << b.low, a.high << b.high); + return result; +} + +FORCEINLINE regMaskTP operator|=(regMaskTP& a, const regMaskTP& b) +{ + a.low |= b.low; + a.high |= b.high; + return a; +} + +FORCEINLINE regMaskTP operator-=(regMaskTP& a, const regMaskTP& b) +{ + a.low -= b.low; + a.high -= b.high; + return a; +} + +FORCEINLINE regMaskTP operator^=(regMaskTP& a, const regMaskTP& b) +{ + a.low ^= b.low; + a.high ^= b.high; + return a; +} + +FORCEINLINE regMaskSmall operator^=(regMaskSmall& a, const regMaskTP& b) +{ + //assert(b.high == 0); + a ^= b.low; + return a; +} + +FORCEINLINE regMaskSmall operator&=(regMaskSmall& a, const regMaskTP& b) +{ + //assert(b.high == 0); + a &= b.low; + return a; +} + +FORCEINLINE regMaskSmall operator|=(regMaskSmall& a, const regMaskTP& b) +{ + //assert(b.high == 0); + a |= b.low; + return a; +} + +FORCEINLINE regMaskSmall operator-=(regMaskSmall& a, const regMaskTP& b) +{ + //assert(b.high == 0); + a -= b.low; + return a; +} + +FORCEINLINE regMaskTP operator<<=(regMaskTP& a, const regMaskTP& b) +{ + 
a.low <<= b.low; + a.high <<= b.high; + return a; +} + +FORCEINLINE regMaskTP operator&=(regMaskTP& a, const regMaskTP& b) +{ + a.low &= b.low; + a.high &= b.high; + return a; +} + +// Comparisons treat the mask as one 128-bit unsigned value in which `high` +// carries the upper 64 bits and `low` the lower 64 bits. Equality needs both +// halves to match. Ordering is decided by `high`, and by `low` only when the +// `high` halves are equal, so two masks that fit entirely in `low` still +// compare by their `low` bits. + +FORCEINLINE constexpr bool operator==(const regMaskTP& a, const regMaskTP& b) +{ + return (a.low == b.low) && (a.high == b.high); +} + +FORCEINLINE constexpr bool operator!=(const regMaskTP& a, const regMaskTP& b) +{ + return (a.low != b.low) || (a.high != b.high); +} + +FORCEINLINE constexpr bool operator>(const regMaskTP& a, const regMaskTP& b) +{ + return (a.high > b.high) || ((a.high == b.high) && (a.low > b.low)); +} + +FORCEINLINE regMaskTP operator~(const regMaskTP& a) +{ + regMaskTP result(~a.low, ~a.high); + return result; +} #else -typedef unsigned regMaskTP; +typedef unsigned regMaskTP; #endif #if REGMASK_BITS == 8 @@ -479,7 +649,7 @@ inline regMaskTP fullIntArgRegMask() { if (hasFixedRetBuffReg()) { - return RBM_ARG_REGS | theFixedRetBuffMask(); + return theFixedRetBuffMask() | RBM_ARG_REGS; } else {