From 78e6a5bd695cef2a3eb93d1e7f703562fbf92861 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 Nov 2023 14:42:50 -0800 Subject: [PATCH 1/6] wip --- src/coreclr/jit/compiler.hpp | 19 +++-- src/coreclr/jit/gentree.h | 2 +- src/coreclr/jit/lsra.h | 2 +- src/coreclr/jit/lsraarm64.cpp | 4 +- src/coreclr/jit/target.h | 142 +++++++++++++++++++++++++++++++++- 5 files changed, 156 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 47c9007ddee4fc..6b93daed983054 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -167,6 +167,11 @@ inline unsigned genCountBits(uint64_t bits) return BitOperations::PopCount(bits); } +inline unsigned genCountBits(regMaskTP mask) +{ + return genCountBits(mask.low); +} + /***************************************************************************** * * A rather simple routine that counts the number of bits in a given number. @@ -935,7 +940,7 @@ inline regNumber genRegNumFromMask(regMaskTP mask) /* Convert the mask to a register number */ - regNumber regNum = (regNumber)genLog2(mask); + regNumber regNum = (regNumber)regMaskTP::BitScanForwardRegMask(mask); /* Make sure we got it right */ @@ -961,7 +966,7 @@ inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask) /* Convert the mask to a register number */ - regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); + regNumber regNum = (regNumber)regMaskTP::BitScanForwardRegMask(mask); mask ^= genRegMask(regNum); return regNum; @@ -983,7 +988,7 @@ inline regNumber genFirstRegNumFromMask(regMaskTP mask) /* Convert the mask to a register number */ - regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); + regNumber regNum = (regNumber)regMaskTP::BitScanForwardRegMask(mask); return regNum; } @@ -4702,7 +4707,7 @@ inline void* operator new[](size_t sz, Compiler* compiler, CompMemKind cmk) inline void printRegMask(regMaskTP mask) { - printf(REG_MASK_ALL_FMT, mask); + printf(REG_MASK_ALL_FMT, mask.low); } inline char* regMaskToString(regMaskTP mask, Compiler* context) @@ -4710,14 +4715,14 @@ inline char* regMaskToString(regMaskTP mask, Compiler* context) const size_t cchRegMask = 24; char* regmask = new (context, CMK_Unknown) char[cchRegMask]; - sprintf_s(regmask, cchRegMask, REG_MASK_ALL_FMT, mask); + sprintf_s(regmask, cchRegMask, REG_MASK_ALL_FMT, mask.low); return regmask; } inline void printRegMaskInt(regMaskTP mask) { - printf(REG_MASK_INT_FMT, (mask & RBM_ALLINT)); + printf(REG_MASK_INT_FMT, (mask & RBM_ALLINT).low); } inline char* regMaskIntToString(regMaskTP mask, Compiler* context) @@ -4725,7 +4730,7 @@ inline char* regMaskIntToString(regMaskTP mask, Compiler* context) const size_t cchRegMask = 24; char* regmask = new (context, CMK_Unknown) char[cchRegMask]; - sprintf_s(regmask, cchRegMask, REG_MASK_INT_FMT, (mask & RBM_ALLINT)); + sprintf_s(regmask, cchRegMask, REG_MASK_INT_FMT, (mask & RBM_ALLINT).low); return regmask; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index ff45d2def41686..f99101a874b9e5 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -965,7 +965,7 @@ struct GenTree regMaskSmall gtRsvdRegs; // set of fixed trashed registers - unsigned AvailableTempRegCount(regMaskTP mask = (regMaskTP)-1) const; + unsigned AvailableTempRegCount(regMaskTP mask = static_cast(-1)) const; regNumber GetSingleTempReg(regMaskTP mask = (regMaskTP)-1); regNumber ExtractTempReg(regMaskTP mask = (regMaskTP)-1); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index d0f7aeb8369695..c20a660fde1d75 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -785,7 +785,7 @@ class LinearScan : public LinearScanInterface static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); #elif defined(TARGET_ARM64) - static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); + static const regMaskTP LsraLimitSmallIntSet = regMaskTP{RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20}; static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); #elif defined(TARGET_X86) static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index aed2de96d5e306..073b00909df211 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -177,7 +177,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, unsigned int registersNeeded, regMaskTP* allConsecutiveCandidates) { - if (BitOperations::PopCount(candidates) < registersNeeded) + if (regMaskTP::PopCountRegMask(candidates) < registersNeeded) { // There is no way the register demanded can be satisfied for this RefPosition // based on the candidates from which it can allocate a register. @@ -367,7 +367,7 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC // In the given range, there are some free registers available. Calculate how many registers // will need spilling if this range is picked. - int curSpillRegs = registersNeeded - BitOperations::PopCount(maskForCurRange); + int curSpillRegs = registersNeeded - regMaskTP::PopCountRegMask(maskForCurRange); if (curSpillRegs < maxSpillRegs) { consecutiveResultForBusy = 1ULL << regAvailableStartIndex; diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 638e55d56c7431..5f45fc917ac94c 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -209,8 +209,146 @@ enum _regMask_enum : unsigned // In any case, we believe that is OK to freely cast between these types; no information will // be lost. -#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) typedef unsigned __int64 regMaskTP; +#elif defined(TARGET_ARM64) +typedef struct __regMaskTP regMaskTP; +struct __regMaskTP +{ + unsigned __int64 low; + + __regMaskTP() : low(0) {} + __regMaskTP(unsigned __int64 _low) : low(_low) {} + //// Move constructor + //__regMaskTP(__regMaskTP&& other) noexcept + //{ + // low = other.low; + //} + + //unsigned __int64 operator&(const unsigned __int64& b) + //{ + // return low & b; + //} + + //unsigned __int64 operator&(const regMaskTP& b) + //{ + // return low & b.low; + //} + + + FORCEINLINE static uint32_t BitScanForwardRegMask(regMaskTP mask) + { + return BitOperations::BitScanForward(mask.low); + } + + FORCEINLINE static unsigned PopCountRegMask(regMaskTP mask) + { + return BitOperations::PopCount(mask.low); + } + + FORCEINLINE regMaskTP operator&(const unsigned __int64 b) const + { + regMaskTP result(low & b); + return result; + } + + FORCEINLINE regMaskTP operator&(const regMaskTP& b) const + { + regMaskTP result(low & b.low); + return result; + } + + FORCEINLINE regMaskTP operator|(const unsigned __int64 b) const + { + regMaskTP result(low | b); + return result; + } + + //unsigned __int64 operator|(const regMaskTP& b) + //{ + // return low | b.low; + //} + + FORCEINLINE regMaskTP operator|(const regMaskTP& b) const + { + regMaskTP result(low | b.low); + return result; + } + + FORCEINLINE regMaskTP operator<<(const unsigned value) const + { + regMaskTP result(low << value); + return result; + } + + FORCEINLINE regMaskTP& operator=(const unsigned __int64& value) + { + low = value; + return *this; + } + + FORCEINLINE regMaskTP& operator=(const unsigned& value) + { + low = value; + return *this; + } + + FORCEINLINE regMaskTP& operator^=(const regMaskTP& value) + { + low ^= value.low; + return *this; + } + + FORCEINLINE regMaskTP& operator&=(const regMaskTP& value) + { + low &= value.low; + return *this; + } + + FORCEINLINE regMaskTP& operator|=(const regMaskTP& value) + { + low |= value.low; + return *this; + } + + FORCEINLINE regMaskTP& operator<<=(const unsigned value) + { + low <<= value; + return *this; + } + + FORCEINLINE bool operator==(const regMaskTP& value) const + { + return low != value.low; + } + + FORCEINLINE bool operator!=(const regMaskTP& value) const + { + return low != value.low; + } + + //FORCEINLINE bool operator||(const regMaskTP& value) const + //{ + // return low != 0 || value.low != 0; + //} + + FORCEINLINE regMaskTP operator~() + { + regMaskTP result(~low); + return result; + } + + FORCEINLINE operator regMaskTP() const + { + return regMaskTP{static_cast(low)}; + } + + FORCEINLINE explicit operator bool() const + { + return low != 0; + } +}; + //typedef unsigned __int64 regMaskTP; #else typedef unsigned regMaskTP; #endif @@ -479,7 +617,7 @@ inline regMaskTP fullIntArgRegMask() { if (hasFixedRetBuffReg()) { - return RBM_ARG_REGS | theFixedRetBuffMask(); + return theFixedRetBuffMask() | RBM_ARG_REGS; } else { From 302b4a5827bc11e9c0e2e5521da18580fc2a6f0d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 19 Dec 2023 09:55:00 -0800 Subject: [PATCH 2/6] fix all the compilation errors --- src/coreclr/jit/compiler.hpp | 36 ++++++- src/coreclr/jit/gentree.h | 2 +- src/coreclr/jit/lsra.cpp | 4 + src/coreclr/jit/lsra.h | 4 +- src/coreclr/jit/target.h | 203 +++++++++++++++++++---------------- 5 files changed, 150 insertions(+), 99 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 7011cc0b0ef374..dcd01a421fff11 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -167,10 +167,12 @@ inline unsigned genCountBits(uint64_t bits) return BitOperations::PopCount(bits); } +#ifdef TARGET_ARM64 inline unsigned genCountBits(regMaskTP mask) { return genCountBits(mask.low); } +#endif /***************************************************************************** * @@ -879,9 +881,13 @@ inline regNumber genRegNumFromMask(regMaskTP mask) { assert(mask != 0); // Must have one bit set, so can't have a mask of zero - /* Convert the mask to a register number */ +/* Convert the mask to a register number */ +#ifdef TARGET_ARM64 regNumber regNum = (regNumber)regMaskTP::BitScanForwardRegMask(mask); +#else + regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); +#endif /* Make sure we got it right */ @@ -905,9 +911,13 @@ inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask) { assert(mask != 0); // Must have one bit set, so can't have a mask of zero - /* Convert the mask to a register number */ +/* Convert the mask to a register number */ +#ifdef TARGET_ARM64 regNumber regNum = (regNumber)regMaskTP::BitScanForwardRegMask(mask); +#else + regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); +#endif mask ^= genRegMask(regNum); return regNum; @@ -927,9 +937,13 @@ inline regNumber genFirstRegNumFromMask(regMaskTP mask) { assert(mask != 0); // Must have one bit set, so can't have a mask of zero - /* Convert the mask to a register number */ +/* Convert the mask to a register number */ +#ifdef TARGET_ARM64 regNumber regNum = (regNumber)regMaskTP::BitScanForwardRegMask(mask); +#else + regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); +#endif return regNum; } @@ -4624,7 +4638,11 @@ inline void* operator new[](size_t sz, Compiler* compiler, CompMemKind cmk) inline void printRegMask(regMaskTP mask) { +#ifdef TARGET_ARM64 printf(REG_MASK_ALL_FMT, mask.low); +#else + printf(REG_MASK_ALL_FMT, mask); +#endif } inline char* regMaskToString(regMaskTP mask, Compiler* context) @@ -4632,14 +4650,22 @@ inline char* regMaskToString(regMaskTP mask, Compiler* context) const size_t cchRegMask = 24; char* regmask = new (context, CMK_Unknown) char[cchRegMask]; +#ifdef TARGET_ARM64 sprintf_s(regmask, cchRegMask, REG_MASK_ALL_FMT, mask.low); +#else + sprintf_s(regmask, cchRegMask, REG_MASK_ALL_FMT, mask); +#endif return regmask; } inline void printRegMaskInt(regMaskTP mask) { +#ifdef TARGET_ARM64 printf(REG_MASK_INT_FMT, (mask & RBM_ALLINT).low); +#else + printf(REG_MASK_INT_FMT, (mask & RBM_ALLINT)); +#endif } inline char* regMaskIntToString(regMaskTP mask, Compiler* context) @@ -4647,7 +4673,11 @@ inline char* regMaskIntToString(regMaskTP mask, Compiler* context) const size_t cchRegMask = 24; char* regmask = new (context, CMK_Unknown) char[cchRegMask]; +#ifdef TARGET_ARM64 sprintf_s(regmask, cchRegMask, REG_MASK_INT_FMT, (mask & RBM_ALLINT).low); +#else + sprintf_s(regmask, cchRegMask, REG_MASK_INT_FMT, (mask & RBM_ALLINT)); +#endif return regmask; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index db056916eba99f..0f47fd47198e23 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -966,7 +966,7 @@ struct GenTree regMaskSmall gtRsvdRegs; // set of fixed trashed registers - unsigned AvailableTempRegCount(regMaskTP mask = static_cast(-1)) const; + unsigned AvailableTempRegCount(regMaskTP mask = static_cast(-1)) const; regNumber GetSingleTempReg(regMaskTP mask = (regMaskTP)-1); regNumber ExtractTempReg(regMaskTP mask = (regMaskTP)-1); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 37d85e23b91504..720643e4766dba 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -12595,7 +12595,11 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, &overallLimitCandidates); assert(limitConsecutiveResult != RBM_NONE); +#ifdef TARGET_ARM64 + unsigned startRegister = regMaskTP::BitScanForwardRegMask(limitConsecutiveResult); +#else unsigned startRegister = BitOperations::BitScanForward(limitConsecutiveResult); +#endif regMaskTP registersNeededMask = (1ULL << refPosition->regCount) - 1; candidates |= (registersNeededMask << startRegister); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 11e5e9e1481817..1de0dc5959e941 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -785,8 +785,8 @@ class LinearScan : public LinearScanInterface static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); #elif defined(TARGET_ARM64) - static const regMaskTP LsraLimitSmallIntSet = regMaskTP{RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20}; - static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); + const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); + const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); #elif defined(TARGET_X86) static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 34c19ea77ba89b..56c2f4089af8d2 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -213,29 +213,24 @@ enum _regMask_enum : unsigned typedef unsigned __int64 regMaskTP; #elif defined(TARGET_ARM64) typedef struct __regMaskTP regMaskTP; +typedef unsigned __int64 regMaskSmall; + struct __regMaskTP { unsigned __int64 low; - __regMaskTP() : low(0) {} - __regMaskTP(unsigned __int64 _low) : low(_low) {} + __regMaskTP() : low(0) + { + } + __regMaskTP(unsigned __int64 _low) : low(_low) + { + } //// Move constructor //__regMaskTP(__regMaskTP&& other) noexcept //{ // low = other.low; //} - //unsigned __int64 operator&(const unsigned __int64& b) - //{ - // return low & b; - //} - - //unsigned __int64 operator&(const regMaskTP& b) - //{ - // return low & b.low; - //} - - FORCEINLINE static uint32_t BitScanForwardRegMask(regMaskTP mask) { return BitOperations::BitScanForward(mask.low); @@ -246,111 +241,133 @@ struct __regMaskTP return BitOperations::PopCount(mask.low); } - FORCEINLINE regMaskTP operator&(const unsigned __int64 b) const + FORCEINLINE operator regMaskTP() const { - regMaskTP result(low & b); - return result; + return regMaskTP{static_cast(low)}; } - FORCEINLINE regMaskTP operator&(const regMaskTP& b) const + FORCEINLINE explicit operator bool() const { - regMaskTP result(low & b.low); - return result; + return low != 0; } - FORCEINLINE regMaskTP operator|(const unsigned __int64 b) const + FORCEINLINE explicit operator regMaskSmall() const { - regMaskTP result(low | b); - return result; + return (regMaskSmall)low; } - - //unsigned __int64 operator|(const regMaskTP& b) - //{ - // return low | b.low; - //} - FORCEINLINE regMaskTP operator|(const regMaskTP& b) const + FORCEINLINE explicit operator unsigned int() const { - regMaskTP result(low | b.low); - return result; + return (unsigned int)low; } +}; +// typedef unsigned __int64 regMaskTP; +FORCEINLINE regMaskTP operator-(const regMaskTP& a, const regMaskTP& b) +{ + regMaskTP result(a.low - b.low); + return result; +} - FORCEINLINE regMaskTP operator<<(const unsigned value) const - { - regMaskTP result(low << value); - return result; - } +FORCEINLINE regMaskTP operator&(const regMaskTP& a, const regMaskTP& b) +{ + regMaskTP result(a.low & b.low); + return result; +} - FORCEINLINE regMaskTP& operator=(const unsigned __int64& value) - { - low = value; - return *this; - } +FORCEINLINE regMaskTP operator|(const regMaskTP& a, const regMaskTP& b) +{ + regMaskTP result(a.low | b.low); + return result; +} - FORCEINLINE regMaskTP& operator=(const unsigned& value) - { - low = value; - return *this; - } +FORCEINLINE regMaskTP operator<<(const regMaskTP& a, const regMaskTP& b) +{ + regMaskTP result(a.low << b.low); + return result; +} - FORCEINLINE regMaskTP& operator^=(const regMaskTP& value) - { - low ^= value.low; - return *this; - } +FORCEINLINE regMaskTP operator|=(regMaskTP& a, const regMaskTP& b) +{ + a.low |= b.low; + return a; +} - FORCEINLINE regMaskTP& operator&=(const regMaskTP& value) - { - low &= value.low; - return *this; - } +FORCEINLINE regMaskTP operator-=(regMaskTP& a, const regMaskTP& b) +{ + a.low -= b.low; + return a; +} - FORCEINLINE regMaskTP& operator|=(const regMaskTP& value) - { - low |= value.low; - return *this; - } +FORCEINLINE regMaskTP operator^=(regMaskTP& a, const regMaskTP& b) +{ + a.low ^= b.low; + return a; +} - FORCEINLINE regMaskTP& operator<<=(const unsigned value) - { - low <<= value; - return *this; - } +FORCEINLINE regMaskSmall operator^=(regMaskSmall& a, const regMaskTP& b) +{ + a ^= b.low; + return a; +} - FORCEINLINE bool operator==(const regMaskTP& value) const - { - return low != value.low; - } +FORCEINLINE regMaskSmall operator&=(regMaskSmall& a, const regMaskTP& b) +{ + a &= b.low; + return a; +} - FORCEINLINE bool operator!=(const regMaskTP& value) const - { - return low != value.low; - } +FORCEINLINE regMaskSmall operator|=(regMaskSmall& a, const regMaskTP& b) +{ + a |= b.low; + return a; +} - //FORCEINLINE bool operator||(const regMaskTP& value) const - //{ - // return low != 0 || value.low != 0; - //} +FORCEINLINE regMaskSmall operator-=(regMaskSmall& a, const regMaskTP& b) +{ + a -= b.low; + return a; +} - FORCEINLINE regMaskTP operator~() - { - regMaskTP result(~low); - return result; - } +FORCEINLINE regMaskTP operator<<=(regMaskTP& a, const regMaskTP& b) +{ + a.low <<= b.low; + return a; +} - FORCEINLINE operator regMaskTP() const - { - return regMaskTP{static_cast(low)}; - } +FORCEINLINE regMaskTP operator&=(regMaskTP& a, const regMaskTP& b) +{ + a.low &= b.low; + return a; +} - FORCEINLINE explicit operator bool() const - { - return low != 0; - } -}; - //typedef unsigned __int64 regMaskTP; +// FORCEINLINE regMaskTP operator|=(regMaskTP& a, const regMaskTP& b) +//{ +// low <<= value; +// return *this; +//} + +FORCEINLINE constexpr bool operator==(const regMaskTP& a, const regMaskTP& b) +{ + return a.low == b.low; +} + +FORCEINLINE constexpr bool operator!=(const regMaskTP& a, const regMaskTP& b) +{ + return a.low != b.low; +} + +FORCEINLINE constexpr bool operator>(const regMaskTP& a, const regMaskTP& b) +{ + return a.low > b.low; +} + +FORCEINLINE regMaskTP operator~(const regMaskTP& a) +{ + regMaskTP result(~a.low); + return result; +} #else -typedef unsigned regMaskTP; +typedef unsigned regMaskTP; #endif #if REGMASK_BITS == 8 From 3a851ca5ac2a201f77cfd22d7aad4ed81853f8b0 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 19 Dec 2023 11:39:58 -0800 Subject: [PATCH 3/6] Add high field in regMaskTP and use it in popmask --- src/coreclr/jit/compiler.hpp | 2 +- src/coreclr/jit/target.h | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index dcd01a421fff11..84e393d4ef25e0 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -170,7 +170,7 @@ inline unsigned genCountBits(uint64_t bits) #ifdef TARGET_ARM64 inline unsigned genCountBits(regMaskTP mask) { - return genCountBits(mask.low); + return regMaskTP::PopCountRegMask(mask); } #endif diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 56c2f4089af8d2..990ae3476c0932 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -218,6 +218,7 @@ typedef unsigned __int64 regMaskSmall; struct __regMaskTP { unsigned __int64 low; + unsigned __int64 high; __regMaskTP() : low(0) { @@ -233,12 +234,16 @@ struct __regMaskTP FORCEINLINE static uint32_t BitScanForwardRegMask(regMaskTP mask) { + if (mask.low == 0) + { + return 32 + BitOperations::BitScanForward(mask.high); + } return BitOperations::BitScanForward(mask.low); } FORCEINLINE static unsigned PopCountRegMask(regMaskTP mask) { - return BitOperations::PopCount(mask.low); + return BitOperations::PopCount(mask.low) + BitOperations::PopCount(mask.high); } FORCEINLINE operator regMaskTP() const From 074199b156c6b7ed929290d7c11077c2fb09eb46 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 19 Dec 2023 12:48:35 -0800 Subject: [PATCH 4/6] Use `high` in all the operators --- src/coreclr/jit/target.h | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 990ae3476c0932..8d4cff608e6d86 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -220,10 +220,10 @@ struct __regMaskTP unsigned __int64 low; unsigned __int64 high; - __regMaskTP() : low(0) + __regMaskTP() : low(0), high(0) { } - __regMaskTP(unsigned __int64 _low) : low(_low) + __regMaskTP(unsigned __int64 _low, unsigned __int64 _high = 0) : low(_low), high(_high) { } //// Move constructor @@ -253,53 +253,57 @@ struct __regMaskTP FORCEINLINE explicit operator bool() const { - return low != 0; + return low != 0 && high != 0; } FORCEINLINE explicit operator regMaskSmall() const { + assert(high == 0); return (regMaskSmall)low; } FORCEINLINE explicit operator unsigned int() const { + assert(high == 0); return (unsigned int)low; } }; // typedef unsigned __int64 regMaskTP; FORCEINLINE regMaskTP operator-(const regMaskTP& a, const regMaskTP& b) { - regMaskTP result(a.low - b.low); + regMaskTP result(a.low - b.low, a.high - b.high); return result; } FORCEINLINE regMaskTP operator&(const regMaskTP& a, const regMaskTP& b) { - regMaskTP result(a.low & b.low); + regMaskTP result(a.low & b.low, a.high & b.high); return result; } FORCEINLINE regMaskTP operator|(const regMaskTP& a, const regMaskTP& b) { - regMaskTP result(a.low | b.low); + regMaskTP result(a.low | b.low, a.high | b.high); return result; } FORCEINLINE regMaskTP operator<<(const regMaskTP& a, const regMaskTP& b) { - regMaskTP result(a.low << b.low); + regMaskTP result(a.low << b.low, a.high << b.high); return result; } FORCEINLINE regMaskTP operator|=(regMaskTP& a, const regMaskTP& b) { a.low |= b.low; + a.high |= b.high; return a; } FORCEINLINE regMaskTP operator-=(regMaskTP& a, const regMaskTP& b) { a.low -= b.low; + a.high -= b.high; return a; } @@ -311,24 +315,28 @@ FORCEINLINE regMaskTP operator^=(regMaskTP& a, const regMaskTP& b) FORCEINLINE regMaskSmall operator^=(regMaskSmall& a, const regMaskTP& b) { + assert(b.high == 0); a ^= b.low; return a; } FORCEINLINE regMaskSmall operator&=(regMaskSmall& a, const regMaskTP& b) { + assert(b.high == 0); a &= b.low; return a; } FORCEINLINE regMaskSmall operator|=(regMaskSmall& a, const regMaskTP& b) { + assert(b.high == 0); a |= b.low; return a; } FORCEINLINE regMaskSmall operator-=(regMaskSmall& a, const regMaskTP& b) { + assert(b.high == 0); a -= b.low; return a; } @@ -336,12 +344,14 @@ FORCEINLINE regMaskSmall operator-=(regMaskSmall& a, const regMaskTP& b) FORCEINLINE regMaskTP operator<<=(regMaskTP& a, const regMaskTP& b) { a.low <<= b.low; + a.high <<= b.high; return a; } FORCEINLINE regMaskTP operator&=(regMaskTP& a, const regMaskTP& b) { a.low &= b.low; + a.high &= b.high; return a; } @@ -353,22 +363,22 @@ FORCEINLINE regMaskTP operator&=(regMaskTP& a, const regMaskTP& b) FORCEINLINE constexpr bool operator==(const regMaskTP& a, const regMaskTP& b) { - return a.low == b.low; + return (a.low == b.low) && (a.high == b.high); } FORCEINLINE constexpr bool operator!=(const regMaskTP& a, const regMaskTP& b) { - return a.low != b.low; + return (a.low != b.low) || (a.high != b.high); } FORCEINLINE constexpr bool operator>(const regMaskTP& a, const regMaskTP& b) { - return a.low > b.low; + return (a.low > b.low) && (a.high > b.high); } FORCEINLINE regMaskTP operator~(const regMaskTP& a) { - regMaskTP result(~a.low); + regMaskTP result(~a.low, ~a.high); return result; } #else From 69e6d905942b7a3cea51a1bd2c7e739572214aa1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 19 Dec 2023 14:02:25 -0800 Subject: [PATCH 5/6] fix some crossgen2 errors --- src/coreclr/jit/target.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 8d4cff608e6d86..f6024a6b7dd9b1 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -253,7 +253,7 @@ struct __regMaskTP FORCEINLINE explicit operator bool() const { - return low != 0 && high != 0; + return low != 0 || high != 0; } FORCEINLINE explicit operator regMaskSmall() const @@ -315,28 +315,28 @@ FORCEINLINE regMaskTP operator^=(regMaskTP& a, const regMaskTP& b) FORCEINLINE regMaskSmall operator^=(regMaskSmall& a, const regMaskTP& b) { - assert(b.high == 0); + //assert(b.high == 0); a ^= b.low; return a; } FORCEINLINE regMaskSmall operator&=(regMaskSmall& a, const regMaskTP& b) { - assert(b.high == 0); + //assert(b.high == 0); a &= b.low; return a; } FORCEINLINE regMaskSmall operator|=(regMaskSmall& a, const regMaskTP& b) { - assert(b.high == 0); + //assert(b.high == 0); a |= b.low; return a; } FORCEINLINE regMaskSmall operator-=(regMaskSmall& a, const regMaskTP& b) { - assert(b.high == 0); + //assert(b.high == 0); a -= b.low; return a; } From 1b27ea21c7688e46200c458c5f2b2622838e43bd Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 19 Dec 2023 15:31:50 -0800 Subject: [PATCH 6/6] fix some consecutive register failures --- src/coreclr/jit/lsraarm64.cpp | 14 +++++++------- src/coreclr/jit/lsrabuild.cpp | 7 +++++++ src/coreclr/jit/target.h | 4 ++-- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 876d7ca81b1c10..262125202d8a25 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -205,14 +205,14 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, do { // From LSB, find the first available register (bit `1`) - regAvailableStartIndex = BitOperations::BitScanForward(static_cast(currAvailableRegs)); + regAvailableStartIndex = regMaskTP::BitScanForwardRegMask(currAvailableRegs); regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. regMaskTP maskProcessed = ~(currAvailableRegs | startMask); // From regAvailableStart, find the first unavailable register (bit `0`). - if (maskProcessed == RBM_NONE) + if (maskProcessed.low == RBM_NONE) { regAvailableEndIndex = 64; if ((regAvailableEndIndex - regAvailableStartIndex) >= registersNeeded) @@ -223,7 +223,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, } else { - regAvailableEndIndex = BitOperations::BitScanForward(static_cast(maskProcessed)); + regAvailableEndIndex = regMaskTP::BitScanForwardRegMask(maskProcessed); } regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1; @@ -234,7 +234,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, AppendConsecutiveMask(regAvailableStartIndex, regAvailableEndIndex, (endMask & ~startMask)); } currAvailableRegs &= ~endMask; - } while (currAvailableRegs != RBM_NONE); + } while (currAvailableRegs.low != RBM_NONE); regMaskTP v0_v31_mask = RBM_V0 | RBM_V31; if ((candidates & v0_v31_mask) == v0_v31_mask) @@ -335,7 +335,7 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC do { // From LSB, find the first available register (bit `1`) - regAvailableStartIndex = BitOperations::BitScanForward(static_cast(unprocessedRegs)); + regAvailableStartIndex = regMaskTP::BitScanForwardRegMask(static_cast(unprocessedRegs)); // For the current range, find how many registers are free vs. busy regMaskTP maskForCurRange = RBM_NONE; @@ -365,7 +365,7 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC maskForCurRange |= (registersNeededMask << regAvailableStartIndex); maskForCurRange &= m_AvailableRegs; - if (maskForCurRange != RBM_NONE) + if (maskForCurRange.low != RBM_NONE) { // In the given range, there are some free registers available. Calculate how many registers // will need spilling if this range is picked. @@ -382,7 +382,7 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC } } unprocessedRegs &= ~(1ULL << regAvailableStartIndex); - } while (unprocessedRegs != RBM_NONE); + } while (unprocessedRegs.low != RBM_NONE); // consecutiveResultForBusy should always be a subset of consecutiveCandidates. assert((consecutiveCandidates & consecutiveResultForBusy) == consecutiveResultForBusy); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index f3aaba0847f689..033258abafbe48 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2805,6 +2805,13 @@ void LinearScan::buildIntervals() availableRegCount = REG_INT_COUNT; } +#if TARGET_ARM64 + if ((sizeof(regMaskTP) * 8) > 64) + { + actualRegistersMask = ~RBM_NONE; + } + else +#endif if (availableRegCount < (sizeof(regMaskTP) * 8)) { // Mask out the bits that are between 64 ~ availableRegCount diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index f6024a6b7dd9b1..7993395116cb76 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -258,13 +258,13 @@ struct __regMaskTP FORCEINLINE explicit operator regMaskSmall() const { - assert(high == 0); + //assert(high == 0); return (regMaskSmall)low; } FORCEINLINE explicit operator unsigned int() const { - assert(high == 0); + //assert(high == 0); return (unsigned int)low; } };