@@ -5151,9 +5151,228 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
51515151
51525152 case GT_CNS_LNG:
51535153 case GT_CNS_INT:
5154- costEx = 1;
5155- costSz = 4;
5154+ {
5155+ GenTreeIntConCommon* con = tree->AsIntConCommon();
5156+ bool iconNeedsReloc = con->ImmedValNeedsReloc(this);
5157+ INT64 imm = con->LngValue();
5158+ emitAttr size = EA_SIZE(emitActualTypeSize(tree));
5159+
5160+ if (iconNeedsReloc)
5161+ {
5162+ // TODO-RISCV64-CQ: tune the costs.
5163+ // The codegen(emitIns_R_AI) is not implemented yet.
5164+ // Assuming that it will require two instructions auipc + addi for relocations
5165+ costSz = 8;
5166+ costEx = 2;
5167+ }
5168+ else if (emitter::isValidSimm12((ssize_t)imm))
5169+ {
5170+ costSz = 4;
5171+ costEx = 1;
5172+ }
5173+ else
5174+ {
5175+ // The below logic mimics emitter::emitLoadImmediate
5176+ #define WordMask(x) (static_cast<unsigned>((1ull << (uint8_t)(x)) - 1))
5177+
5178+ // STEP 1: Determine x & y
5179+
5180+ int x;
5181+ int y;
5182+ if (((uint64_t)imm >> 63) & 0b1)
5183+ {
5184+ // position of the lowest bit in the run of leading ones (scanning down from the MSB)
5185+ y = 63 - BitOperations::LeadingZeroCount((uint64_t)~imm) + 1;
5186+ }
5187+ else
5188+ {
5189+ // position of the lowest bit in the run of leading zeros (scanning down from the MSB)
5190+ y = 63 - BitOperations::LeadingZeroCount((uint64_t)imm) + 1;
5191+ }
5192+ if (imm & 0b1)
5193+ {
5194+ // first zero position from LSB
5195+ x = BitOperations::TrailingZeroCount((uint64_t)~imm);
5196+ }
5197+ else
5198+ {
5199+ // first one position from LSB
5200+ x = BitOperations::TrailingZeroCount((uint64_t)imm);
5201+ }
5202+
5203+ // STEP 2: Determine whether to utilize SRLI or not.
5204+
5205+ constexpr int absMaxInsCount = emitter::instrDescLoadImm::absMaxInsCount;
5206+ constexpr int prefMaxInsCount = 5;
5207+ assert(prefMaxInsCount <= absMaxInsCount);
5208+
5209+ int insCountLimit = prefMaxInsCount;
5210+ if (this->compGeneratingProlog || this->compGeneratingEpilog)
5211+ {
5212+ insCountLimit = absMaxInsCount;
5213+ }
5214+
5215+ bool utilizeSRLI = false;
5216+ int srliShiftAmount = 0;
5217+ uint64_t originalImm = imm;
5218+ bool cond1 = (y - x) > 31;
5219+ if ((((uint64_t)imm >> 63) & 0b1) == 0 && cond1)
5220+ {
5221+ srliShiftAmount = BitOperations::LeadingZeroCount((uint64_t)imm);
5222+ uint64_t tempImm = (uint64_t)imm << srliShiftAmount;
5223+ int m = BitOperations::LeadingZeroCount(~tempImm);
5224+ int b = 64 - m;
5225+ int a = BitOperations::TrailingZeroCount(tempImm);
5226+ bool cond2 = (b - a) < 32;
5227+ bool cond3 = ((y - x) - (b - a)) >= 11;
5228+ if (cond2 || cond3)
5229+ {
5230+ imm = tempImm;
5231+ y = b;
5232+ x = a;
5233+ utilizeSRLI = true;
5234+ insCountLimit -= 1;
5235+ }
5236+ }
5237+
5238+ if (y < 32)
5239+ {
5240+ y = 31;
5241+ x = 0;
5242+ }
5243+ else if ((y - x) < 31)
5244+ {
5245+ y = x + 31;
5246+ }
5247+ else
5248+ {
5249+ x = y - 31;
5250+ }
5251+
5252+ uint32_t high32 = ((int64_t)imm >> x) & WordMask(32);
5253+
5254+ // STEP 3: Determine whether to use high32 + offset1 or high32 - offset2
5255+
5256+ // TODO-RISCV: Instead of using subtract / add mode, assume that we're always adding
5257+ // 12-bit chunks. However, if we encounter such a 12-bit chunk with MSB == 1,
5258+ // add 1 to the previous chunk, and add the 12-bit chunk as is, which
5259+ // essentially does a subtraction. This will generate the fewest instructions to
5260+ // load the offset.
5261+ // See the following discussion:
5262+ // https://github.com/dotnet/runtime/pull/113250#discussion_r1987576070
5263+
5264+ uint32_t offset1 = imm & WordMask((uint8_t)x);
5265+ uint32_t offset2 = (~(offset1 - 1)) & WordMask((uint8_t)x);
5266+ uint32_t offset = offset1;
5267+ bool isSubtractMode = false;
5268+
5269+ if ((high32 == 0x7FFFFFFF) && (y != 63))
5270+ {
5271+ int newX = x + 1;
5272+ uint32_t newOffset1 = imm & WordMask((uint8_t)newX);
5273+ uint32_t newOffset2 = (~(newOffset1 - 1)) & WordMask((uint8_t)newX);
5274+ if (newOffset2 < offset1)
5275+ {
5276+ x = newX;
5277+ high32 = ((int64_t)imm >> x) & WordMask(32);
5278+ offset2 = newOffset2;
5279+ isSubtractMode = true;
5280+ }
5281+ }
5282+ else if (offset2 < offset1)
5283+ {
5284+ isSubtractMode = true;
5285+ }
5286+
5287+ if (isSubtractMode)
5288+ {
5289+ offset = offset2;
5290+ high32 = (high32 + 1) & WordMask(32);
5291+ }
5292+
5293+ assert(absMaxInsCount >= 2);
5294+ int numberOfInstructions = 0;
5295+ instruction ins[absMaxInsCount];
5296+ int32_t values[absMaxInsCount];
5297+
5298+ // STEP 4: Count the instructions needed to load high32
5299+
5300+ uint32_t upper = (high32 >> 12) & WordMask(20);
5301+ uint32_t lower = high32 & WordMask(12);
5302+ int lowerMsb = (lower >> 11) & 0b1;
5303+ if (lowerMsb == 1)
5304+ {
5305+ upper += 1;
5306+ upper &= WordMask(20);
5307+ }
5308+ if (upper != 0)
5309+ {
5310+ numberOfInstructions += 1;
5311+ }
5312+ if (lower != 0)
5313+ {
5314+ numberOfInstructions += 1;
5315+ }
5316+
5317+ // STEP 5: Count the instructions needed to load offset in 11-bit chunks
5318+
5319+ int chunkLsbPos = (x < 11) ? 0 : (x - 11);
5320+ int shift = (x < 11) ? x : 11;
5321+ int chunkMask = (x < 11) ? WordMask((uint8_t)x) : WordMask(11);
5322+ while (true)
5323+ {
5324+ uint32_t chunk = (offset >> chunkLsbPos) & chunkMask;
5325+
5326+ if (chunk != 0)
5327+ {
5328+ /* We can move our 11-bit chunk window to the right by as many bits as the
5329+ * chunk has leading zeros. */
5330+ int leadingZerosOn11BitsChunk = 11 - (32 - BitOperations::LeadingZeroCount(chunk));
5331+ if (leadingZerosOn11BitsChunk > 0)
5332+ {
5333+ int maxAdditionalShift =
5334+ (chunkLsbPos < leadingZerosOn11BitsChunk) ? chunkLsbPos : leadingZerosOn11BitsChunk;
5335+ chunkLsbPos -= maxAdditionalShift;
5336+ shift += maxAdditionalShift;
5337+ chunk = (offset >> chunkLsbPos) & chunkMask;
5338+ }
5339+
5340+ numberOfInstructions += 2;
5341+ if (numberOfInstructions > insCountLimit)
5342+ {
5343+ break;
5344+ }
5345+ shift = 0;
5346+ }
5347+ if (chunkLsbPos == 0)
5348+ {
5349+ break;
5350+ }
5351+ shift += (chunkLsbPos < 11) ? chunkLsbPos : 11;
5352+ chunkMask = (chunkLsbPos < 11) ? (chunkMask >> (11 - chunkLsbPos)) : WordMask(11);
5353+ chunkLsbPos -= (chunkLsbPos < 11) ? chunkLsbPos : 11;
5354+ }
5355+ if (shift > 0)
5356+ {
5357+ numberOfInstructions += 1;
5358+ }
5359+
5360+ // STEP 6: Determine whether to use emitDataConst or emit generated instructions
5361+
5362+ if (numberOfInstructions <= insCountLimit)
5363+ {
5364+ if (utilizeSRLI)
5365+ {
5366+ numberOfInstructions += 1;
5367+ assert(numberOfInstructions < absMaxInsCount);
5368+ }
5369+ }
5370+ costSz = 4 * numberOfInstructions;
5371+ costEx = numberOfInstructions;
5372+ #undef WordMask
5373+ }
51565374 goto COMMON_CNS;
5375+ }
51575376#else
51585377 case GT_CNS_STR:
51595378 case GT_CNS_LNG:
0 commit comments