@@ -5151,9 +5151,267 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
51515151
51525152 case GT_CNS_LNG:
51535153 case GT_CNS_INT:
5154- costEx = 1;
5155- costSz = 4;
5154+ {
5155+ GenTreeIntConCommon* con = tree->AsIntConCommon();
5156+ bool iconNeedsReloc = con->ImmedValNeedsReloc(this);
5157+ INT64 imm = con->LngValue();
5158+ emitAttr size = EA_SIZE(emitActualTypeSize(tree));
5159+
5160+ if (iconNeedsReloc)
5161+ {
5162+ // TODO-RISCV64-CQ: tune the costs.
5163+ // The codegen(emitIns_R_AI) is not implemented yet.
5164+ // Assuming that it will require two instructions auipc + addi for relocations
5165+ costSz = 8;
5166+ costEx = 2;
5167+ }
5168+ else if (emitter::isValidSimm12((ssize_t)imm))
5169+ {
5170+ costSz = 4;
5171+ costEx = 1;
5172+ }
5173+ else
5174+ {
5175+ // The below logic mimics emitter::emitLoadImmediate
5176+ #define WordMask(x) (static_cast<unsigned>((1ull << (uint8_t)(x)) - 1))
5177+
5178+ // STEP 1: Determine x & y
5179+ int x;
5180+ int y;
5181+ if (((uint64_t)imm >> 63) & 0b1)
5182+ {
5183+ // last one position from MSB
5184+ y = 63 - BitOperations::LeadingZeroCount((uint64_t)~imm) + 1;
5185+ }
5186+ else
5187+ {
5188+ // last zero position from MSB
5189+ y = 63 - BitOperations::LeadingZeroCount((uint64_t)imm) + 1;
5190+ }
5191+ if (imm & 0b1)
5192+ {
5193+ // first zero position from LSB
5194+ x = BitOperations::TrailingZeroCount((uint64_t)~imm);
5195+ }
5196+ else
5197+ {
5198+ // first one position from LSB
5199+ x = BitOperations::TrailingZeroCount((uint64_t)imm);
5200+ }
5201+
5202+ // STEP 2: Determine whether to utilize SRLI or not.
5203+
5204+ constexpr int absMaxInsCount = emitter::instrDescLoadImm::absMaxInsCount;
5205+ constexpr int prefMaxInsCount = 5;
5206+ assert(prefMaxInsCount <= absMaxInsCount);
5207+
5208+ // If we generate more instructions than the preferred maximum instruction count, we'll instead use emitDataConst +
5209+ // emitIns_R_C combination.
5210+ int insCountLimit = prefMaxInsCount;
5211+ // If we are currently generating prolog / epilog, we are currently not inside a method block, therefore, we should
5212+ // not use the emitDataConst + emitIns_R_C combination.
5213+ if (this->compGeneratingProlog || this->compGeneratingEpilog)
5214+ {
5215+ insCountLimit = absMaxInsCount;
5216+ }
5217+
5218+ bool utilizeSRLI = false;
5219+ int srliShiftAmount = 0;
5220+ uint64_t originalImm = imm;
5221+ bool cond1 = (y - x) > 31;
5222+ if ((((uint64_t)imm >> 63) & 0b1) == 0 && cond1)
5223+ {
5224+ srliShiftAmount = BitOperations::LeadingZeroCount((uint64_t)imm);
5225+ uint64_t tempImm = (uint64_t)imm << srliShiftAmount;
5226+ int m = BitOperations::LeadingZeroCount(~tempImm);
5227+ int b = 64 - m;
5228+ int a = BitOperations::TrailingZeroCount(tempImm);
5229+ bool cond2 = (b - a) < 32;
5230+ bool cond3 = ((y - x) - (b - a)) >= 11;
5231+ if (cond2 || cond3)
5232+ {
5233+ imm = tempImm;
5234+ y = b;
5235+ x = a;
5236+ utilizeSRLI = true;
5237+ insCountLimit -= 1;
5238+ }
5239+ }
5240+
5241+ if (y < 32)
5242+ {
5243+ y = 31;
5244+ x = 0;
5245+ }
5246+ else if ((y - x) < 31)
5247+ {
5248+ y = x + 31;
5249+ }
5250+ else
5251+ {
5252+ x = y - 31;
5253+ }
5254+
5255+ uint32_t high32 = ((int64_t)imm >> x) & WordMask(32);
5256+
5257+ // STEP 3: Determine whether to use high32 + offset1 or high32 - offset2
5258+
5259+ uint32_t offset1 = imm & WordMask((uint8_t)x);
5260+ uint32_t offset2 = (~(offset1 - 1)) & WordMask((uint8_t)x);
5261+ uint32_t offset = offset1;
5262+ bool isSubtractMode = false;
5263+
5264+ if ((high32 == 0x7FFFFFFF) && (y != 63))
5265+ {
5266+ // Handle corner case: we cannot do subtract mode if high32 == 0x7FFFFFFF
5267+ // Since adding 1 to it will change the sign bit. Instead, shift x and y
5268+ // to the left by one.
5269+ int newX = x + 1;
5270+ uint32_t newOffset1 = imm & WordMask((uint8_t)newX);
5271+ uint32_t newOffset2 = (~(newOffset1 - 1)) & WordMask((uint8_t)newX);
5272+ if (newOffset2 < offset1)
5273+ {
5274+ x = newX;
5275+ high32 = ((int64_t)imm >> x) & WordMask(32);
5276+ offset2 = newOffset2;
5277+ isSubtractMode = true;
5278+ }
5279+ }
5280+ else if (offset2 < offset1)
5281+ {
5282+ isSubtractMode = true;
5283+ }
5284+
5285+ if (isSubtractMode)
5286+ {
5287+ offset = offset2;
5288+ high32 = (high32 + 1) & WordMask(32);
5289+ }
5290+
5291+ assert(absMaxInsCount >= 2);
5292+ int numberOfInstructions = 0;
5293+ instruction ins[absMaxInsCount];
5294+ int32_t values[absMaxInsCount];
5295+
5296+ // STEP 4: Generate instructions to load high32
5297+
5298+ uint32_t upper = (high32 >> 12) & WordMask(20);
5299+ uint32_t lower = high32 & WordMask(12);
5300+ int lowerMsb = (lower >> 11) & 0b1;
5301+ if (lowerMsb == 1)
5302+ {
5303+ upper += 1;
5304+ upper &= WordMask(20);
5305+ }
5306+ if (upper != 0)
5307+ {
5308+ // ins[numberOfInstructions] = INS_lui;
5309+ // values[numberOfInstructions] = ((upper >> 19) & 0b1) ? (upper + 0xFFF00000) : upper;
5310+ numberOfInstructions += 1;
5311+ }
5312+ if (lower != 0)
5313+ {
5314+ // ins[numberOfInstructions] = INS_addiw;
5315+ // values[numberOfInstructions] = lower;
5316+ numberOfInstructions += 1;
5317+ }
5318+
5319+ // STEP 5: Generate instructions to load offset in 11-bits chunks
5320+
5321+ int chunkLsbPos = (x < 11) ? 0 : (x - 11);
5322+ int shift = (x < 11) ? x : 11;
5323+ int chunkMask = (x < 11) ? WordMask((uint8_t)x) : WordMask(11);
5324+ while (true)
5325+ {
5326+ uint32_t chunk = (offset >> chunkLsbPos) & chunkMask;
5327+
5328+ if (chunk != 0)
5329+ {
5330+ /* We can move our 11-bit chunk window to the right by as many bits as the
5331+ * chunk has leading zeros (bounded by chunkLsbPos).*/
5332+ int leadingZerosOn11BitsChunk = 11 - (32 - BitOperations::LeadingZeroCount(chunk));
5333+ if (leadingZerosOn11BitsChunk > 0)
5334+ {
5335+ int maxAdditionalShift =
5336+ (chunkLsbPos < leadingZerosOn11BitsChunk) ? chunkLsbPos : leadingZerosOn11BitsChunk;
5337+ chunkLsbPos -= maxAdditionalShift;
5338+ shift += maxAdditionalShift;
5339+ chunk = (offset >> chunkLsbPos) & chunkMask;
5340+ }
5341+
5342+ numberOfInstructions += 2;
5343+ if (numberOfInstructions > insCountLimit)
5344+ {
5345+ break;
5346+ }
5347+ // ins[numberOfInstructions - 2] = INS_slli;
5348+ // values[numberOfInstructions - 2] = shift;
5349+ // if (isSubtractMode)
5350+ // {
5351+ // ins[numberOfInstructions - 1] = INS_addi;
5352+ // values[numberOfInstructions - 1] = -(int32_t)chunk;
5353+ // }
5354+ // else
5355+ // {
5356+ // ins[numberOfInstructions - 1] = INS_addi;
5357+ // values[numberOfInstructions - 1] = chunk;
5358+ // }
5359+ shift = 0;
5360+ }
5361+ if (chunkLsbPos == 0)
5362+ {
5363+ break;
5364+ }
5365+ shift += (chunkLsbPos < 11) ? chunkLsbPos : 11;
5366+ chunkMask = (chunkLsbPos < 11) ? (chunkMask >> (11 - chunkLsbPos)) : WordMask(11);
5367+ chunkLsbPos -= (chunkLsbPos < 11) ? chunkLsbPos : 11;
5368+ }
5369+ if (shift > 0)
5370+ {
5371+ numberOfInstructions += 1;
5372+ // if (numberOfInstructions <= insCountLimit)
5373+ // {
5374+ // ins[numberOfInstructions - 1] = INS_slli;
5375+ // values[numberOfInstructions - 1] = shift;
5376+ // }
5377+ }
5378+
5379+ // STEP 6: Determine whether to use emitDataConst or emit generated instructions
5380+
5381+ if (numberOfInstructions <= insCountLimit)
5382+ {
5383+ // instrDescLoadImm* id = static_cast<instrDescLoadImm*>(emitNewInstrLoadImm(size, originalImm));
5384+ // id->idReg1(reg);
5385+ // memcpy(id->ins, ins, sizeof(instruction) * numberOfInstructions);
5386+ // memcpy(id->values, values, sizeof(int32_t) * numberOfInstructions);
5387+ if (utilizeSRLI)
5388+ {
5389+ numberOfInstructions += 1;
5390+ assert(numberOfInstructions < absMaxInsCount);
5391+ // id->ins[numberOfInstructions - 1] = INS_srli;
5392+ // id->values[numberOfInstructions - 1] = srliShiftAmount;
5393+ }
5394+ // id->idCodeSize(numberOfInstructions * 4);
5395+ // id->idIns(id->ins[numberOfInstructions - 1]);
5396+
5397+ // appendToCurIG(id);
5398+ }
5399+ // else if (size == EA_PTRSIZE)
5400+ // {
5401+ // assert(!emitComp->compGeneratingProlog && !emitComp->compGeneratingEpilog);
5402+ // auto constAddr = emitDataConst(&originalImm, sizeof(long), sizeof(long), TYP_LONG);
5403+ // emitIns_R_C(INS_ld, EA_PTRSIZE, reg, REG_NA, emitComp->eeFindJitDataOffs(constAddr));
5404+ // }
5405+ // else
5406+ // {
5407+ // assert(false && "If number of instruction exceeds MAX_NUM_OF_LOAD_IMM_INS, imm must be 8 bytes");
5408+ // }
5409+ costSz = 4 * numberOfInstructions;
5410+ costEx = numberOfInstructions;
5411+ #undef WordMask
5412+ }
51565413 goto COMMON_CNS;
5414+ }
51575415#else
51585416 case GT_CNS_STR:
51595417 case GT_CNS_LNG:
0 commit comments