diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp
index 0bd4d67b38c43b..0fd672768682c3 100644
--- a/src/coreclr/jit/emitriscv64.cpp
+++ b/src/coreclr/jit/emitriscv64.cpp
@@ -1461,22 +1461,16 @@ void emitter::emitIns_Jump(instruction ins, BasicBlock* dst, regNumber reg1, reg
 
 static inline constexpr unsigned WordMask(uint8_t bits);
 
-/*****************************************************************************
- *
- *  Emits load of 64-bit constant to register.
- *
- */
-void emitter::emitLoadImmediate(emitAttr size, regNumber reg, ssize_t imm)
+/*****************************************************************************
+ *
+ *  Emits, or with doEmit == false only sizes, the load of a constant that does
+ *  not fit in a signed 12-bit immediate. Returns the number of instructions
+ *  the load takes: the length of the synthesized sequence, or 2 when the
+ *  constant is loaded from the data section instead.
+ *
+ */
+int emitter::emitLoadImmediateLarge(emitAttr size, regNumber reg, ssize_t imm, bool doEmit)
 {
-    assert(!EA_IS_RELOC(size));
-    assert(isGeneralRegister(reg));
-
-    if (isValidSimm12(imm))
-    {
-        emitIns_R_R_I(INS_addi, size, reg, REG_R0, imm & 0xFFF);
-        return;
-    }
-
     /* The following algorithm works based on the following equation:
      * `imm = high32 + offset1` OR `imm = high32 - offset2`
      *
@@ -1485,7 +1479,7 @@ void emitter::emitLoadImmediate(emitAttr size, regNumber reg, ssize_t imm)
      *
      * First, determine at which position to partition imm into high32 and offset,
      * so that it yields the least instruction.
-     * Where high32 = imm[y:x] and imm[63:y] are all zeroes or all ones.
+     * Where high32 = imm[y:x] and imm[63:y] are all zeros or all ones.
      *
      * From the above equation, the value of offset1 & offset2 are:
      * -> offset1 = imm[x-1:0]
@@ -1658,14 +1652,20 @@ void emitter::emitLoadImmediate(emitAttr size, regNumber reg, ssize_t imm)
     }
     if (upper != 0)
     {
-        ins[numberOfInstructions]    = INS_lui;
-        values[numberOfInstructions] = ((upper >> 19) & 0b1) ? (upper + 0xFFF00000) : upper;
+        if (doEmit)
+        {
+            ins[numberOfInstructions]    = INS_lui;
+            values[numberOfInstructions] = ((upper >> 19) & 0b1) ? (upper + 0xFFF00000) : upper;
+        }
         numberOfInstructions += 1;
     }
     if (lower != 0)
     {
-        ins[numberOfInstructions]    = INS_addiw;
-        values[numberOfInstructions] = lower;
+        if (doEmit)
+        {
+            ins[numberOfInstructions]    = INS_addiw;
+            values[numberOfInstructions] = lower;
+        }
         numberOfInstructions += 1;
     }
 
@@ -1697,17 +1697,20 @@ void emitter::emitLoadImmediate(emitAttr size, regNumber reg, ssize_t imm)
             {
                 break;
             }
-            ins[numberOfInstructions - 2]    = INS_slli;
-            values[numberOfInstructions - 2] = shift;
-            if (isSubtractMode)
-            {
-                ins[numberOfInstructions - 1]    = INS_addi;
-                values[numberOfInstructions - 1] = -(int32_t)chunk;
-            }
-            else
+            if (doEmit)
             {
-                ins[numberOfInstructions - 1]    = INS_addi;
-                values[numberOfInstructions - 1] = chunk;
+                ins[numberOfInstructions - 2]    = INS_slli;
+                values[numberOfInstructions - 2] = shift;
+                if (isSubtractMode)
+                {
+                    ins[numberOfInstructions - 1]    = INS_addi;
+                    values[numberOfInstructions - 1] = -(int32_t)chunk;
+                }
+                else
+                {
+                    ins[numberOfInstructions - 1]    = INS_addi;
+                    values[numberOfInstructions - 1] = chunk;
+                }
             }
             shift = 0;
         }
@@ -1722,7 +1725,7 @@ void emitter::emitLoadImmediate(emitAttr size, regNumber reg, ssize_t imm)
     if (shift > 0)
     {
         numberOfInstructions += 1;
-        if (numberOfInstructions <= insCountLimit)
+        if (doEmit && (numberOfInstructions <= insCountLimit))
         {
             ins[numberOfInstructions - 1]    = INS_slli;
             values[numberOfInstructions - 1] = shift;
@@ -1733,32 +1736,69 @@ void emitter::emitLoadImmediate(emitAttr size, regNumber reg, ssize_t imm)
 
     if (numberOfInstructions <= insCountLimit)
     {
-        instrDescLoadImm* id = static_cast<instrDescLoadImm*>(emitNewInstrLoadImm(size, originalImm));
-        id->idReg1(reg);
-        memcpy(id->ins, ins, sizeof(instruction) * numberOfInstructions);
-        memcpy(id->values, values, sizeof(int32_t) * numberOfInstructions);
+        instrDescLoadImm* id = nullptr;
+        if (doEmit)
+        {
+            id = static_cast<instrDescLoadImm*>(emitNewInstrLoadImm(size, originalImm));
+            id->idReg1(reg);
+            memcpy(id->ins, ins, sizeof(instruction) * numberOfInstructions);
+            memcpy(id->values, values, sizeof(int32_t) * numberOfInstructions);
+        }
+
         if (utilizeSRLI)
         {
             numberOfInstructions += 1;
             assert(numberOfInstructions < absMaxInsCount);
-            id->ins[numberOfInstructions - 1]    = INS_srli;
-            id->values[numberOfInstructions - 1] = srliShiftAmount;
+            if (doEmit)
+            {
+                id->ins[numberOfInstructions - 1]    = INS_srli;
+                id->values[numberOfInstructions - 1] = srliShiftAmount;
+            }
+        }
+
+        if (doEmit)
+        {
+            id->idCodeSize(numberOfInstructions * 4);
+            id->idIns(id->ins[numberOfInstructions - 1]);
+            appendToCurIG(id);
         }
-        id->idCodeSize(numberOfInstructions * 4);
-        id->idIns(id->ins[numberOfInstructions - 1]);
 
-        appendToCurIG(id);
+        return numberOfInstructions;
     }
     else if (size == EA_PTRSIZE)
     {
-        assert(!emitComp->compGeneratingProlog && !emitComp->compGeneratingEpilog);
-        auto constAddr = emitDataConst(&originalImm, sizeof(long), sizeof(long), TYP_LONG);
-        emitIns_R_C(INS_ld, EA_PTRSIZE, reg, REG_NA, emitComp->eeFindJitDataOffs(constAddr));
+        if (doEmit)
+        {
+            assert(!emitComp->compGeneratingProlog && !emitComp->compGeneratingEpilog);
+            auto constAddr = emitDataConst(&originalImm, sizeof(long), sizeof(long), TYP_LONG);
+            emitIns_R_C(INS_ld, EA_PTRSIZE, reg, REG_NA, emitComp->eeFindJitDataOffs(constAddr));
+        }
+        return 2;
     }
     else
     {
         assert(false && "If number of instruction exceeds MAX_NUM_OF_LOAD_IMM_INS, imm must be 8 bytes");
     }
+    return 0;
+}
+
+/*****************************************************************************
+ *
+ *  Emits load of 64-bit constant to register.
+ *
+ */
+void emitter::emitLoadImmediate(emitAttr size, regNumber reg, ssize_t imm)
+{
+    assert(!EA_IS_RELOC(size));
+    assert(isGeneralRegister(reg));
+
+    if (isValidSimm12(imm))
+    {
+        emitIns_R_R_I(INS_addi, size, reg, REG_R0, imm & 0xFFF);
+        return;
+    }
+
+    emitLoadImmediateLarge(size, reg, imm, /* doEmit */ true);
 }
 
 /*****************************************************************************
diff --git a/src/coreclr/jit/emitriscv64.h b/src/coreclr/jit/emitriscv64.h
index a66de99b999940..2c698b9dff5fbf 100644
--- a/src/coreclr/jit/emitriscv64.h
+++ b/src/coreclr/jit/emitriscv64.h
@@ -32,6 +32,7 @@ const char* emitVectorRegName(regNumber reg);
 void emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 = REG_R0, regNumber reg2 = REG_R0);
 
 void emitIns_J(instruction ins, BasicBlock* dst);
+int  emitLoadImmediateLarge(emitAttr size, regNumber reg, ssize_t imm, bool doEmit = true);
 void emitLoadImmediate(emitAttr attr, regNumber reg, ssize_t imm);
 
 /************************************************************************/
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 8c00356c330415..f0b4444c3e15d2 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -5164,9 +5164,34 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
 
             case GT_CNS_LNG:
             case GT_CNS_INT:
-                costEx = 1;
-                costSz = 4;
+            {
+                GenTreeIntConCommon* con            = tree->AsIntConCommon();
+                bool                 iconNeedsReloc = con->ImmedValNeedsReloc(this);
+                ssize_t              imm            = static_cast<ssize_t>(con->LngValue());
+                emitAttr             size           = EA_SIZE(emitActualTypeSize(tree));
+
+                if (iconNeedsReloc)
+                {
+                    // TODO-RISCV64-CQ: tune the costs.
+                    // The codegen(emitIns_R_AI) is not implemented yet.
+                    // Assuming that it will require two instructions auipc + addi for relocations
+                    costSz = 8;
+                    costEx = 2;
+                }
+                else if (emitter::isValidSimm12(imm))
+                {
+                    costSz = 4;
+                    costEx = 1;
+                }
+                else
+                {
+                    int instructionCount = GetEmitter()->emitLoadImmediateLarge(size, REG_NA, imm, /* doEmit */ false);
+                    assert(instructionCount > 0);
+                    costSz = 4 * instructionCount;
+                    costEx = instructionCount;
+                }
                 goto COMMON_CNS;
+            }
 #elif defined(TARGET_WASM)
             case GT_CNS_STR:
                 costEx = IND_COST_EX + 2;
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 31b513cb7a93dd..c7de189eed27e0 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -447,18 +447,18 @@ RELEASE_CONFIG_INTEGER(EnableApxZU, "EnableApxZU",
 RELEASE_CONFIG_INTEGER(JitDisableSimdVN, "JitDisableSimdVN", 0)
 #endif
 
-// Default 0, enable the CSE of Constants, including nearby offsets. (only for ARM/ARM64)
+// Default 0, enable the CSE of Constants, including nearby offsets. (only for ARM/ARM64/RISCV64)
 // If 1, disable all the CSE of Constants
-// If 2, enable the CSE of Constants but don't combine with nearby offsets. (only for ARM/ARM64)
+// If 2, enable the CSE of Constants but don't combine with nearby offsets. (only for ARM/ARM64/RISCV64)
 // If 3, enable the CSE of Constants including nearby offsets. (all platforms)
 // If 4, enable the CSE of Constants but don't combine with nearby offsets. (all platforms)
 //
-#define CONST_CSE_ENABLE_ARM            0
-#define CONST_CSE_DISABLE_ALL           1
-#define CONST_CSE_ENABLE_ARM_NO_SHARING 2
-#define CONST_CSE_ENABLE_ALL            3
-#define CONST_CSE_ENABLE_ALL_NO_SHARING 4
-RELEASE_CONFIG_INTEGER(JitConstCSE, "JitConstCSE", CONST_CSE_ENABLE_ARM)
+#define CONST_CSE_ENABLE_ARM_RISCV64            0
+#define CONST_CSE_DISABLE_ALL                   1
+#define CONST_CSE_ENABLE_ARM_RISCV64_NO_SHARING 2
+#define CONST_CSE_ENABLE_ALL                    3
+#define CONST_CSE_ENABLE_ALL_NO_SHARING         4
+RELEASE_CONFIG_INTEGER(JitConstCSE, "JitConstCSE", CONST_CSE_ENABLE_ARM_RISCV64)
 
 // If nonzero, use the greedy RL policy.
 //
diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp
index 5714afe9bb0192..8d91640d377c99 100644
--- a/src/coreclr/jit/optcse.cpp
+++ b/src/coreclr/jit/optcse.cpp
@@ -5873,12 +5873,12 @@ bool Compiler::optSharedConstantCSEEnabled()
     {
         enableSharedConstCSE = true;
     }
-#if defined(TARGET_ARMARCH)
-    else if (configValue == CONST_CSE_ENABLE_ARM)
+#if defined(TARGET_ARMARCH) || defined(TARGET_RISCV64)
+    else if (configValue == CONST_CSE_ENABLE_ARM_RISCV64)
     {
         enableSharedConstCSE = true;
     }
-#endif // TARGET_ARMARCH
+#endif // TARGET_ARMARCH || TARGET_RISCV64
 
     return enableSharedConstCSE;
 }
@@ -5898,8 +5898,8 @@ bool Compiler::optConstantCSEEnabled()
     {
         enableConstCSE = true;
     }
-#if defined(TARGET_ARMARCH)
-    else if ((configValue == CONST_CSE_ENABLE_ARM) || (configValue == CONST_CSE_ENABLE_ARM_NO_SHARING))
+#if defined(TARGET_ARMARCH) || defined(TARGET_RISCV64)
+    else if ((configValue == CONST_CSE_ENABLE_ARM_RISCV64) || (configValue == CONST_CSE_ENABLE_ARM_RISCV64_NO_SHARING))
     {
         enableConstCSE = true;
     }