From d2ced94133f4e10972ae1845734aa0a204e6d6ae Mon Sep 17 00:00:00 2001 From: Fuad Ismail Date: Wed, 9 Apr 2025 18:39:18 +0700 Subject: [PATCH 1/3] [RISC-V] Group load immediate instructions into a single instrDesc and print imm value in disasm --- src/coreclr/jit/emit.h | 28 ++++++++ src/coreclr/jit/emitriscv64.cpp | 121 ++++++++++++++++++++++++-------- src/coreclr/jit/emitriscv64.h | 1 + src/coreclr/jit/instr.h | 25 +++---- 4 files changed, 135 insertions(+), 40 deletions(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index ef1fd2f701fc15..185c62a68bbc64 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -2214,6 +2214,18 @@ class emitter }; #endif +#ifdef TARGET_RISCV64 + struct instrDescLoadImm : instrDescCns + { + instrDescLoadImm() = delete; + + static const int absMaxInsCount = 8; + + instruction ins[absMaxInsCount]; + int32_t values[absMaxInsCount]; + }; +#endif // TARGET_RISCV64 + struct instrDescCGCA : instrDesc // call with ... { instrDescCGCA() = delete; @@ -3149,6 +3161,10 @@ class emitter instrDesc* emitNewInstrLclVarPair(emitAttr attr, cnsval_ssize_t cns); #endif // !TARGET_ARM64 +#ifdef TARGET_RISCV64 + instrDesc* emitNewInstrLoadImm(emitAttr attr, cnsval_ssize_t cns); +#endif // TARGET_RISCV64 + static const BYTE emitFmtToOps[]; #ifdef DEBUG @@ -3981,6 +3997,18 @@ inline emitter::instrDesc* emitter::emitNewInstrReloc(emitAttr attr, BYTE* addr) #endif // TARGET_ARM +#ifdef TARGET_RISCV64 + +inline emitter::instrDesc* emitter::emitNewInstrLoadImm(emitAttr attr, cnsval_ssize_t cns) +{ + instrDescLoadImm* id = static_cast<instrDescLoadImm*>(emitAllocAnyInstr(sizeof(instrDescLoadImm), attr)); + id->idInsOpt(INS_OPTS_I); + id->idcCnsVal = cns; + return id; +} + +#endif // TARGET_RISCV64 + #ifdef TARGET_XARCH /***************************************************************************** diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index d11b8e52f6c4aa..9ca16d3891ede4 100644 --- 
a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -97,6 +97,8 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) const case INS_OPTS_RELOC: case INS_OPTS_NONE: return sizeof(instrDesc); + case INS_OPTS_I: + return sizeof(instrDescLoadImm); default: NO_WAY("unexpected instruction descriptor format"); break; @@ -1362,7 +1364,7 @@ void emitter::emitLoadImmediate(emitAttr size, regNumber reg, ssize_t imm) * b <-a * */ - constexpr int absMaxInsCount = 8; + constexpr int absMaxInsCount = instrDescLoadImm::absMaxInsCount; constexpr int prefMaxInsCount = 5; assert(prefMaxInsCount <= absMaxInsCount); @@ -1553,33 +1555,21 @@ void emitter::emitLoadImmediate(emitAttr size, regNumber reg, ssize_t imm) if (numberOfInstructions <= insCountLimit) { - for (int i = 0; i < numberOfInstructions; i++) - { - if ((i == 0) && (ins[0] == INS_lui)) - { - emitIns_R_I(ins[i], size, reg, values[i]); - } - else if ((i == 0) && ((ins[0] == INS_addiw) || (ins[0] == INS_addi))) - { - emitIns_R_R_I(ins[i], size, reg, REG_R0, values[i]); - } - else if (i == 0) - { - assert(false && "First instruction must be lui / addiw / addi"); - } - else if ((ins[i] == INS_addi) || (ins[i] == INS_addiw) || (ins[i] == INS_slli)) - { - emitIns_R_R_I(ins[i], size, reg, reg, values[i]); - } - else - { - assert(false && "Remainding instructions must be addi / addiw / slli"); - } - } + instrDescLoadImm* id = static_cast<instrDescLoadImm*>(emitNewInstrLoadImm(size, originalImm)); + id->idReg1(reg); + memcpy(id->ins, ins, sizeof(instruction) * numberOfInstructions); + memcpy(id->values, values, sizeof(int32_t) * numberOfInstructions); if (utilizeSRLI) { - emitIns_R_R_I(INS_srli, size, reg, reg, srliShiftAmount); + numberOfInstructions += 1; + assert(numberOfInstructions < absMaxInsCount); + id->ins[numberOfInstructions - 1] = INS_srli; + id->values[numberOfInstructions - 1] = srliShiftAmount; } + id->idCodeSize(numberOfInstructions * 4); + id->idIns(id->ins[numberOfInstructions - 1]); + + appendToCurIG(id); } else 
if (size == EA_PTRSIZE) { @@ -3444,6 +3434,50 @@ BYTE* emitter::emitOutputInstr_OptsC(BYTE* dst, instrDesc* id, const insGroup* i return dst; } +BYTE* emitter::emitOutputInstr_OptsI(BYTE* dst, instrDesc* id, instruction* lastIns) +{ + assert(id->idInsOpt() == INS_OPTS_I); + + instrDescLoadImm* idli = static_cast<instrDescLoadImm*>(id); + instruction* ins = idli->ins; + int32_t* values = idli->values; + regNumber reg = idli->idReg1(); + + assert((reg != REG_NA) && (reg != REG_R0)); + + int numberOfInstructions = idli->idCodeSize() / sizeof(code_t); + for (int i = 0; i < numberOfInstructions; i++) + { + if ((i == 0) && (ins[0] == INS_lui)) + { + assert(isValidSimm20(values[i])); + dst += emitOutput_UTypeInstr(dst, ins[i], reg, values[i] & 0xfffff); + } + else if ((i == 0) && ((ins[0] == INS_addiw) || (ins[0] == INS_addi))) + { + assert(isValidSimm12(values[i]) || ((ins[i] == INS_addiw) && isValidUimm12(values[i]))); + dst += emitOutput_ITypeInstr(dst, ins[i], reg, REG_R0, values[i] & 0xfff); + } + else if (i == 0) + { + assert(false && "First instruction must be lui / addiw / addi"); + } + else if ((ins[i] == INS_addi) || (ins[i] == INS_addiw) || (ins[i] == INS_slli) || (ins[i] == INS_srli)) + { + assert(isValidSimm12(values[i]) || ((ins[i] == INS_addiw) && isValidUimm12(values[i]))); + dst += emitOutput_ITypeInstr(dst, ins[i], reg, reg, values[i] & 0xfff); + } + else + { + assert(false && "Remainding instructions must be addi / addiw / slli / srli"); + } + } + + *lastIns = ins[numberOfInstructions - 1]; + + return dst; +} + /***************************************************************************** * * Append the machine code corresponding to the given instruction descriptor @@ -3497,6 +3531,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst2 = dst; ins = INS_nop; break; + case INS_OPTS_I: + dst = emitOutputInstr_OptsI(dst, id, &ins); + sz = sizeof(instrDescLoadImm); + break; default: // case INS_OPTS_NONE: dst += emitOutput_Instr(dst, 
id->idAddr()->iiaGetInstrEncode()); ins = id->idIns(); @@ -3752,6 +3790,8 @@ void emitter::emitDispInsName( printf(" "); + bool willPrintLoadImmValue = (id->idInsOpt() == INS_OPTS_I) && !emitComp->opts.disDiffable; + switch (GetMajorOpcode(code)) { case MajorOpcode::Lui: @@ -3763,7 +3803,7 @@ void emitter::emitDispInsName( imm20 |= 0xfff00000; } printf("lui %s, ", rd); - emitDispImmediate(imm20); + emitDispImmediate(imm20, !willPrintLoadImmValue); return; } case MajorOpcode::Auipc: @@ -3881,7 +3921,10 @@ void emitter::emitDispInsName( printf("%d", imm12); } } - printf("\n"); + if (!willPrintLoadImmValue) + { + printf("\n"); + } return; } @@ -3901,7 +3944,7 @@ void emitter::emitDispInsName( else { printf("addiw %s, %s, ", rd, rs1); - emitDispImmediate(imm12); + emitDispImmediate(imm12, !willPrintLoadImmValue); } return; case 0x1: @@ -4749,6 +4792,8 @@ void emitter::emitDispIns( emitDispInsInstrNum(id); + bool willPrintLoadImmValue = (id->idInsOpt() == INS_OPTS_I) && !emitComp->opts.disDiffable; + const BYTE* instr = pCode + writeableOffset; unsigned instrSize; for (size_t i = 0; i < sz; instr += instrSize, i += instrSize, offset += instrSize) @@ -4764,6 +4809,17 @@ void emitter::emitDispIns( } #endif emitDispInsName(instruction, instr, doffs, offset, id, ig); + + if (willPrintLoadImmValue && ((i + instrSize) < sz)) + { + printf("\n"); + } + } + + if (willPrintLoadImmValue) + { + instrDescLoadImm* liid = static_cast<instrDescLoadImm*>(id); + printf("\t\t;; load imm: hex=0x%016lX dec=%ld\n", liid->idcCnsVal, liid->idcCnsVal); } } @@ -5446,6 +5502,15 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins unsigned codeSize = id->idCodeSize(); assert((codeSize >= 4) && (codeSize % sizeof(code_t) == 0)); + + // instrDescLoadImm consits of OpImm, OpImm32, and Lui instructions. 
+ if (id->idInsOpt() == INS_OPTS_I) + { + result.insLatency = PERFSCORE_LATENCY_1C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X * (codeSize / sizeof(code_t)); + return result; + } + // Some instructions like jumps or loads may have not-yet-known simple auxilliary instructions (lui, addi, slli, // etc) for building immediates, assume cost of one each. float immediateBuildingCost = ((codeSize / sizeof(code_t)) - 1) * PERFSCORE_LATENCY_1C; diff --git a/src/coreclr/jit/emitriscv64.h b/src/coreclr/jit/emitriscv64.h index 8fb130d42f239f..b846eb5ee65be1 100644 --- a/src/coreclr/jit/emitriscv64.h +++ b/src/coreclr/jit/emitriscv64.h @@ -133,6 +133,7 @@ BYTE* emitOutputInstr_OptsJalr28(BYTE* dst, const instrDescJmp* jmp, ssize_t imm BYTE* emitOutputInstr_OptsJCond(BYTE* dst, instrDesc* id, const insGroup* ig, instruction* ins); BYTE* emitOutputInstr_OptsJ(BYTE* dst, instrDesc* id, const insGroup* ig, instruction* ins); BYTE* emitOutputInstr_OptsC(BYTE* dst, instrDesc* id, const insGroup* ig, size_t* size); +BYTE* emitOutputInstr_OptsI(BYTE* dst, instrDesc* id, instruction* ins); static unsigned TrimSignedToImm12(ssize_t imm12); static unsigned TrimSignedToImm13(ssize_t imm13); diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 305a8f7124d7df..b8f1adfe762d33 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -222,8 +222,8 @@ enum insFlags : uint64_t Encoding_REX2 = 1ULL << 44, // APX: EVEX.ND: - INS_Flags_Has_NDD = 1ULL << 45, - + INS_Flags_Has_NDD = 1ULL << 45, + // APX: EVEX.NF: INS_Flags_Has_NF = 1ULL << 46, @@ -438,16 +438,16 @@ enum insSvePattern : unsigned enum insSvePrfop : unsigned { SVE_PRFOP_PLDL1KEEP = 0b0000, - SVE_PRFOP_PLDL1STRM = 0b0001, - SVE_PRFOP_PLDL2KEEP = 0b0010, - SVE_PRFOP_PLDL2STRM = 0b0011, - SVE_PRFOP_PLDL3KEEP = 0b0100, - SVE_PRFOP_PLDL3STRM = 0b0101, - SVE_PRFOP_PSTL1KEEP = 0b1000, - SVE_PRFOP_PSTL1STRM = 0b1001, - SVE_PRFOP_PSTL2KEEP = 0b1010, - SVE_PRFOP_PSTL2STRM = 0b1011, - SVE_PRFOP_PSTL3KEEP = 0b1100, 
+ SVE_PRFOP_PLDL1STRM = 0b0001, + SVE_PRFOP_PLDL2KEEP = 0b0010, + SVE_PRFOP_PLDL2STRM = 0b0011, + SVE_PRFOP_PLDL3KEEP = 0b0100, + SVE_PRFOP_PLDL3STRM = 0b0101, + SVE_PRFOP_PSTL1KEEP = 0b1000, + SVE_PRFOP_PSTL1STRM = 0b1001, + SVE_PRFOP_PSTL2KEEP = 0b1010, + SVE_PRFOP_PSTL2STRM = 0b1011, + SVE_PRFOP_PSTL3KEEP = 0b1100, SVE_PRFOP_PSTL3STRM = 0b1101, SVE_PRFOP_CONST6 = 0b0110, @@ -532,6 +532,7 @@ enum insOpts : unsigned INS_OPTS_JALR, // see ::emitIns_J_R(). INS_OPTS_J, // see ::emitIns_J(). INS_OPTS_J_cond, // see ::emitIns_J_cond_la(). + INS_OPTS_I, // see ::emitLoadImmediate(). INS_OPTS_C, // see ::emitIns_Call(). INS_OPTS_RELOC, // see ::emitIns_R_AI(). }; From 533156096ae6d63f280324ee8a81a0299beb2ca5 Mon Sep 17 00:00:00 2001 From: Fuad Ismail Date: Thu, 10 Apr 2025 15:55:27 +0700 Subject: [PATCH 2/3] [RISC-V] Correct performance score for instrDescLoadImm --- src/coreclr/jit/emitriscv64.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index 9ca16d3891ede4..22ad0d0dfded23 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -5503,21 +5503,14 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins unsigned codeSize = id->idCodeSize(); assert((codeSize >= 4) && (codeSize % sizeof(code_t) == 0)); - // instrDescLoadImm consits of OpImm, OpImm32, and Lui instructions. - if (id->idInsOpt() == INS_OPTS_I) - { - result.insLatency = PERFSCORE_LATENCY_1C; - result.insThroughput = PERFSCORE_THROUGHPUT_2X * (codeSize / sizeof(code_t)); - return result; - } - // Some instructions like jumps or loads may have not-yet-known simple auxilliary instructions (lui, addi, slli, // etc) for building immediates, assume cost of one each. + // instrDescLoadImm consits of OpImm, OpImm32, and Lui instructions. 
float immediateBuildingCost = ((codeSize / sizeof(code_t)) - 1) * PERFSCORE_LATENCY_1C; instruction ins = id->idIns(); assert(ins != INS_invalid); - if (ins == INS_lea) + if ((ins == INS_lea) || (id->idInsOpt() == INS_OPTS_I)) { result.insLatency += immediateBuildingCost; result.insThroughput += immediateBuildingCost; From 96b8a19dd6411ef3eb6643537218cd6164280f06 Mon Sep 17 00:00:00 2001 From: Fuad Ismail Date: Fri, 11 Apr 2025 16:28:37 +0700 Subject: [PATCH 3/3] [RISC-V] Fix typos --- src/coreclr/jit/emitriscv64.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index 22ad0d0dfded23..1b38e9688f5922 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -3469,7 +3469,7 @@ BYTE* emitter::emitOutputInstr_OptsI(BYTE* dst, instrDesc* id, instruction* last } else { - assert(false && "Remainding instructions must be addi / addiw / slli / srli"); + assert(false && "Remaining instructions must be addi / addiw / slli / srli"); } } @@ -5505,7 +5505,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins // Some instructions like jumps or loads may have not-yet-known simple auxilliary instructions (lui, addi, slli, // etc) for building immediates, assume cost of one each. - // instrDescLoadImm consits of OpImm, OpImm32, and Lui instructions. + // instrDescLoadImm consists of OpImm, OpImm32, and Lui instructions. float immediateBuildingCost = ((codeSize / sizeof(code_t)) - 1) * PERFSCORE_LATENCY_1C; instruction ins = id->idIns();