-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISC-V] Add quirks for riscv to R2RDump #101683
Changes from 3 commits
33afb41
68cfb45
cbee233
9227818
7c96325
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -359,6 +359,10 @@ public int GetInstruction(RuntimeFunction rtf, int imageOffset, int rtfOffset, o | |
case Machine.ArmThumb2: | ||
break; | ||
|
||
case Machine.RiscV64: | ||
ProbeRiscV64Quirks(rtf, imageOffset, rtfOffset, ref fixedTranslatedLine); | ||
break; | ||
|
||
default: | ||
break; | ||
} | ||
|
@@ -1209,6 +1213,235 @@ private static bool IsAnotherRuntimeFunctionWithinMethod(int rva, RuntimeFunctio | |
return false; | ||
} | ||
|
||
/// <summary> | ||
/// Improves disassembler output for RiscV64 by adding comments at the end of instructions. | ||
/// </summary> | ||
/// <param name="rtf">Runtime function</param> | ||
/// <param name="imageOffset">Offset within the image byte array</param> | ||
/// <param name="rtfOffset">Offset within the runtime function</param> | ||
/// <param name="instruction">Textual representation of the instruction</param> | ||
private void ProbeRiscV64Quirks(RuntimeFunction rtf, int imageOffset, int rtfOffset, ref string instruction) | ||
{ | ||
const int InstructionSize = 4; | ||
uint instr = BitConverter.ToUInt32(_reader.Image, imageOffset + rtfOffset); | ||
|
||
/* | ||
Pattern: | ||
auipc | ||
addi | ||
ld | ||
jalr | ||
, where addi and ld may be repeated many times. | ||
Instructions that have no effect on the jump | ||
address calculation are skipped. | ||
*/ | ||
if (IsRiscV64JalrInstruction(instr)) | ||
{ | ||
AnalyzeRiscV64Itype(instr, out uint rd, out uint rs1, out int imm); | ||
uint register = rs1; | ||
int immValue = imm; | ||
|
||
int target = 0; | ||
bool isFound = false; | ||
int currentInstrOffset = rtfOffset - InstructionSize; | ||
// skip all subsequent ld,lw instructions until first non-ld,lw instruction | ||
do | ||
{ | ||
instr = BitConverter.ToUInt32(_reader.Image, imageOffset + currentInstrOffset); | ||
|
||
if (IsRiscV64LdInstruction(instr) || IsRiscV64LwInstruction(instr)) | ||
{ | ||
AnalyzeRiscV64Itype(instr, out rd, out rs1, out imm); | ||
if (rd == register) | ||
{ | ||
immValue = imm; | ||
register = rs1; | ||
} | ||
} | ||
else | ||
{ | ||
// frist non-ld instruction | ||
jkotas marked this conversation as resolved.
Show resolved
Hide resolved
|
||
isFound = StaticAnalyzeRiscV64Assembly( | ||
rtf.StartAddress + currentInstrOffset, | ||
currentInstrOffset, | ||
imageOffset, | ||
register, | ||
ref target); | ||
break; | ||
} | ||
|
||
currentInstrOffset -= InstructionSize; | ||
} while (currentInstrOffset > 0); | ||
|
||
if (isFound) | ||
{ | ||
if (!TryGetImportCellName(target + immValue, out string targetName) || string.IsNullOrWhiteSpace(targetName)) | ||
{ | ||
return; | ||
} | ||
|
||
StringBuilder updatedInstruction = new(); | ||
updatedInstruction.Append(instruction); | ||
updatedInstruction.Append(" // "); | ||
updatedInstruction.Append(targetName); | ||
instruction = updatedInstruction.ToString(); | ||
} | ||
} | ||
|
||
} | ||
|
||
/// <summary> | ||
/// Counts the value stored in a given register. | ||
/// </summary> | ||
/// <param name="currentPC">Program Counter of analyzed instruction</param> | ||
/// <param name="currentInstrOffset">Offset within the runtime function of analyzed instruction</param> | ||
/// <param name="imageOffset">Offset within the image byte array</param> | ||
/// <param name="register">Register whose value is being searched for</param> | ||
/// <param name="registerValue">Found value of the register</param> | ||
/// <returns>If the value stored in a given register is countable, the function returns true. Otherwise false</returns> | ||
private bool StaticAnalyzeRiscV64Assembly(int currentPC, int currentInstrOffset, int imageOffset, uint register, ref int registerValue) | ||
{ | ||
const int InstructionSize = 4; | ||
|
||
// if currentInstrOffset is less than 0 then it is the end of this analyzed method | ||
if (currentInstrOffset < 0) | ||
{ | ||
return false; | ||
} | ||
|
||
do | ||
{ | ||
uint instr = BitConverter.ToUInt32(_reader.Image, imageOffset + currentInstrOffset); | ||
if (IsRiscV64AddiInstruction(instr)) | ||
{ | ||
AnalyzeRiscV64Itype(instr, out uint rd, out uint rs1, out int imm); | ||
if (rd == register) | ||
SzpejnaDawid marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
int rs1Value = 0; | ||
bool returnValue = StaticAnalyzeRiscV64Assembly(currentPC - InstructionSize, currentInstrOffset - InstructionSize, imageOffset, rs1, ref rs1Value); | ||
registerValue = rs1Value + imm; | ||
return returnValue; | ||
} | ||
} | ||
else if (IsRiscV64AuipcInstruction(instr)) | ||
{ | ||
AnalyzeRiscV64Utype(instr, out uint rd, out int imm); | ||
if (rd == register) | ||
{ | ||
registerValue = currentPC + imm; | ||
return true; | ||
} | ||
} | ||
else | ||
{ | ||
// check if "register" is calculated using an unsupported instruction | ||
uint rd = (instr >> 7) & 0b_11111U; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If instr is BType, JType or SType like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In my honest opinion, i have never seen a situation when an instruction, like |
||
if (rd == register) | ||
{ | ||
return false; | ||
} | ||
} | ||
|
||
currentInstrOffset -= InstructionSize; | ||
currentPC -= InstructionSize; | ||
} while (currentInstrOffset > 0); | ||
|
||
return false; | ||
} | ||
|
||
/// <summary> | ||
/// Checks if instruction is auipc. | ||
/// </summary> | ||
/// <param name="instruction">Assembly code of instruction</param> | ||
/// <returns>It returns true if instruction is auipc. Otherwise false</returns> | ||
private bool IsRiscV64AuipcInstruction(uint instruction) | ||
{ | ||
const uint OpcodeAuipc = 0b_0010111; | ||
return (instruction & 0x7f) == OpcodeAuipc; | ||
} | ||
|
||
/// <summary> | ||
/// Checks if instruction is jalr. | ||
/// </summary> | ||
/// <param name="instruction">Assembly code of instruction</param> | ||
/// <returns>It returns true if instruction is jalr. Otherwise false</returns> | ||
private bool IsRiscV64JalrInstruction(uint instruction) | ||
{ | ||
const uint OpcodeJalr = 0b_1100111; | ||
const uint Funct3Jalr = 0b_000; | ||
return (instruction & 0x7f) == OpcodeJalr && | ||
((instruction >> 12) & 0b_111) == Funct3Jalr; | ||
} | ||
|
||
/// <summary> | ||
/// Checks if instruction is addi. | ||
/// </summary> | ||
/// <param name="instruction">Assembly code of instruction</param> | ||
/// <returns>It returns true if instruction is addi. Otherwise false</returns> | ||
private bool IsRiscV64AddiInstruction(uint instruction) | ||
{ | ||
const uint OpcodeAddi = 0b_0010011; | ||
const uint Funct3Addi = 0b_000; | ||
return (instruction & 0x7f) == OpcodeAddi && | ||
((instruction >> 12) & 0b_111) == Funct3Addi; | ||
} | ||
|
||
/// <summary> | ||
/// Checks if instruction is ld. | ||
/// </summary> | ||
/// <param name="instruction">Assembly code of instruction</param> | ||
/// <returns>It returns true if instruction is ld. Otherwise false</returns> | ||
private bool IsRiscV64LdInstruction(uint instruction) | ||
{ | ||
const uint OpcodeLd = 0b_0000011; | ||
const uint Funct3Ld = 0b_011; | ||
return (instruction & 0x7f) == OpcodeLd && | ||
((instruction >> 12) & 0b_111) == Funct3Ld; | ||
} | ||
|
||
/// <summary> | ||
/// Checks if instruction is lw. | ||
/// </summary> | ||
/// <param name="instruction">Assembly code of instruction</param> | ||
/// <returns>It returns true if instruction is lw. Otherwise false</returns> | ||
private bool IsRiscV64LwInstruction(uint instruction) | ||
{ | ||
const uint OpcodeLw = 0b_0000011; | ||
const uint Funct3Lw = 0b_010; | ||
return (instruction & 0x7f) == OpcodeLw && | ||
((instruction >> 12) & 0b_111) == Funct3Lw; | ||
} | ||
|
||
/// <summary> | ||
/// Retrieves output register and immediate value from U-type instruction. | ||
/// </summary> | ||
/// <param name="instruction">Assembly code of instruction</param> | ||
/// <param name="rd">Output register</param> | ||
/// <param name="imm">Immediate value</param> | ||
private void AnalyzeRiscV64Utype(uint instruction, out uint rd, out int imm) | ||
{ | ||
// U-type 31 12 11 7 6 0 | ||
// [ imm ] [ rd ] [ opcode ] | ||
rd = (instruction >> 7) & 0b_11111U; | ||
imm = unchecked((int)(instruction & (0xfffff << 12))); | ||
} | ||
|
||
/// <summary> | ||
/// Retrieves output register, resource register and immediate value from U-type instruction. | ||
/// </summary> | ||
/// <param name="instruction">Assembly code of instruction</param> | ||
/// <param name="rd">Output register</param> | ||
/// <param name="rs1">Resource register</param> | ||
/// <param name="imm">Immediate value</param> | ||
private void AnalyzeRiscV64Itype(uint instruction, out uint rd, out uint rs1, out int imm) | ||
{ | ||
// I-type 31 20 19 15 14 12 11 7 6 0 | ||
// [ imm ] [ rs1 ] [ funct3 ] [ rd ] [ opcode ] | ||
rd = (instruction >> 7) & 0b_11111U; | ||
rs1 = (instruction >> 15) & 0b_11111U; | ||
imm = unchecked((int)instruction) >> 20; | ||
} | ||
|
||
/// <summary> | ||
/// Determine whether a given character is an ASCII digit. | ||
/// </summary> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am just curious. In your examples, check for
lw
instruction is not necessary. Could you give an example withlw
instruction? Thank you.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As i know there is no function call which uses
lw
to calculate jump address. However, this check forlw
is necessary because this firstif
is trying to skip allld
andlw
instructions which are used to push arguments (likely). For exampleAs you can see, the sequence of
lw
andld
instructions is not fixed.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well. IMO,
lw
instructions can be skipped in line 1334 inStaticAnalyzeRiscV64Assembly
. Am I wrong?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if we want to skip
lw
inStaticAnalyzeRiscV64Assembly
, we will have to skipld
too. In other case, we would have a problem, for example, with this:There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see. You are right. However, actually what I want are removing 'lw' and implementing simpler like #102146. Could you check loongarch PR and share your idea? Thank you.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I simplified code which calculates callee address
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no problem :D