-
Notifications
You must be signed in to change notification settings - Fork 12.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[lldb][riscv] Fix setting breakpoint for undecoded instruction #90075
Conversation
@llvm/pr-subscribers-lldb Author: None (ita-sc) Changes: Hi. This patch adds an interface GetLastInstrSize to get information about the size of the last instruction that decoding was attempted on, and uses it to set a software breakpoint if the memory can be decoded as an instruction. The RISC-V instruction format specifies the length of an instruction in its first bits, so we can set a breakpoint for these cases. This is needed as RISC-V has a lot of extensions that are not supported by `EmulateInstructionRISCV`. Full diff: https://github.com/llvm/llvm-project/pull/90075.diff 6 Files Affected:
diff --git a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp
index 6c46618b337c23..3f61e011d620a2 100644
--- a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp
+++ b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp
@@ -624,9 +624,26 @@ std::optional<DecodeResult> EmulateInstructionRISCV::Decode(uint32_t inst) {
uint16_t try_rvc = uint16_t(inst & 0x0000ffff);
// check whether the compressed encode could be valid
uint16_t mask = try_rvc & 0b11;
- bool is_rvc = try_rvc != 0 && mask != 3;
uint8_t inst_type = RV64;
+ // Try to get size of RISCV instruction.
+ // 1.2 Instruction Length Encoding
+ bool is_16b = (inst & 0b11) != 0b11;
+ bool is_32b = (inst & 0x1f) != 0x1f;
+ bool is_48b = (inst & 0x3f) != 0x1f;
+ bool is_64b = (inst & 0x7f) != 0x3f;
+ if (is_16b)
+ m_last_size = 2;
+ else if (is_32b)
+ m_last_size = 4;
+ else if (is_48b)
+ m_last_size = 6;
+ else if (is_64b)
+ m_last_size = 8;
+ else
+ // Not Valid
+ m_last_size = std::nullopt;
+
// if we have ArchSpec::eCore_riscv128 in the future,
// we also need to check it here
if (m_arch.GetCore() == ArchSpec::eCore_riscv32)
@@ -638,8 +655,8 @@ std::optional<DecodeResult> EmulateInstructionRISCV::Decode(uint32_t inst) {
LLDB_LOGF(
log, "EmulateInstructionRISCV::%s: inst(%x at %" PRIx64 ") was decoded to %s",
__FUNCTION__, inst, m_addr, pat.name);
- auto decoded = is_rvc ? pat.decode(try_rvc) : pat.decode(inst);
- return DecodeResult{decoded, inst, is_rvc, pat};
+ auto decoded = is_16b ? pat.decode(try_rvc) : pat.decode(inst);
+ return DecodeResult{decoded, inst, is_16b, pat};
}
}
LLDB_LOGF(log, "EmulateInstructionRISCV::%s: inst(0x%x) was unsupported",
diff --git a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.h b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.h
index 8bca73a7f589df..7eac59d9a127b5 100644
--- a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.h
+++ b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.h
@@ -60,6 +60,7 @@ class EmulateInstructionRISCV : public EmulateInstruction {
bool SetTargetTriple(const ArchSpec &arch) override;
bool ReadInstruction() override;
+ virtual std::optional<uint32_t> GetLastInstrSize() { return std::nullopt; }
bool EvaluateInstruction(uint32_t options) override;
bool TestEmulation(Stream &out_stream, ArchSpec &arch,
OptionValueDictionary *test_data) override;
@@ -99,6 +100,8 @@ class EmulateInstructionRISCV : public EmulateInstruction {
private:
/// Last decoded instruction from m_opcode
DecodeResult m_decoded;
+ /// Last tried to be decoded instruction expected size.
+ std::optional<uint32_t> m_last_size;
};
} // namespace lldb_private
diff --git a/lldb/source/Plugins/Process/Utility/NativeProcessSoftwareSingleStep.cpp b/lldb/source/Plugins/Process/Utility/NativeProcessSoftwareSingleStep.cpp
index 6bf8a0dc28b22e..a6019d09737472 100644
--- a/lldb/source/Plugins/Process/Utility/NativeProcessSoftwareSingleStep.cpp
+++ b/lldb/source/Plugins/Process/Utility/NativeProcessSoftwareSingleStep.cpp
@@ -94,6 +94,39 @@ static lldb::addr_t ReadFlags(NativeRegisterContext &regsiter_context) {
LLDB_INVALID_ADDRESS);
}
+static int GetSoftwareWatchpointSize(const ArchSpec &arch,
+ lldb::addr_t next_flags) {
+ if (arch.GetMachine() == llvm::Triple::arm) {
+ if (next_flags & 0x20)
+ // Thumb mode
+ return 2;
+ else
+ // Arm mode
+ return 4;
+ }
+ if (arch.IsMIPS() || arch.GetTriple().isPPC64() ||
+ arch.GetTriple().isRISCV() || arch.GetTriple().isLoongArch())
+ return 4;
+ return 0;
+}
+
+static Status SetSoftwareBreakPointOnPC(const ArchSpec &arch, lldb::addr_t pc,
+ lldb::addr_t next_flags,
+ NativeProcessProtocol &process) {
+ int size_hint = GetSoftwareWatchpointSize(arch, next_flags);
+ Status error;
+ error = process.SetBreakpoint(pc, size_hint, /*hardware=*/false);
+
+ // If setting the breakpoint fails because pc is out of the address
+ // space, ignore it and let the debugee segfault.
+ if (error.GetError() == EIO || error.GetError() == EFAULT) {
+ return Status();
+ } else if (error.Fail())
+ return error;
+
+ return Status();
+}
+
Status NativeProcessSoftwareSingleStep::SetupSoftwareSingleStepping(
NativeThreadProtocol &thread) {
Status error;
@@ -115,8 +148,23 @@ Status NativeProcessSoftwareSingleStep::SetupSoftwareSingleStepping(
emulator_up->SetWriteMemCallback(&WriteMemoryCallback);
emulator_up->SetWriteRegCallback(&WriteRegisterCallback);
- if (!emulator_up->ReadInstruction())
- return Status("Read instruction failed!");
+ if (!emulator_up->ReadInstruction()) {
+ // try to get at least the size of next instruction to set breakpoint.
+ auto instrSizeOpt = emulator_up->GetLastInstrSize();
+ if (!instrSizeOpt)
+ return Status("Read instruction failed!");
+ bool success = false;
+ auto pc = emulator_up->ReadRegisterUnsigned(eRegisterKindGeneric,
+ LLDB_REGNUM_GENERIC_PC,
+ LLDB_INVALID_ADDRESS, &success);
+ if (!success)
+ return Status("Reading pc failed!");
+ lldb::addr_t next_pc = pc + *instrSizeOpt;
+ auto Result =
+ SetSoftwareBreakPointOnPC(arch, next_pc, /* next_flags */ 0x0, process);
+ m_threads_stepping_with_breakpoint.insert({thread.GetID(), next_pc});
+ return Result;
+ }
bool emulation_result =
emulator_up->EvaluateInstruction(eEmulateInstructionOptionAutoAdvancePC);
@@ -157,29 +205,7 @@ Status NativeProcessSoftwareSingleStep::SetupSoftwareSingleStepping(
// modifying the PC but we don't know how.
return Status("Instruction emulation failed unexpectedly.");
}
-
- int size_hint = 0;
- if (arch.GetMachine() == llvm::Triple::arm) {
- if (next_flags & 0x20) {
- // Thumb mode
- size_hint = 2;
- } else {
- // Arm mode
- size_hint = 4;
- }
- } else if (arch.IsMIPS() || arch.GetTriple().isPPC64() ||
- arch.GetTriple().isRISCV() || arch.GetTriple().isLoongArch())
- size_hint = 4;
- error = process.SetBreakpoint(next_pc, size_hint, /*hardware=*/false);
-
- // If setting the breakpoint fails because next_pc is out of the address
- // space, ignore it and let the debugee segfault.
- if (error.GetError() == EIO || error.GetError() == EFAULT) {
- return Status();
- } else if (error.Fail())
- return error;
-
+ auto Result = SetSoftwareBreakPointOnPC(arch, next_pc, next_flags, process);
m_threads_stepping_with_breakpoint.insert({thread.GetID(), next_pc});
-
- return Status();
+ return Result;
}
diff --git a/lldb/test/API/riscv/break-undecoded/Makefile b/lldb/test/API/riscv/break-undecoded/Makefile
new file mode 100644
index 00000000000000..10495940055b63
--- /dev/null
+++ b/lldb/test/API/riscv/break-undecoded/Makefile
@@ -0,0 +1,3 @@
+C_SOURCES := main.c
+
+include Makefile.rules
diff --git a/lldb/test/API/riscv/break-undecoded/TestBreakpointIlligal.py b/lldb/test/API/riscv/break-undecoded/TestBreakpointIlligal.py
new file mode 100644
index 00000000000000..a934b5024eacc8
--- /dev/null
+++ b/lldb/test/API/riscv/break-undecoded/TestBreakpointIlligal.py
@@ -0,0 +1,27 @@
+"""
+Test that we can set up software breakpoint even if we failed to decode and execute instruction
+"""
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class TestBreakpointIlligal(TestBase):
+ @skipIf(archs=no_match(["rv64gc"]))
+ def test(self):
+ self.build()
+ (target, process, cur_thread, bkpt) = lldbutil.run_to_source_breakpoint(
+ self, "main", lldb.SBFileSpec("main.c")
+ )
+ self.runCmd("thread step-inst")
+ # we need to step more, as some compilers do not set appropriate debug info.
+ while cur_thread.GetStopDescription(256) == "instruction step into":
+ self.runCmd("thread step-inst")
+ # The stop reason of the thread should be illegal opcode.
+ self.expect(
+ "thread list",
+ STOPPED_DUE_TO_SIGNAL,
+ substrs=["stopped", "stop reason = signal SIGILL: illegal opcode"],
+ )
diff --git a/lldb/test/API/riscv/break-undecoded/main.c b/lldb/test/API/riscv/break-undecoded/main.c
new file mode 100644
index 00000000000000..ba85f4b9fa86d6
--- /dev/null
+++ b/lldb/test/API/riscv/break-undecoded/main.c
@@ -0,0 +1,7 @@
+int main() {
+ // This instruction is not valid, but we have an ability to set
+ // software breakpoint.
+ // This results illegal instruction during execution, not fail to set
+ // breakpoint
+ asm volatile(".insn r 0x73, 0, 0, a0, a1, a2" : :);
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I read this patch while trying to keep the RVC instruction set in mind because it's an interesting case to consider. Maybe the test file could have a compressed instruction? The one instruction there now ends in 0b11 so I don't think it will decode that way.
@@ -99,6 +100,8 @@ class EmulateInstructionRISCV : public EmulateInstruction { | |||
private: | |||
/// Last decoded instruction from m_opcode | |||
DecodeResult m_decoded; | |||
/// Last tried to be decoded instruction expected size. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I might call this `Last decoded instruction size`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well, I tried to emphasize that even if we fail to fully decode the instruction, we are still able to fill in this information. I'll change it to `Last decoded instruction size estimate`.
@@ -115,8 +148,23 @@ Status NativeProcessSoftwareSingleStep::SetupSoftwareSingleStepping( | |||
emulator_up->SetWriteMemCallback(&WriteMemoryCallback); | |||
emulator_up->SetWriteRegCallback(&WriteRegisterCallback); | |||
|
|||
if (!emulator_up->ReadInstruction()) | |||
return Status("Read instruction failed!"); | |||
if (!emulator_up->ReadInstruction()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't this block be `if (emulator_up->ReadInstruction())` now? We're going to get the size of the last decoded instruction here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah wait, I see. This method is trying to decode where the next instruction will go, with branches and jumps decoded, so we can put a breakpoint there. And you're handling the case where we can't decode the current instruction (I now understand why you used that in your test case). It seems harmless to call `GetLastInstrSize()` if the instruction couldn't be decoded, and add the length of the instruction to pc. We can assume the emulation engine will emulate all branching instructions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, you are right: I'm adding an interface to get instruction size, as if it is not a jump or branch, actually, we do not need to emulate it on engine. As RISCV have a lot of extensions, even vendor-specific, we can not simply emulate all of them. I've added a common interface, as it may be helpful for other architectures, as for new implementations later user will need to implement only branches and jumps.
// try to get at least the size of next instruction to set breakpoint. | ||
auto instrSizeOpt = emulator_up->GetLastInstrSize(); | ||
if (!instrSizeOpt) | ||
return Status("Read instruction failed!"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We've defined the new `GetLastInstrSize()` method for the RISCV EmulateInstruction plugin, but others like AArch64 won't have that, so this will error out on them, won't it? What we really want to express is "if `arch.GetTriple().isRISCV()` and we couldn't decode the length of the last instruction, then error out", isn't it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well, in this MR this is common code, and no other architecture implements `GetLastInstrSize` yet, so they will fail with the same error as before. For RISC-V we will generate the same error if we cannot get the size of the instruction (if we cannot read memory or if an instruction is bigger than expected).
return Status("Reading pc failed!"); | ||
lldb::addr_t next_pc = pc + *instrSizeOpt; | ||
auto Result = | ||
SetSoftwareBreakPointOnPC(arch, next_pc, /* next_flags */ 0x0, process); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We've decoded the length of the instruction at `pc` at this point, and added them to get `next_pc`. Then we pass `next_pc` to this method, which has a hardcoded size of 4 for RISCV. It's only a hint that is sent to lldb-server as it tries to step over the instruction. With armv7/aarch32 we had to get arm/thumb breakpoint instructions correct because an arm breakpoint wasn't valid when the processor was in thumb mode (iirc), but RISC-V doesn't have a processor mode like that iiuc. So maybe it's fine to have `SetSoftwareBreakPointOnPC` hardcoding 4 for the next RISCV breakpoint.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, I do not understand fully what you mean. For RISCV software breakpoint size is 4 or 2 (currently LLDB hardcodes only 4). We save memory and restore it later, so it seems we will have no problems here for different instruction sizes on memory regions where we write software breakpoint.
cb5511a
to
e45b7b7
Compare
The test intentionally has an invalid instruction. I'll add one more test with compressed illegal instruction.
|
from lldbsuite.test import lldbutil | ||
|
||
|
||
class TestBreakpointIlligal(TestBase): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
typo (Illegal). Also in the file name.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed
@@ -94,6 +94,39 @@ static lldb::addr_t ReadFlags(NativeRegisterContext &regsiter_context) { | |||
LLDB_INVALID_ADDRESS); | |||
} | |||
|
|||
static int GetSoftwareWatchpointSize(const ArchSpec &arch, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doesn't this return the size of the breakpoint (i.e., shouldn't it be called `GetSoftwareBreakpointSize`)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed, changed to GetSoftwareBreakpointSize
return 0; | ||
} | ||
|
||
static Status SetSoftwareBreakPointOnPC(const ArchSpec &arch, lldb::addr_t pc, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We usually spell "break point" as a single word (i.e., `Breakpoint`).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed
// space, ignore it and let the debugee segfault. | ||
if (error.GetError() == EIO || error.GetError() == EFAULT) { | ||
return Status(); | ||
} else if (error.Fail()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed
return Status("Read instruction failed!"); | ||
if (!emulator_up->ReadInstruction()) { | ||
// try to get at least the size of next instruction to set breakpoint. | ||
auto instrSizeOpt = emulator_up->GetLastInstrSize(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://llvm.org/docs/CodingStandards.html#don-t-use-else-after-a-return
The rule is pretty subjective, but I think the fact you felt the need to add "opt" to the name shows the code is not completely understandable without it, and I'd say that spelling out the name is the more conventional way to express that. Also, (for better or worse) most of the lldb code (the surrounding code included) uses snake_case for variable name, so it's better to stick to that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed to instr_size
e45b7b7
to
a8ee2a2
Compare
✅ With the latest revision this PR passed the Python code formatter. |
936846a
to
b30ddc6
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A few small nits. I defer to Pavel and Jason for the functional changes.
if (next_flags & 0x20) | ||
// Thumb mode | ||
return 2; | ||
else |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`else` after `return`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed
return 0; | ||
} | ||
|
||
static Status SetSoftwareBreakpointOnPC(const ArchSpec &arch, lldb::addr_t pc, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Small nit, but in recent years we've been preferring to use `llvm::Error` over `lldb::Status` where possible. The benefit of `Error` is that it must be checked and it's trivial to convert between the two. It would be nice if this function would return an `llvm::Error`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would like to keep `Status` in this MR, as from my point of view this MR would otherwise do more than it should -- it would add new functionality and also change the way we work with errors here.
Moreover, changing this will result in unnecessary casts from `Status` to `Error` (`process.SetBreakpoint`) and from `Error` to `Status` (`SetupSoftwareSingleStepping`).
Note: I've checked that we have `Status::ToError` and the `Status(llvm::Error error)` ctor, but I'm not sure these functions will suffice (AFAIU `m_code` will be saved only if `m_type == ErrorType::eErrorTypePOSIX`).
Copy gdb behaviour: For RISCV we can set breakpoint even for unknown instruction, as RISCV encoding have information about size of instruction.
b30ddc6
to
a6e9602
Compare
Gentle ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for pinging this. Sorry it dropped off my radar.
Thanks. Could you please merge this? (I do not have write access yet) |
…90075) Summary: This patch adds an interface GetLastInstrSize to get information about the size of last tried to be decoded instruction and uses it to set software breakpoint if the memory can be decoded as instruction. RISC-V architecture instruction format specifies the length of instruction in first bits, so we can set a breakpoint for these cases. This is needed as RISCV have a lot of extensions, that are not supported by `EmulateInstructionRISCV`. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D59822425
Summary: This patch adds an interface GetLastInstrSize to get information about the size of last tried to be decoded instruction and uses it to set software breakpoint if the memory can be decoded as instruction. RISC-V architecture instruction format specifies the length of instruction in first bits, so we can set a breakpoint for these cases. This is needed as RISCV have a lot of extensions, that are not supported by `EmulateInstructionRISCV`. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251663
Hi
This patch adds an interface GetLastInstrSize to get information about the size of last tried to be decoded instruction and uses it to set software breakpoint if the memory can be decoded as instruction.
The RISC-V instruction format specifies the length of an instruction in its first bits, so we can set a breakpoint for these cases. This is needed as RISC-V has a lot of extensions that are not supported by `EmulateInstructionRISCV`.