diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index ae64f6f261bad..6c5c5162e2468 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -114,33 +114,6 @@ class ProcessOptionValueProperties } }; -class ProcessMemoryIterator { -public: - ProcessMemoryIterator(Process &process, lldb::addr_t base) - : m_process(process), m_base_addr(base) {} - - bool IsValid() { return m_is_valid; } - - uint8_t operator[](lldb::addr_t offset) { - if (!IsValid()) - return 0; - - uint8_t retval = 0; - Status error; - if (0 == m_process.ReadMemory(m_base_addr + offset, &retval, 1, error)) { - m_is_valid = false; - return 0; - } - - return retval; - } - -private: - Process &m_process; - const lldb::addr_t m_base_addr; - bool m_is_valid = true; -}; - static constexpr OptionEnumValueElement g_follow_fork_mode_values[] = { { eFollowParent, @@ -3379,21 +3352,49 @@ lldb::addr_t Process::FindInMemory(lldb::addr_t low, lldb::addr_t high, if (region_size < size) return LLDB_INVALID_ADDRESS; + // See "Boyer-Moore string search algorithm". std::vector<size_t> bad_char_heuristic(256, size); - ProcessMemoryIterator iterator(*this, low); - for (size_t idx = 0; idx < size - 1; idx++) { decltype(bad_char_heuristic)::size_type bcu_idx = buf[idx]; bad_char_heuristic[bcu_idx] = size - idx - 1; } - for (size_t s = 0; s <= (region_size - size);) { + + // Memory we're currently searching through. + llvm::SmallVector<uint8_t, 0> mem; + // Position of the memory buffer. + addr_t mem_pos = low; + // Maximum number of bytes read (and buffered). We need to read at least + // `size` bytes for a successful match. + const size_t max_read_size = std::max<size_t>(size, 0x10000); + + for (addr_t cur_addr = low; cur_addr <= (high - size);) { + if (cur_addr + size > mem_pos + mem.size()) { + // We need to read more data. We don't attempt to reuse the data we've + // already read (up to `size-1` bytes from `cur_addr` to + // `mem_pos+mem.size()`). 
This is fine for patterns much smaller than + // max_read_size. For very + // long patterns we may need to do something more elaborate. + mem.resize_for_overwrite(max_read_size); + Status error; + mem.resize(ReadMemory(cur_addr, mem.data(), + std::min<addr_t>(mem.size(), high - cur_addr), error)); + mem_pos = cur_addr; + if (size > mem.size()) { + // We didn't read enough data. Skip to the next memory region. + MemoryRegionInfo info; + error = GetMemoryRegionInfo(mem_pos + mem.size(), info); + if (error.Fail()) + break; + cur_addr = info.GetRange().GetRangeEnd(); + continue; + } + } int64_t j = size - 1; - while (j >= 0 && buf[j] == iterator[s + j]) + while (j >= 0 && buf[j] == mem[cur_addr + j - mem_pos]) j--; if (j < 0) - return low + s; - else - s += bad_char_heuristic[iterator[s + size - 1]]; + return cur_addr; // We have a match. + cur_addr += bad_char_heuristic[mem[cur_addr + size - 1 - mem_pos]]; } return LLDB_INVALID_ADDRESS; diff --git a/lldb/test/API/functionalities/memory/holes/TestMemoryHoles.py b/lldb/test/API/functionalities/memory/holes/TestMemoryHoles.py index 1c2c90d483ea3..c61ae15b9dda7 100644 --- a/lldb/test/API/functionalities/memory/holes/TestMemoryHoles.py +++ b/lldb/test/API/functionalities/memory/holes/TestMemoryHoles.py @@ -43,6 +43,9 @@ def _prepare_inferior(self): # inside the holes we've deliberately left empty. 
self.memory = self.frame().FindVariable("mem_with_holes").GetValueAsUnsigned() self.pagesize = self.frame().FindVariable("pagesize").GetValueAsUnsigned() + self.num_pages = ( + self.target().FindFirstGlobalVariable("num_pages").GetValueAsUnsigned() + ) positions = self.frame().FindVariable("positions") self.positions = [ positions.GetChildAtIndex(i).GetValueAsUnsigned() @@ -58,3 +61,13 @@ def test_memory_read(self): self.assertEqual(len(content), self.pagesize) self.assertEqual(content[0:7], b"needle\0") self.assertTrue(error.Fail()) + + def test_memory_find(self): + self._prepare_inferior() + + matches = [f"data found at location: {p:#x}" for p in self.positions] + self.expect( + f'memory find --count {len(self.positions)+1} --string "needle" ' + f"{self.memory:#x} {self.memory+self.pagesize*self.num_pages:#x}", + substrs=matches + ["no more matches within the range"], + )