Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add procmem slicing and refactor to make proper MemoryBuffer interface #122

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
5 changes: 3 additions & 2 deletions malduck/extractor/extract_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def carve_procmem(self, p: ProcessMemory) -> Iterator[ProcessMemoryBinary]:
for carved_bin in carved_bins:
log.debug(
f"carve: Found {carved_bin.__class__.__name__} "
f"at offset {carved_bin.regions[0].offset}"
f"at {hex(carved_bin.imgbase)}"
)
yield carved_bin

Expand Down Expand Up @@ -211,7 +211,8 @@ def push_procmem(

family = self._extract_procmem(p, matches)
for binary in binaries:
family = self._extract_procmem(binary, matches) or family
with binary:
family = self._extract_procmem(binary, matches) or family
return family

@property
Expand Down
3 changes: 2 additions & 1 deletion malduck/procmem/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .cuckoomem import CuckooProcessMemory, cuckoomem
from .idamem import IDAProcessMemory, idamem
from .procmem import MemoryBuffer, ProcessMemory, procmem
from .membuf import MemoryBuffer
from .procmem import ProcessMemory, procmem
from .procmemdnpe import ProcessMemoryDnPE, procmemdnpe
from .procmemelf import ProcessMemoryELF, procmemelf
from .procmempe import ProcessMemoryPE, procmempe
Expand Down
6 changes: 4 additions & 2 deletions malduck/procmem/binmem.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,12 @@ def load_binaries_from_memory(cls: Type[T], procmem: ProcessMemory) -> Iterator[
if cls.__magic__ is None:
raise NotImplementedError()
for binary_va in procmem.findv(cls.__magic__):
binary_procmem_dmp = cls.from_memory(procmem, base=binary_va)
binary_procmem_dmp = cls.from_memory_slice(procmem, binary_va)
binary_procmem_img = binary_procmem_dmp.image
# Binaries must be yielded at the end as they may be
# released by caller after that
if binary_procmem_dmp.is_valid():
yield binary_procmem_dmp
binary_procmem_img = binary_procmem_dmp.image
if binary_procmem_img and binary_procmem_img.is_valid():
yield binary_procmem_img

Expand Down
41 changes: 27 additions & 14 deletions malduck/procmem/idamem.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from .procmem import MemoryBuffer, ProcessMemory
from typing import Iterator, Optional, Tuple

from .membuf import MemoryBuffer
from .procmem import ProcessMemory
from .region import Region

try:
Expand All @@ -13,24 +16,21 @@
__all__ = ["IDAProcessMemory", "idamem"]


class IDAVM(MemoryBuffer):
def __init__(self, idamem):
class IDAMemoryBuffer(MemoryBuffer):
def __init__(self, idamem: "IDAProcessMemory") -> None:
# Depends on region information from IDAProcessMemory
self.idamem = idamem

def _get_ea_range(self, item):
if isinstance(item, slice):
offset = item.start or 0
length = (item.stop or len(self)) - offset
else:
offset = item
length = 1
def _get_ea_range(self, item: slice) -> Iterator[Tuple[int, int]]:
offset = item.start or 0
length = (item.stop or len(self)) - offset
for region in self.idamem.regions:
if region.offset < offset + length and offset < region.end_offset:
ea_start = min(max(region.p2v(offset), region.addr), region.end)
ea_end = min(max(region.p2v(offset + length), region.addr), region.end)
yield (ea_start, ea_end)

def __setitem__(self, item, value):
def __setitem__(self, item: slice, value: bytes) -> None:
value_bytes = iter(value)
for ea_start, ea_end in self._get_ea_range(item):
for ea in range(ea_start, ea_end):
Expand All @@ -39,15 +39,27 @@ def __setitem__(self, item, value):
except StopIteration:
return

def __getitem__(self, item):
def __getitem__(self, item: slice) -> bytes:
data = []
for ea_start, ea_end in self._get_ea_range(item):
data.append(idc.get_bytes(ea_start, ea_end - ea_start))
return b"".join(data)

def __len__(self):
def __len__(self) -> int:
return self.idamem.regions[-1].end_offset

def slice(
self, from_offset: Optional[int] = None, to_offset: Optional[int] = None
) -> "MemoryBuffer":
# HACK: IDAMemoryBuffer depends on region information from IDAProcessMemory
# Let's assume that MemoryBuffer is never directly sliced and regions
# are properly managed by slicev
return self

def release(self) -> None:
# Nothing to release
return


class IDAProcessMemory(ProcessMemory):
"""
Expand Down Expand Up @@ -77,7 +89,8 @@ def __init__(self):
off = 0 if not regions else regions[-1].end_offset
region = Region(seg, idc.get_segm_end(seg) - seg, 0, 0, 0, off)
regions.append(region)
super().__init__(IDAVM(self), regions=regions)
super().__init__(IDAMemoryBuffer(self), regions=regions)


idamem = IDAProcessMemory
IDAVM = IDAMemoryBuffer
138 changes: 138 additions & 0 deletions malduck/procmem/membuf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import mmap
import weakref
from abc import ABC, abstractmethod
from typing import Optional, Union


class MemoryBuffer(ABC):
    """
    Abstract interface of a byte buffer addressed by offset.

    Concrete buffers must support reading and writing by slice, report their
    length, produce derived buffers covering a sub-range and release whatever
    resources they hold.
    """

    @abstractmethod
    def __getitem__(self, item: slice) -> bytes:
        """Return the bytes covered by ``item``."""
        raise NotImplementedError()

    @abstractmethod
    def __setitem__(self, item: slice, value: bytes) -> None:
        """Overwrite the bytes covered by ``item`` with ``value``."""
        raise NotImplementedError()

    @abstractmethod
    def __len__(self) -> int:
        """Return the size of the buffer in bytes."""
        raise NotImplementedError()

    @abstractmethod
    def slice(
        self, from_offset: Optional[int] = None, to_offset: Optional[int] = None
    ) -> "MemoryBuffer":
        """
        Return a buffer representing the ``[from_offset:to_offset]`` range.

        ``None`` on either side means "from the beginning" / "to the end".
        """
        raise NotImplementedError()

    @abstractmethod
    def release(self) -> None:
        """Free the resources held by this buffer."""
        raise NotImplementedError()


class PlainMemoryBuffer(MemoryBuffer):
    """
    MemoryBuffer backed by a ``memoryview``.

    ``bytes`` and ``bytearray`` objects are wrapped in a memoryview; an
    existing memoryview is used as-is. If the view is read-only, the first
    write replaces it with a private writable copy.
    """

    def __init__(
        self,
        buf: Union[bytes, bytearray, memoryview],
    ) -> None:
        """
        :param buf: Backing storage of the buffer
        :raises TypeError: when ``buf`` is not memoryview, bytes or bytearray
        """
        if isinstance(buf, memoryview):
            self.buf = buf
        elif isinstance(buf, (bytearray, bytes)):
            self.buf = memoryview(buf)
        else:
            raise TypeError(
                "Buffer in PlainMemoryBuffer must be memoryview, bytes or bytearray"
            )

    def __getitem__(self, item: slice) -> bytes:
        """Return a copy of the bytes covered by the ``item`` slice."""
        return bytes(self.buf[item])

    def __setitem__(self, item: slice, value: bytes) -> None:
        """
        Overwrite the bytes covered by ``item`` with ``value``.

        A read-only backing view is first replaced by a writable copy.
        """
        if self.buf.readonly:
            # If buffer is read-only, make a copy (on write)
            old_view = self.buf
            self.buf = memoryview(bytearray(old_view))
            # Release only the superseded view. Going through self.release()
            # would run the full teardown of subclasses that override it
            # (e.g. releasing every derived slice) just because of a write.
            old_view.release()
        self.buf[item] = value

    def __len__(self) -> int:
        """Return the size of the backing view."""
        return len(self.buf)

    def _slice(
        self, from_offset: Optional[int], to_offset: Optional[int]
    ) -> memoryview:
        """Return a read-only view of ``[from_offset:to_offset]`` (no copy)."""
        return self.buf[from_offset:to_offset].toreadonly()

    def slice(
        self, from_offset: Optional[int] = None, to_offset: Optional[int] = None
    ) -> "MemoryBuffer":
        """
        Creates a derived MemoryBuffer object representing slice of an underlying memory.

        Derived buffer is readonly, so __setitem__ will first make a copy before applying
        changes. It means that changes on parent buffer may be seen in derived buffers,
        but not the other way.
        """
        return PlainMemoryBuffer(self._slice(from_offset, to_offset))

    def release(self) -> None:
        """Release the backing memoryview; the buffer is unusable afterwards."""
        self.buf.release()


class MmapMemoryBuffer(PlainMemoryBuffer):
    """
    MemoryBuffer backed by a memory-mapped file.

    The buffer is created either from ``file_name`` (mapped with
    ``mmap.ACCESS_COPY``, falling back to reading the whole file when that
    access mode is not available) or from an already created ``mmap`` object.
    Slices handed out by :py:meth:`slice` are tracked weakly so they can be
    released together with the mapping.
    """

    def __init__(
        self,
        file_name: Optional[str] = None,
        mapped_buf: Optional[mmap.mmap] = None,
    ):
        """
        :param file_name: Path of the file to map (takes precedence when given)
        :param mapped_buf: Existing mapping to wrap
        :raises ValueError: when neither argument is provided
        """
        self.opened_file = None
        self.mapped_buf = mapped_buf
        self._slices: weakref.WeakSet = weakref.WeakSet()
        if file_name is None:
            if mapped_buf is None:
                raise ValueError("Either file_name or map is required.")
            # Only an existing mapping was supplied: it still has to become
            # the backing view, otherwise self.buf is never set.
            super().__init__(memoryview(mapped_buf))
            return
        self.opened_file = open(file_name, "rb")
        try:
            # Allow copy-on-write
            if hasattr(mmap, "ACCESS_COPY"):
                self.mapped_buf = mmap.mmap(
                    self.opened_file.fileno(), 0, access=mmap.ACCESS_COPY
                )
            else:
                raise RuntimeError("mmap with CoW is not supported on your OS")
            super().__init__(memoryview(self.mapped_buf))
        except RuntimeError:
            # Fallback to file.read()
            super().__init__(memoryview(self.opened_file.read()))
            self.opened_file.close()
            self.opened_file = None
        except Exception:
            # Mapping failed for another reason: don't leak the file handle
            # of an object whose construction is about to fail.
            self.opened_file.close()
            self.opened_file = None
            raise

    def release(self) -> None:
        """Release the backing view, all tracked slices, the mapping and the file."""
        super().release()
        # Views derived from the mapping have to be released before close()
        for memory_slice in self._slices:
            memory_slice.release()
        if self.mapped_buf is not None:
            self.mapped_buf.close()
            self.mapped_buf = None
        if self.opened_file is not None:
            self.opened_file.close()
            self.opened_file = None

    def slice(
        self, from_offset: Optional[int] = None, to_offset: Optional[int] = None
    ) -> "MemoryBuffer":
        """Return a tracked read-only slice of ``[from_offset:to_offset]``."""
        return self.acquire_slice(self._slice(from_offset, to_offset))

    def acquire_slice(self, buf: memoryview) -> "MemoryBuffer":
        """
        Wrap ``buf`` in a MmapSliceMemoryBuffer and register it for release.

        :param buf: View that should be released along with this buffer
        """
        memory_slice = MmapSliceMemoryBuffer(buf, self)
        self._slices.add(memory_slice)
        return memory_slice


class MmapSliceMemoryBuffer(PlainMemoryBuffer):
    """
    Slice of a MmapMemoryBuffer.

    Keeps a reference to the buffer it was derived from and registers every
    further sub-slice with that same parent.
    """

    def __init__(self, buf: memoryview, parent: MmapMemoryBuffer):
        """
        :param buf: View representing this slice
        :param parent: Buffer that tracks slices derived from it
        """
        super().__init__(buf)
        self.parent = parent

    def slice(
        self, from_offset: Optional[int] = None, to_offset: Optional[int] = None
    ) -> "MemoryBuffer":
        """Return a sub-slice of ``[from_offset:to_offset]`` tracked by the parent."""
        register = self.parent.acquire_slice
        return register(self._slice(from_offset, to_offset))
Loading
Loading