Skip to content

Commit

Permalink
feature: implement debuginfod support
Browse files Browse the repository at this point in the history
  • Loading branch information
io-no committed Dec 5, 2023
1 parent fb03e1a commit d337ee2
Showing 1 changed file with 139 additions and 165 deletions.
304 changes: 139 additions & 165 deletions libdebug/utils/elf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,32 +15,24 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

from elftools.elf.elffile import ELFFile, SymbolTableSection, NoteSection
from libdebug.cffi import _debug_sym_cffi
import functools
import requests
from pathlib import Path
import os
from typing import IO
import os
from elftools.elf.elffile import ELFFile
import time
import gc
import yappi

DEBUGINFOD_PATH: Path = Path.home() / ".cache" / "debuginfod_client"
LOCAL_DEBUG_PATH: bytes = b'/usr/lib/debug/.build-id/'

def _stream_loader(relative_file_path: str) -> IO[bytes]:
"""
This function takes a relative file path to load a supplementary object file,
and returns a stream suitable for creating a new ELFFile.
Args:
relative_file_path (str): The relative path to the supplementary object file.
Returns:
stream (IO[bytes]): A stream suitable for creating a new ELFFile.
"""
global absolute_debug_path
stream = open(absolute_debug_path + relative_file_path, 'rb')
LOCAL_DEBUG_PATH: str = '/usr/lib/debug/.build-id/'

return stream
lib_sym = _debug_sym_cffi.lib
ffi = _debug_sym_cffi.ffi

debug_info_level = 4

def _download_debuginfod(buildid: str, debuginfod_path: Path):
"""Downloads the debuginfo file corresponding to the specified buildid.
Expand All @@ -50,16 +42,22 @@ def _download_debuginfod(buildid: str, debuginfod_path: Path):
debuginfod_path (Path): The output directory.
"""

debuginfod_path.parent.mkdir(parents=True, exist_ok=True)
try:
url = f"https://debuginfod.elfutils.org/buildid/{buildid}/debuginfo"
r = requests.get(url, allow_redirects=True)

url = f"https://debuginfod.elfutils.org/buildid/{buildid}/debuginfo"
r = requests.get(url, allow_redirects=True)
if r.status_code == 200:
debuginfod_path.parent.mkdir(parents=True, exist_ok=True)

with open(debuginfod_path, "wb") as f:
f.write(r.content)
with open(debuginfod_path, "wb") as f:
f.write(r.content)
except Exception as e:
print('Exception in _download_debuginfod', e)
pass


def _debuginfod(buildid: str) -> str:
@functools.cache
def _debuginfod(buildid: str) -> Path:
"""Returns the path to the debuginfo file corresponding to the specified buildid.
Args:
Expand All @@ -74,113 +72,44 @@ def _debuginfod(buildid: str) -> str:
if not debuginfod_path.exists():
_download_debuginfod(buildid, debuginfod_path)

return str(debuginfod_path)



return debuginfod_path


def _symbols_from_debuglink(elf: ELFFile, buildid: str) -> dict[str, tuple[int, int]]:
    """Returns a dictionary containing the symbols of the specified ELF file from the linked
    debug file.

    Only DW_TAG_subprogram entries of the supplementary DWARF info that carry a
    DW_AT_name, a DW_AT_low_pc and a DW_AT_high_pc are reported.

    Args:
        elf (ELFFile): The ELF file.
        buildid (str): The buildid of the debuginfo file.

    Returns:
        dict: A dictionary mapping each symbol name to a (start address, size) tuple.
            Empty when the ELF file has no DWARF info, no buildid is available or
            no supplementary DWARF info could be loaded.
    """
    global absolute_debug_path
    symbols = {}

    # Without DWARF info or a buildid there is no linked debug file to look up.
    if not elf.has_dwarf_info() or not buildid:
        return symbols

    # Determine the path of the debuginfo file. It is published through a
    # module-level global because _stream_loader prepends it to the relative
    # path it is asked to open.
    absolute_debug_path = LOCAL_DEBUG_PATH + buildid[:2].encode() + b'/'

    # Retrieve the symbols from the DWARF info
    dwarf_info = elf.get_dwarf_info(follow_links=True, relocate_dwarf_sections=True)

    supplementary = dwarf_info.supplementary_dwarfinfo
    if not supplementary:
        return symbols

    # DW_AT_high_pc forms whose value is an offset from DW_AT_low_pc
    offset_forms = {
        'DW_FORM_data1',
        'DW_FORM_data2',
        'DW_FORM_data4',
        'DW_FORM_data8',
        'DW_FORM_udata',
    }

    for cu in supplementary.iter_CUs():
        for die in cu.iter_DIEs():
            if die.tag != 'DW_TAG_subprogram':
                continue

            # Retrieve all attributes at once
            attributes = die.attributes
            lowpc_attr = attributes.get('DW_AT_low_pc')
            highpc_attr = attributes.get('DW_AT_high_pc')
            name_attr = attributes.get('DW_AT_name')

            # Check if all necessary attributes are present
            if lowpc_attr is None or highpc_attr is None or name_attr is None:
                continue

            lowpc = lowpc_attr.value
            if highpc_attr.form == 'DW_FORM_addr':
                # highpc is an absolute address
                size = highpc_attr.value - lowpc
            elif highpc_attr.form in offset_forms:
                # highpc is an offset from lowpc
                size = highpc_attr.value
            else:
                # Unknown encoding: skip rather than report a bogus size
                continue

            # The name may be raw bytes; normalise to str so the keys match
            # the annotated return type.
            name = name_attr.value
            if isinstance(name, bytes):
                name = name.decode('utf-8', errors='replace')

            symbols[name] = (lowpc, size)

    return symbols


def _iterate_sym_table(elf: ELFFile) -> dict[str, (int, int)]:
"""Returns a dictionary containing the symbols of the specified ELF file from
SymbolTableSection.
@functools.cache
def _collect_external_info(path: str) -> dict[str, int]:
"""Returns a dictionary containing the symbols taken from the external debuginfo file
Args:
elf (ELFFile): The ELF file.
path (str): The path to the ELF file.
Returns:
dict: A dictionary containing the symbols of the specified ELF file.
symbols (dict): A dictionary containing the symbols of the specified external debuginfo file.
"""

symbols = {}

# Retrieve the symbols from the SymbolTableSection
for section in elf.iter_sections():
if isinstance(section, SymbolTableSection):
for symbol in section.iter_symbols():
start_value = symbol.entry.st_value
size_value = symbol.entry.st_size
if start_value and size_value:
symbols[symbol.name] = (symbol.entry.st_value, symbol.entry.st_size)
c_file_path = ffi.new("char[]", path.encode('utf-8'))
head = lib_sym.collect_external_symbols(c_file_path)

return symbols
if head != ffi.NULL:
count = lib_sym.get_symbol_count(head)

for i in range(count):
name = ffi.new("char **")
low_pc = ffi.new("unsigned long long *")
high_pc = ffi.new("unsigned long long *")

if lib_sym.get_symbol_data(head, i, name, low_pc, high_pc) == 0:
symbol_name = ffi.string(name[0]).decode("utf-8")
symbols[symbol_name] = (low_pc[0], high_pc[0])

def _retrieve_buildid(elf: ELFFile) -> str:
"""Returns the buildid of the specified ELF file.
lib_sym.free_symbol_info(head)

Args:
elf (ELFFile): The ELF file.
Returns:
str: The buildid of the specified ELF file.
"""

for section in elf.iter_sections():
if section.name == '.note.gnu.build-id':
for note in section.iter_notes():
if note["n_type"] == "NT_GNU_BUILD_ID":
buildid = note["n_desc"]
return buildid
return symbols


@functools.cache
def _parse_elf_file(path: str, debug_info_level: int=2) -> dict[str, int]:
def _parse_elf_file(path: str, debug_info_level: int) -> (dict[str, int], str, str):
"""Returns a dictionary containing the symbols of the specified ELF file and
the buildid.
Expand All @@ -191,31 +120,57 @@ def _parse_elf_file(path: str, debug_info_level: int=2) -> dict[str, int]:
Returns:
symbols (dict): A dictionary containing the symbols of the specified ELF file.
buildid (str): The buildid of the specified ELF file.
debug_file_path (str): The path to the corresponding external debuginfo file.
"""

symbols_table = {}
symbols_debug = {}


with open(path, "rb") as elf_file:
elf = ELFFile(elf_file, stream_loader=_stream_loader)

# Retrieve the symbols from the SymbolTableSection
symbols_table = _iterate_sym_table(elf)

if debug_info_level >= 2:
# Retrieve the buildid
buildid = _retrieve_buildid(elf)


symbols = {}
buildid = None
debug_file_path = None


c_file_path = ffi.new("char[]", path.encode('utf-8'))
head = lib_sym.read_elf_info(c_file_path, debug_info_level)

if head != ffi.NULL:
count = lib_sym.get_symbol_count(head)
#yappi.start()
#yappi.set_clock_type("cpu")
for i in range(count):
#start = time.perf_counter()

name = ffi.new("char **")
low_pc = ffi.new("unsigned long long *")
high_pc = ffi.new("unsigned long long *")

if lib_sym.get_symbol_data(head, i, name, low_pc, high_pc) == 0:
symbol_name = ffi.string(name[0]).decode("utf-8")
symbols[symbol_name] = (low_pc[0], high_pc[0])

#end = time.perf_counter()
#print(f'get_symbol_data took {end-start} seconds')
#yappi.stop()
#print(yappi.get_thread_stats().print_all())
#yappi.get_func_stats().print_all()
#yappi.clear_stats()
lib_sym.free_symbol_info(head)

if debug_info_level > 2:
buildid = lib_sym.get_build_id()
if buildid != ffi.NULL:
buildid = ffi.string(buildid).decode("utf-8")
else:
buildid = None

# Retrieve the symbols from the DWARF info
symbols_debug = _symbols_from_debuglink(elf, buildid)

debug_file_path = lib_sym.get_debug_file()
if debug_file_path != ffi.NULL:
debug_file_path = ffi.string(debug_file_path).decode("utf-8")
else:
debug_file_path = None

return symbols_table, buildid
return symbols, buildid, debug_file_path


@functools.cache
def resolve_symbol(path: str, symbol: str) -> int:
"""Returns the address of the specified symbol in the specified ELF file.
Expand All @@ -227,36 +182,34 @@ def resolve_symbol(path: str, symbol: str) -> int:
int: The address of the specified symbol in the specified ELF file.
"""

global absolute_debug_path

# Retrieve the symbols from the SymbolTableSection
symbols, buildid = _parse_elf_file(path)

symbols, buildid, debug_file = _parse_elf_file(path, debug_info_level)
if symbol in symbols:
match = symbols[symbol][0]
else:
# Retrieve the symbols from the local debuginfo file

if buildid:
# Determine the path of the debuginfo file
folder = buildid[:2].encode()
absolute_debug_path = LOCAL_DEBUG_PATH + folder + b'/'
else:
# TODO log
pass






if symbol not in symbols:
raise ValueError(
f"Symbol {symbol} not found in {path}. Please specify a valid symbol."
)
return symbols[symbol][0]

return match
# Retrieve the symbols from the external debuginfo file
if buildid and debug_file and debug_info_level > 2:
folder = buildid[:2]
absolute_debug_path = os.path.join(LOCAL_DEBUG_PATH, folder, debug_file)
symbols = _collect_external_info(absolute_debug_path)
if symbol in symbols:
return symbols[symbol][0]

# Retrieve the symbols from debuginfod
if buildid and debug_info_level > 3:
absolute_debug_path = _debuginfod(buildid)
if absolute_debug_path.exists():
symbols = _collect_external_info(str(absolute_debug_path))
if symbol in symbols:
return symbols[symbol][0]

# Symbol not found
raise ValueError(
f"Symbol {symbol} not found in {path}. Please specify a valid symbol."
)


@functools.cache
def resolve_address(path: str, address: int) -> str:
"""Returns the symbol corresponding to the specified address in the specified ELF file.
Expand All @@ -267,11 +220,32 @@ def resolve_address(path: str, address: int) -> str:
Returns:
str: The symbol corresponding to the specified address in the specified ELF file.
"""
print(path)
symbols, buildid = _parse_elf_file(path)
for symbol, (symbol_address, symbol_size) in symbols.items():
if symbol_address <= address < symbol_address + symbol_size:
return f'{symbol}+{str(address-symbol_address)}'

# Retrieve the symbols from the SymbolTableSection
symbols, buildid, debug_file = _parse_elf_file(path, debug_info_level)
for symbol, (symbol_start, symbol_end) in symbols.items():
if symbol_start <= address < symbol_end:
return f'{symbol}+{str(address-symbol_start)}'

# Retrieve the symbols from the external debuginfo file
if buildid and debug_file and debug_info_level > 2:
folder = buildid[:2]
absolute_debug_path = os.path.join(LOCAL_DEBUG_PATH, folder, debug_file)
symbols = _collect_external_info(absolute_debug_path)
for symbol, (symbol_start, symbol_end) in symbols.items():
if symbol_start <= address < symbol_end:
return f'{symbol}+{str(address-symbol_start)}'

# Retrieve the symbols from debuginfod
if buildid and debug_info_level > 3:
absolute_debug_path = _debuginfod(buildid)
if absolute_debug_path.exists():
symbols = _collect_external_info(str(absolute_debug_path))
for symbol, (symbol_start, symbol_end) in symbols.items():
if symbol_start <= address < symbol_end:
return f'{symbol}+{str(address-symbol_start)}'

# Address not found
raise ValueError(
f"Address {hex(address)} not found in {path}. Please specify a valid address."
)
Expand Down

0 comments on commit d337ee2

Please sign in to comment.