-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Introduce framework for various link strategies #3
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,50 +1,60 @@ | ||
import os | ||
import re | ||
from shutil import copystat | ||
from typing import Optional | ||
|
||
import click | ||
import lief # type: ignore | ||
from sh import Command, ErrorReturnCode # type: ignore | ||
|
||
from shrinkwrap.elf import LinkStrategy | ||
|
||
|
||
@click.command() | ||
@click.argument("file", type=click.Path(exists=True)) | ||
@click.option("-o", "--output", type=click.Path(), required=False) | ||
def shrinkwrap(file: str, output: Optional[str]): | ||
@click.option( | ||
"-l", | ||
"--link-strategy", | ||
default="native", | ||
show_default=True, | ||
type=click.Choice(["native", "virtual"], case_sensitive=True), | ||
) | ||
def shrinkwrap(file: str, output: Optional[str], link_strategy: str): | ||
"""Freeze the dependencies into the top level shared object file.""" | ||
if output is None: | ||
output = os.path.basename(file) + "_stamped" | ||
|
||
try: | ||
binary: lief.Binary = lief.parse(file) | ||
if not binary.has_interpreter: | ||
click.echo("no interpreter set on the binary") | ||
exit(1) | ||
interpreter = Command(binary.interpreter) | ||
resolution = interpreter("--list", file) | ||
|
||
needed = binary.libraries | ||
|
||
for line in resolution: | ||
m = re.match(r"\s*([^ ]+) => ([^ ]+)", line) | ||
if not m: | ||
continue | ||
soname, lib = m.group(1), m.group(2) | ||
if soname in needed: | ||
binary.remove_library(soname) | ||
|
||
binary.add_library(lib) | ||
|
||
# dump the new binary file | ||
binary.write(output) | ||
|
||
# copy the file metadata | ||
copystat(file, output) | ||
except ErrorReturnCode as e: | ||
print(f"shrinkwrap failed: {e.stderr}") | ||
if not lief.is_elf(file): | ||
click.echo(f"{file} is not elf format") | ||
exit(1) | ||
|
||
binary: lief.Binary = lief.parse(file) | ||
if not binary.has_interpreter: | ||
click.echo("no interpreter set on the binary") | ||
exit(1) | ||
|
||
strategy = LinkStrategy.select_by_name(link_strategy) | ||
resolution = strategy.explore(binary, file) | ||
needed = binary.libraries | ||
|
||
for soname, lib in resolution.items(): | ||
if soname in needed: | ||
binary.remove_library(soname) | ||
binary.add_library(lib) | ||
|
||
# we need to update the VERNEED entries now to match | ||
verneeded = binary.symbols_version_requirement | ||
for verneed in verneeded: | ||
if verneed.name in resolution: | ||
# we want to map the possible shortname soname | ||
# to the absolute one we generate | ||
verneed.name = resolution.get(verneed.name) | ||
|
||
# dump the new binary file | ||
binary.write(output) | ||
|
||
# copy the file metadata | ||
copystat(file, output) | ||
|
||
|
||
if __name__ == "__main__": | ||
shrinkwrap() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
from __future__ import annotations | ||
|
||
import os | ||
import re | ||
from abc import ABC, abstractmethod | ||
from typing import Dict, Iterable, Optional | ||
|
||
import lief # type: ignore | ||
from sh import Command # type: ignore | ||
|
||
from shrinkwrap import ldsoconf | ||
|
||
|
||
class LinkStrategy(ABC):
    """Interface for strategies that resolve a binary's needed shared objects."""

    @staticmethod
    def select_by_name(name: str) -> LinkStrategy:
        """Return a new strategy instance for the given strategy name.

        The recognised names are "native" and "virtual".

        Raises:
            ValueError: if ``name`` is not a known strategy.
        """
        if name == "native":
            return NativeLinkStrategy()
        if name == "virtual":
            return VirtualLinkStrategy()
        # ValueError is a subclass of Exception, so callers that caught the
        # previous generic Exception keep working.
        raise ValueError(f"Unknown strategy: {name}")

    @abstractmethod
    def explore(self, binary: lief.Binary, filename: str) -> Dict[str, str]:
        """
        Determine the linking for all needed objects

        Implementations return a mapping from each soname to the library
        path it resolved to.
        """
|
||
|
||
class NativeLinkStrategy(LinkStrategy):
    """Uses the native interpreter in the binary to determine the linking"""

    # Matches "<soname> => <path>" lines of the interpreter's `--list` output.
    # "not found" is captured as a whole so that an unresolved dependency is
    # not mistaken for a library literally called "not".
    _RESOLUTION_LINE = re.compile(r"\s*([^ ]+) => (not found|[^ ]+)")

    def explore(self, binary: lief.Binary, filename: str) -> Dict[str, str]:
        """Run the binary's own interpreter with `--list` and parse its output.

        Args:
            binary: the parsed binary; only its ``interpreter`` is read here.
            filename: path to the file on disk, handed to the interpreter.

        Returns:
            A mapping of soname to the path the interpreter resolved it to.

        Raises:
            Exception: if the interpreter reports a dependency as "not found".
        """
        # NOTE(review): per the review discussion, the lief binary only gives
        # the name portion and not the original filename, which is presumably
        # why `filename` has to be passed in separately.
        interpreter = Command(binary.interpreter)
        resolution = interpreter("--list", filename)
        result: Dict[str, str] = {}
        # TODO: Figure out why `--list` and `ldd` produce different outcomes
        # specifically for the interpreter.
        # https://gist.github.com/fzakaria/3dc42a039401598d8e0fdbc57f5e7eae
        # Unverified hypothesis from review: maybe the loader just prints
        # itself instead of looking itself up.
        for line in resolution:
            m = self._RESOLUTION_LINE.match(line)
            if not m:
                # lines without "=>" carry no soname-to-path mapping
                continue
            soname, lib = m.group(1), m.group(2)
            if lib == "not found":
                # same failure mode as the virtual strategy
                raise Exception(f"Could not find {soname}")
            result[soname] = lib
        return result
|
||
|
||
class VirtualLinkStrategy(LinkStrategy):
    """Determines the linking by walking the library search paths in Python
    instead of running the binary's interpreter."""

    # TODO: Need to figure out a good way to determine the NEEDED of glibc
    # I think it's resolving based on a shared object cache from the .INTERP
    # section but that remains to be validated.
    SKIP = ["ld-linux.so.2", "ld-linux-x86-64.so.2"]

    @staticmethod
    def find(
        paths: Iterable[str],
        soname: str,
        identity_class: lief.ELF.ELF_CLASS,
        machine_type: lief.ELF.ARCH,
    ) -> Optional[str]:
        """Given a list of paths, try and find it. It does not search recursively

        A candidate is accepted only if it is an ELF file whose header has
        the requested identity class and machine type.

        Returns:
            The full path of the first acceptable candidate, or None.
        """
        for path in paths:
            full_path = os.path.join(path, soname)
            if not os.path.exists(full_path) or not lief.is_elf(full_path):
                continue
            header = lief.parse(full_path).header
            if (
                header.identity_class == identity_class
                and header.machine_type == machine_type
            ):
                return full_path
        return None

    @staticmethod
    def has_nodeflib(binary: lief.Binary) -> bool:
        """Return True if the binary's FLAGS_1 dynamic entry contains NODEFLIB."""
        if not binary.has(lief.ELF.DYNAMIC_TAGS.FLAGS_1):
            return False
        flags = binary[lief.ELF.DYNAMIC_TAGS.FLAGS_1].flags
        return any(flag == lief.ELF.DYNAMIC_FLAGS_1.NODEFLIB for flag in flags)

    def explore(self, binary: lief.Binary, filename: str) -> Dict[str, str]:
        """
        Determine the linking for all needed objects

        Starting from ``binary``, every needed soname is located on disk and
        the located object is parsed in turn so that its own needed entries
        are resolved as well. ``filename`` is not used by this strategy.

        Returns:
            A mapping of soname to the full path it was found at.

        Raises:
            Exception: if a needed soname cannot be found in any search path.
        """
        result: Dict[str, str] = {}
        queue = [binary]
        rpaths = []
        # "".split(":") yields [""], and joining "" with a soname makes find()
        # probe the current directory; an unset or empty variable must
        # contribute no search path at all. Components of a non-empty value
        # are kept exactly as given.
        ld_library_path_value = os.environ.get("LD_LIBRARY_PATH", "")
        ld_library_path = (
            ld_library_path_value.split(":") if ld_library_path_value else []
        )
        default_paths = ldsoconf.parse()
        seen = set()

        # The following is a rough translation of the search as described in
        # https://man7.org/linux/man-pages/man8/ld.so.8.html
        # 1. IF RUNPATH is not present, and RPATH is present use RPATH.
        #    Note: RPATH is cumulative as it traverses the children
        # 2. Use the environment variable LD_LIBRARY_PATH
        # 3. Use RUNPATH to locate only the current shared objects dependencies
        # 4. Default libraries, unless ELF file has 'nodeflibs' set
        while queue:
            current = queue.pop()

            if current.has(lief.ELF.DYNAMIC_TAGS.RPATH):
                rpaths += current.get(lief.ELF.DYNAMIC_TAGS.RPATH).paths

            runpaths = []
            if current.has(lief.ELF.DYNAMIC_TAGS.RUNPATH):
                runpaths += current.get(lief.ELF.DYNAMIC_TAGS.RUNPATH).paths

            needed = current.libraries

            # any binaries found need to make sure we match
            # the identity_class and machine_type
            identity_class = current.header.identity_class
            machine_type = current.header.machine_type

            for soname in needed:

                if soname in VirtualLinkStrategy.SKIP:
                    continue

                if os.path.basename(soname) in seen:
                    continue

                path = None
                # IF RUNPATH is not present, and RPATH is present use RPATH.
                if not runpaths and rpaths:
                    path = VirtualLinkStrategy.find(
                        rpaths, soname, identity_class, machine_type
                    )
                # Use the environment variable LD_LIBRARY_PATH
                if not path and ld_library_path:
                    path = VirtualLinkStrategy.find(
                        ld_library_path, soname, identity_class, machine_type
                    )

                # Use RUNPATH to locate only the current shared objects dependencies
                if not path and runpaths:
                    path = VirtualLinkStrategy.find(
                        runpaths, soname, identity_class, machine_type
                    )

                if not path and not VirtualLinkStrategy.has_nodeflib(current):
                    path = VirtualLinkStrategy.find(
                        default_paths, soname, identity_class, machine_type
                    )

                if not path:
                    raise Exception(f"Could not find {soname}")

                # lets add the basename of the soname to a cache
                # so that any object that requires the same soname is skipped
                # this works since this is the same behavior as in glibc
                seen.add(os.path.basename(soname))

                result[soname] = path
                queue.append(lief.parse(path))

        return result
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import functools | ||
from glob import glob | ||
from os.path import abspath, dirname, isabs, join | ||
from typing import Set | ||
|
||
|
||
# source: https://gist.github.com/stuaxo/79bcdcbaf9aa3b277207 | ||
@functools.lru_cache()
def parse(filename: str = "/etc/ld.so.conf") -> Set[str]:
    """Load all the paths from a given ldso config file

    Blank lines and ``#`` comments (whole-line or trailing) are ignored and
    surrounding whitespace is stripped. ``include`` directives are followed
    recursively; relative patterns are resolved against the directory of the
    file that contains them, and one directive may list several
    whitespace-separated patterns.

    Args:
        filename: path of the configuration file to read.

    Returns:
        The set of directory entries found in the file and its includes.
        Results are memoised per filename, so treat the returned set as
        read-only.

    Raises:
        OSError: if ``filename`` or an included file cannot be opened.
    """
    paths: Set[str] = set()
    directory = dirname(abspath(filename))
    with open(filename) as f:
        for raw_line in f:
            # everything after '#' is a comment; the format has no quoting
            line = raw_line.partition("#")[0].strip()
            if not line:
                continue
            fields = line.split()
            if fields[0] == "include":
                for wildcard in fields[1:]:
                    if not isabs(wildcard):
                        wildcard = join(directory, wildcard)
                    # distinct name: do not shadow the `filename` parameter
                    for included in glob(wildcard):
                        paths |= parse(included)
            else:
                paths.add(line)
    return paths
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@trws lief makes this very easy, although finding out that I had to do this was not ;)