Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce framework for various link strategies #3

Merged
merged 3 commits into from
Jan 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ test:

all: lint typecheck pytest

clean:
rm -f *_stamped

.PHONY: typecheck lint format

.DEFAULT_GOAL := all
.DEFAULT_GOAL := all
70 changes: 40 additions & 30 deletions shrinkwrap/cli.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,60 @@
import os
import re
from shutil import copystat
from typing import Optional

import click
import lief # type: ignore
from sh import Command, ErrorReturnCode # type: ignore

from shrinkwrap.elf import LinkStrategy


@click.command()
@click.argument("file", type=click.Path(exists=True))
@click.option("-o", "--output", type=click.Path(), required=False)
@click.option(
    "-l",
    "--link-strategy",
    default="native",
    show_default=True,
    type=click.Choice(["native", "virtual"], case_sensitive=True),
)
def shrinkwrap(file: str, output: Optional[str], link_strategy: str):
    """Freeze the dependencies into the top level shared object file.

    FILE is parsed with lief, its needed libraries are resolved with the
    selected link strategy, and a copy whose NEEDED (and matching VERNEED)
    entries point at the resolved paths is written to OUTPUT.

    When OUTPUT is omitted the result is written to the current directory as
    ``<basename of FILE>_stamped``. The process exits with status 1 when FILE
    is not an ELF file, has no interpreter, or the interpreter invoked by the
    link strategy fails.
    """
    if output is None:
        output = os.path.basename(file) + "_stamped"

    if not lief.is_elf(file):
        click.echo(f"{file} is not elf format")
        # SystemExit rather than the `exit` helper: the latter is injected by
        # the `site` module and is not guaranteed to exist.
        raise SystemExit(1)

    binary: lief.Binary = lief.parse(file)
    if not binary.has_interpreter:
        click.echo("no interpreter set on the binary")
        raise SystemExit(1)

    strategy = LinkStrategy.select_by_name(link_strategy)
    try:
        resolution = strategy.explore(binary, file)
    except ErrorReturnCode as e:
        # A strategy may shell out to the interpreter; report its failure
        # instead of surfacing a traceback.
        click.echo(f"shrinkwrap failed: {e.stderr}")
        raise SystemExit(1)

    needed = binary.libraries

    # NOTE(review): the page this was recovered from lost its indentation;
    # add_library is assumed to run for every resolved entry (not only the
    # direct dependencies) so the whole closure is lifted to the top level.
    for soname, lib in resolution.items():
        if soname in needed:
            binary.remove_library(soname)
        binary.add_library(lib)

    # we need to update the VERNEED entries now to match
    # NOTE(review): lief makes this rewrite easy, although discovering that
    # it was required at all was not.
    for verneed in binary.symbols_version_requirement:
        # we want to map the possible shortname soname
        # to the absolute one we generate
        absolute = resolution.get(verneed.name)
        if absolute is not None:
            verneed.name = absolute

    # dump the new binary file
    binary.write(output)

    # copy the file metadata
    copystat(file, output)


# Script entry point: invoke the click command when this module is run directly.
if __name__ == "__main__":
    shrinkwrap()
169 changes: 169 additions & 0 deletions shrinkwrap/elf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
from __future__ import annotations

import os
import re
from abc import ABC, abstractmethod
from typing import Dict, Iterable, Optional

import lief # type: ignore
from sh import Command # type: ignore

from shrinkwrap import ldsoconf


class LinkStrategy(ABC):
    """Interface for algorithms that resolve the needed objects of a binary."""

    @staticmethod
    def select_by_name(name: str) -> LinkStrategy:
        """Return a new strategy instance for ``name``.

        Recognized names are ``"native"`` and ``"virtual"``.

        Raises:
            ValueError: if ``name`` is not a recognized strategy. ValueError
                is an ``Exception`` subclass, so handlers written for the
                previous bare ``Exception`` keep working.
        """
        if name == "native":
            return NativeLinkStrategy()
        if name == "virtual":
            return VirtualLinkStrategy()
        raise ValueError(f"Unknown strategy: {name}")

    @abstractmethod
    def explore(self, binary: lief.Binary, filename: str) -> Dict[str, str]:
        """
        Determine the linking for all needed objects

        Implementations return a mapping from soname to the path it resolved
        to. ``filename`` is the path ``binary`` was parsed from.
        """


class NativeLinkStrategy(LinkStrategy):
    """Uses the native interpreter in the binary to determine the linking"""

    # One resolved line of the interpreter's `--list` output, e.g.
    #   "\tlibc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f...)"
    # Unresolved entries are printed as "libfoo.so => not found"; they are
    # matched explicitly so they are never mistaken for a path called "not".
    _RESOLVED_LINE = re.compile(r"\s*(\S+) => (not found|\S+)")

    def explore(self, binary: lief.Binary, filename: str) -> Dict[str, str]:
        """Run the binary's own interpreter with ``--list`` and parse its output.

        Returns a mapping from each listed soname to the path it resolved to.
        Lines without a ``soname => path`` pair are ignored.

        Raises:
            Exception: if the interpreter reports a needed object as
                ``not found``.
        """
        # NOTE(review): per the review discussion, lief's Binary only exposes
        # the name portion and not the original path, hence the separate
        # `filename` argument. Moving the lief usage into an ELF-friendly
        # class was suggested as a future cleanup.
        interpreter = Command(binary.interpreter)
        resolution = interpreter("--list", filename)
        result = {}
        # TODO: Figure out why `--list` and `ldd` produce different outcomes
        # specifically for the interpreter.
        # https://gist.github.com/fzakaria/3dc42a039401598d8e0fdbc57f5e7eae
        # NOTE(review): unresolved in review; one guess was that the loader
        # simply prints itself instead of looking itself up.
        for line in resolution:
            m = self._RESOLVED_LINE.match(line)
            if not m:
                continue
            soname, lib = m.group(1), m.group(2)
            if lib == "not found":
                raise Exception(f"Could not find {soname}")
            result[soname] = lib
        return result


class VirtualLinkStrategy(LinkStrategy):
    """Resolves needed objects by walking the search paths itself.

    No interpreter is executed; the lookup order is a rough translation of
    the one described in ld.so(8).
    """

    # TODO: Need to figure out a good way to determine the NEEDED of glibc
    # I think it's resolving based on a shared object cache from the .INTERP
    # section but that remains to be validated.
    SKIP = ["ld-linux.so.2", "ld-linux-x86-64.so.2"]

    @staticmethod
    def find(
        paths: Iterable[str],
        soname: str,
        identity_class: lief.ELF.ELF_CLASS,
        machine_type: lief.ELF.ARCH,
    ) -> Optional[str]:
        """Given a list of paths, try and find it. It does not search recursively

        Returns the first ``<path>/<soname>`` that exists, is an ELF file and
        whose header matches ``identity_class`` and ``machine_type``; returns
        ``None`` when no directory holds such a file.
        """
        for directory in paths:
            candidate = os.path.join(directory, soname)
            if not os.path.exists(candidate) or not lief.is_elf(candidate):
                continue
            parsed = lief.parse(candidate)
            if parsed is None:
                # unparsable despite the ELF magic; keep looking
                continue
            header = parsed.header
            if (
                header.identity_class == identity_class
                and header.machine_type == machine_type
            ):
                return candidate
        return None

    @staticmethod
    def has_nodeflib(binary: lief.Binary) -> bool:
        """Return True when the binary's FLAGS_1 dynamic entry has NODEFLIB set."""
        if not binary.has(lief.ELF.DYNAMIC_TAGS.FLAGS_1):
            return False
        return any(
            flag == lief.ELF.DYNAMIC_FLAGS_1.NODEFLIB
            for flag in binary[lief.ELF.DYNAMIC_TAGS.FLAGS_1].flags
        )

    def explore(self, binary: lief.Binary, filename: str) -> Dict[str, str]:
        """
        Determine the linking for all needed objects

        Starting from ``binary``, every needed soname (except those in
        ``SKIP``) is looked up and each object found is parsed and explored in
        turn. Returns a mapping from soname to the path it resolved to.

        ``filename`` is currently unused; in particular ``$ORIGIN`` tokens in
        RPATH/RUNPATH entries are not expanded.

        Raises:
            Exception: if a needed soname cannot be found in any search path.
        """

        result: Dict[str, str] = {}
        queue = [binary]
        rpaths = []
        # An unset or empty variable must contribute no search path at all:
        # "".split(":") would yield [""] and os.path.join("", soname) would
        # silently search the current working directory.
        env_value = os.environ.get("LD_LIBRARY_PATH", "")
        ld_library_path = env_value.split(":") if env_value else []
        # NOTE: ldsoconf.parse() is annotated as returning a set, so the
        # order in which these directories are tried is not defined.
        default_paths = ldsoconf.parse()
        seen = set()

        # The following is a rough translation of the search as described in
        # https://man7.org/linux/man-pages/man8/ld.so.8.html
        # 1. IF RUNPATH is not present, and RPATH is present use RPATH.
        #    Note: RPATH is cumulative as it traverses the children
        # 2. Use the environment variable LD_LIBRARY_PATH
        # 3. Use RUNPATH to locate only the current shared objects dependencies
        # 4. Default libraries, unless ELF file has 'nodeflibs' set
        while queue:
            # First-in, first-out: objects are visited level by level, which
            # mirrors the breadth-first order used by the dynamic loader and
            # decides which object gets to resolve a soname first.
            current = queue.pop(0)

            if current.has(lief.ELF.DYNAMIC_TAGS.RPATH):
                rpaths += current.get(lief.ELF.DYNAMIC_TAGS.RPATH).paths

            runpaths = []
            if current.has(lief.ELF.DYNAMIC_TAGS.RUNPATH):
                runpaths += current.get(lief.ELF.DYNAMIC_TAGS.RUNPATH).paths

            # any binaries found need to make sure we match
            # the identity_class and machine_type
            identity_class = current.header.identity_class
            machine_type = current.header.machine_type

            for soname in current.libraries:

                if soname in VirtualLinkStrategy.SKIP:
                    continue

                if os.path.basename(soname) in seen:
                    continue

                path = None
                # IF RUNPATH is not present, and RPATH is present use RPATH.
                if not runpaths and rpaths:
                    path = VirtualLinkStrategy.find(
                        rpaths, soname, identity_class, machine_type
                    )
                # Use the environment variable LD_LIBRARY_PATH
                if not path and ld_library_path:
                    path = VirtualLinkStrategy.find(
                        ld_library_path, soname, identity_class, machine_type
                    )

                # Use RUNPATH to locate only the current shared objects dependencies
                if not path and runpaths:
                    path = VirtualLinkStrategy.find(
                        runpaths, soname, identity_class, machine_type
                    )

                if not path and not VirtualLinkStrategy.has_nodeflib(current):
                    path = VirtualLinkStrategy.find(
                        default_paths, soname, identity_class, machine_type
                    )

                if not path:
                    raise Exception(f"Could not find {soname}")

                # lets add the basename of the soname to a cache
                # so that any object that requires the same soname is skipped
                # this works since this is the same behavior as in glibc
                seen.add(os.path.basename(soname))

                result[soname] = path
                queue.append(lief.parse(path))

        return result
24 changes: 24 additions & 0 deletions shrinkwrap/ldsoconf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import functools
from glob import glob
from os.path import abspath, dirname, isabs, join
from typing import Set


# source: https://gist.github.com/stuaxo/79bcdcbaf9aa3b277207
@functools.lru_cache()
def parse(filename: str = "/etc/ld.so.conf") -> Set[str]:
    """Load all the paths from a given ldso config file

    Each line is handled the way ldconfig reads it:

    * everything from ``#`` to the end of the line is a comment,
    * leading and trailing whitespace is ignored, blank lines are skipped,
    * ``include PATTERN...`` loads every file matching each glob pattern;
      a relative pattern is resolved against the directory of ``filename``,
    * ``hwcap ...`` directives are not directories and are skipped,
    * any other line is taken as a directory.

    The result is memoized per ``filename``; callers must not mutate the
    returned set. Include cycles are not detected.

    Raises:
        OSError: if ``filename`` (or an included file) cannot be opened.
    """
    paths: Set[str] = set()
    directory = dirname(abspath(filename))
    with open(filename) as f:
        for raw_line in f:
            # the format has no quoting, so '#' always starts a comment
            line = raw_line.partition("#")[0].strip()
            if not line:
                continue

            fields = line.split(None, 1)
            keyword = fields[0]
            has_arguments = len(fields) == 2

            if keyword == "include" and has_arguments:
                for pattern in fields[1].split():
                    if not isabs(pattern):
                        pattern = join(directory, pattern)
                    for included in glob(pattern):
                        paths |= parse(included)
            elif keyword.lower() == "hwcap" and has_arguments:
                continue
            else:
                paths.add(line)
    return paths