Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Repair ELF executables in the "scripts" directory #443

Merged
merged 4 commits into from
Feb 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,5 @@ target/
# Generated by test script
*.zip
wheelhoust-*
tests/testpackage/testpackage/testprogram
tests/integration/testpackage/testpackage/testprogram
tests/integration/testpackage/testpackage/testprogram_nodeps
60 changes: 59 additions & 1 deletion src/auditwheel/repair.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def repair_wheel(
if not exists(dest_dir):
os.mkdir(dest_dir)

# here, fn is a path to a python extension library in
# here, fn is a path to an ELF file (lib or executable) in
# the wheel, and v['libs'] contains its required libs
for fn, v in external_refs_by_fn.items():
ext_libs: dict[str, str] = v[abis[0]]["libs"]
Expand All @@ -92,6 +92,9 @@ def repair_wheel(
patcher.replace_needed(fn, *replacements)

if len(ext_libs) > 0:
if _path_is_script(fn):
fn = _replace_elf_script_with_shim(match.group("name"), fn)

new_rpath = os.path.relpath(dest_dir, os.path.dirname(fn))
new_rpath = os.path.join("$ORIGIN", new_rpath)
append_rpath_within_wheel(fn, new_rpath, ctx.name, patcher)
Expand Down Expand Up @@ -232,3 +235,58 @@ def _resolve_rpath_tokens(rpath: str, lib_base_dir: str) -> str:
rpath = rpath.replace(f"${token}", target) # $TOKEN
rpath = rpath.replace(f"${{{token}}}", target) # ${TOKEN}
return rpath


def _path_is_script(path: str) -> bool:
# Looks something like "uWSGI-2.0.21.data/scripts/uwsgi"
components = path.split("/")
return (
len(components) == 3
and components[0].endswith(".data")
and components[1] == "scripts"
)


def _replace_elf_script_with_shim(package_name: str, orig_path: str) -> str:
    """Move an ELF script and replace it with a shim.

    We can't directly rewrite the RPATH of ELF executables in the "scripts"
    directory since scripts aren't installed to a consistent relative path to
    platlib files.

    Instead, we move the executable into a special directory in platlib and put
    a shim script in its place which execs the real executable.

    More context: https://github.com/pypa/auditwheel/issues/340

    Args:
        package_name: Name used to build the "<package_name>.scripts"
            directory that receives the executable.
        orig_path: Current path of the ELF executable; after this call it
            holds the generated shim instead.

    Returns the new path of the moved executable.
    """
    scripts_dir = f"{package_name}.scripts"
    os.makedirs(scripts_dir, exist_ok=True)

    new_path = os.path.join(scripts_dir, os.path.basename(orig_path))
    os.rename(orig_path, new_path)

    # The shim is Python source, which the interpreter decodes as UTF-8 by
    # default. Write it as UTF-8 explicitly so that a non-ASCII executable
    # name cannot fail to encode (or be mis-encoded) under the build
    # machine's locale.
    with open(orig_path, "w", encoding="utf-8") as f:
        f.write(_script_shim(new_path))
    # Give the shim the same permission bits as the executable it replaces.
    os.chmod(orig_path, os.stat(new_path).st_mode)

    return new_path


def _script_shim(binary_path: str) -> str:
return """\
#!python
import os
import sys
import sysconfig


if __name__ == "__main__":
os.execv(
os.path.join(sysconfig.get_path("platlib"), {binary_path!r}),
Copy link
Member

@mayeut mayeut Feb 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this works for all possible deployment scenarios.
Maybe using importlib.metadata on Python >= 3.8 would be a more robust way to get the real path.

Maybe this could be an improvement left for a later PR if an issue is reported.

sys.argv,
)
""".format(
binary_path=binary_path,
)
29 changes: 28 additions & 1 deletion tests/integration/test_manylinux.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def tmp_docker_image(base, commands, setup_env={}):

logger.info("Made image %s based on %s", image.short_id, base)
try:
yield image.short_id
yield image.id
finally:
mayeut marked this conversation as resolved.
Show resolved Hide resolved
client = image.client
client.images.remove(image.id)
Expand Down Expand Up @@ -404,6 +404,33 @@ def test_build_wheel_with_binary_executable(
)
assert output.strip() == "2.25"

# Both testprogram and testprogram_nodeps square a number, but:
# * testprogram links against libgsl and had to have its RPATH
# rewritten.
# * testprogram_nodeps links against no shared libraries and wasn't
# rewritten.
#
# Both executables should work when called from the installed bin directory.
assert docker_exec(docker_python, ["/usr/local/bin/testprogram", "4"]) == "16\n"
assert (
docker_exec(docker_python, ["/usr/local/bin/testprogram_nodeps", "4"])
== "16\n"
)

# testprogram should be a Python shim since we had to rewrite its RPATH.
assert (
docker_exec(docker_python, ["head", "-n1", "/usr/local/bin/testprogram"])
== "#!/usr/local/bin/python\n"
)

# testprogram_nodeps should be the unmodified ELF binary.
assert (
docker_exec(
docker_python, ["head", "-c4", "/usr/local/bin/testprogram_nodeps"]
)
== "\x7fELF"
)

def test_build_repair_pure_wheel(self, any_manylinux_container, io_folder):
policy, tag, manylinux_ctr = any_manylinux_container

Expand Down
26 changes: 23 additions & 3 deletions tests/integration/testpackage/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,32 @@

from setuptools import setup

cmd = "gcc testpackage/testprogram.c -lgsl -lgslcblas -o testpackage/testprogram"
subprocess.check_call(cmd.split())
subprocess.check_call(
(
"gcc",
"testpackage/testprogram.c",
"-lgsl",
"-lgslcblas",
"-o",
"testpackage/testprogram",
)
)
subprocess.check_call(
("gcc", "testpackage/testprogram_nodeps.c", "-o", "testpackage/testprogram_nodeps")
)

setup(
name="testpackage",
version="0.0.1",
packages=["testpackage"],
package_data={"testpackage": ["testprogram"]},
package_data={"testpackage": ["testprogram", "testprogram_nodeps"]},
# This places these files at a path like
# "testpackage-0.0.1.data/scripts/testprogram", which is needed to test
# rewriting ELF binaries installed into the scripts directory.
#
# Note that using scripts=[] doesn't work here since setuptools expects the
# scripts to be text and tries to decode them using UTF-8.
data_files=[
("../scripts", ["testpackage/testprogram", "testpackage/testprogram_nodeps"])
],
)
19 changes: 19 additions & 0 deletions tests/integration/testpackage/testpackage/testprogram_nodeps.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/* A simple example program to square a number using no shared libraries. */

#include <stdio.h>
#include <stdlib.h>

/*
 * Print the square of the integer given as the single command line argument.
 *
 * Returns EXIT_SUCCESS after printing, or EXIT_FAILURE (with a message on
 * stderr) when the argument count is wrong.
 */
int main(int argc, char **argv)
{
    int x;

    /* argv[0] is the program name, so one user argument means argc == 2. */
    if (argc != 2)
    {
        fputs("Expected exactly one command line argument\n", stderr);
        return EXIT_FAILURE;
    }

    x = atoi(argv[1]);
    /*
     * Square in long long: multiplying two ints overflows (undefined
     * behavior) as soon as the result no longer fits in an int.
     */
    printf("%lld\n", (long long)x * x);
    return EXIT_SUCCESS;
}