diff --git a/conda_build/api.py b/conda_build/api.py index 72f2311185..a26f9b0ae8 100644 --- a/conda_build/api.py +++ b/conda_build/api.py @@ -294,20 +294,20 @@ def convert(package_file, output_dir=".", show_imports=False, platforms=None, fo def test_installable(channel='defaults'): """Check to make sure that packages in channel are installable. This is a consistency check for the channel.""" - from .inspect import test_installable + from .inspect_pkg import test_installable return test_installable(channel) def inspect_linkages(packages, prefix=_sys.prefix, untracked=False, all_packages=False, show_files=False, groupby='package', sysroot=''): - from .inspect import inspect_linkages + from .inspect_pkg import inspect_linkages packages = _ensure_list(packages) return inspect_linkages(packages, prefix=prefix, untracked=untracked, all_packages=all_packages, show_files=show_files, groupby=groupby, sysroot=sysroot) def inspect_objects(packages, prefix=_sys.prefix, groupby='filename'): - from .inspect import inspect_objects + from .inspect_pkg import inspect_objects packages = _ensure_list(packages) return inspect_objects(packages, prefix=prefix, groupby=groupby) @@ -334,7 +334,7 @@ def inspect_hash_inputs(packages): Returns a dictionary with a key for each input package and a value of the dictionary loaded from the package's info/hash_input.json file """ - from .inspect import get_hash_input + from .inspect_pkg import get_hash_input return get_hash_input(packages) diff --git a/conda_build/build.py b/conda_build/build.py index 68b8a61f83..569931451f 100644 --- a/conda_build/build.py +++ b/conda_build/build.py @@ -710,7 +710,7 @@ def post_process_files(m, initial_prefix_files): sys.exit(indent("""Error: Untracked file(s) %s found in conda-meta directory. This error usually comes from using conda in the build script. Avoid doing this, as it can lead to packages that include their dependencies.""" % meta_files)) - post_build(m, new_files, build_python=python, config=m.config) + post_build(m, new_files, build_python=python) entry_point_script_names = get_entry_point_script_names(m.get_value('build/entry_points')) if m.noarch == 'python': diff --git a/conda_build/cli/main_build.py b/conda_build/cli/main_build.py index d4e54a3541..00d9fd8d81 100644 --- a/conda_build/cli/main_build.py +++ b/conda_build/cli/main_build.py @@ -246,6 +246,11 @@ def parse_args(args): " you package may depend on files that are not included in the package, and may pass" "tests, but ultimately fail on installed systems.") ) + p.add_argument( + "--no-error-overlinking", dest='error_overlinking', default=True, action="store_false", + help=("Disable error when shared libraries from transitive dependencies are directly" + "linked to any executables or shared libraries in built packages.") + ) p.add_argument( "--long-test-prefix", default=True, action="store_false", help=("Use a long prefix for the test prefix, as well as the build prefix. Affects only " diff --git a/conda_build/config.py b/conda_build/config.py index dcb69a1e7f..dc26eb90bc 100644 --- a/conda_build/config.py +++ b/conda_build/config.py @@ -86,6 +86,7 @@ def _ensure_dir(path): Setting('_src_cache_root', abspath(expanduser(expandvars( cc_conda_build.get('cache_dir')))) if cc_conda_build.get('cache_dir') else None), Setting('copy_test_source_files', True), + Setting('error_overlinking', True), Setting('index', None), # support legacy recipes where only build is specified and expected to be the diff --git a/conda_build/inspect.py b/conda_build/inspect_pkg.py similarity index 97% rename from conda_build/inspect.py rename to conda_build/inspect_pkg.py index e2669b5696..39d6c1a8d9 100644 --- a/conda_build/inspect.py +++ b/conda_build/inspect_pkg.py @@ -15,17 +15,16 @@ import sys import tempfile -from .conda_interface import (iteritems, specs_from_args, is_linked, linked_data, linked, - get_index) -from .conda_interface import display_actions, install_actions -from .conda_interface import memoized - - from conda_build.os_utils.ldd import get_linkages, get_package_obj_files, get_untracked_obj_files from conda_build.os_utils.macho import get_rpaths, human_filetype from conda_build.utils import (groupby, getter, comma_join, rm_rf, package_has_file, get_logger, ensure_list) +from conda_build.conda_interface import (iteritems, specs_from_args, is_linked, linked_data, linked, + get_index) +from conda_build.conda_interface import display_actions, install_actions +from conda_build.conda_interface import memoized + @memoized def dist_files(prefix, dist): diff --git a/conda_build/os_utils/ldd.py b/conda_build/os_utils/ldd.py index 1a61d103bd..3f982a81cb 100644 --- a/conda_build/os_utils/ldd.py +++ b/conda_build/os_utils/ldd.py @@ -9,9 +9,9 @@ from conda_build.conda_interface import untracked from conda_build.conda_interface import linked_data -from conda_build import post from conda_build.os_utils.macho import otool -from conda_build.os_utils.pyldd import inspect_linkages +from conda_build.os_utils.pyldd import codefile_class, inspect_linkages, machofile, is_codefile + LDD_RE = re.compile(r'\s*(.*?)\s*=>\s*(.*?)\s*\(.*\)') LDD_NOT_FOUND_RE = re.compile(r'\s*(.*?)\s*=>\s*not found') @@ -49,6 +49,14 @@ def get_linkages(obj_files, prefix, sysroot): path = join(prefix, f) # ldd quite often fails on foreign architectures. ldd_failed = False + # Detect the filetype to emulate what the system-native tool does. + klass = codefile_class(path) + if klass == machofile: + resolve_filenames = False + recurse = False + else: + resolve_filenames = True + recurse = True try: if sys.platform.startswith('linux'): res[f] = ldd(path) @@ -58,21 +66,22 @@ def get_linkages(obj_files, prefix, sysroot): except: ldd_failed = True finally: - res_py = inspect_linkages(path, sysroot=sysroot) + res_py = inspect_linkages(path, resolve_filenames=resolve_filenames, + sysroot=sysroot, recurse=recurse) res_py = [(basename(lp), lp) for lp in res_py] - # print("set(res_py) {}".format(set(res_py))) if ldd_failed: res[f] = res_py - # else: - # print("set(res[f]) = {}".format(set(res[f]))) - # if set(res[f]) != set(res_py): - # print("WARNING: pyldd disagrees with ldd/otool. This will not cause any") - # print("WARNING: problems for this build, but please file a bug at:") - # print("WARNING: https://github.com/conda/conda-build") - # print("WARNING: and (if possible) attach file {}".format(path)) - # print("WARNING: ldd/tool gives {}, pyldd gives {}" - # .format(set(res[f]), set(res_py))) - + else: + if set(res[f]) != set(res_py): + print("WARNING: pyldd disagrees with ldd/otool. This will not cause any") + print("WARNING: problems for this build, but please file a bug at:") + print("WARNING: https://github.com/conda/conda-build") + print("WARNING: and (if possible) attach file {}".format(path)) + print("WARNING: \nldd/otool gives:\n{}\npyldd gives:\n{}\n" + .format("\n".join(str(e) for e in res[f]), "\n".join(str(e) + for e in res_py))) + print("Diffs\n{}".format(set(res[f]) - set(res_py))) + print("Diffs\n{}".format(set(res_py) - set(res[f]))) return res @@ -84,7 +93,7 @@ def get_package_obj_files(dist, prefix): if data: for f in data.get('files', []): path = join(prefix, f) - if post.is_obj(path): + if is_codefile(path): res.append(f) return res @@ -96,7 +105,7 @@ def get_untracked_obj_files(prefix): files = untracked(prefix) for f in files: path = join(prefix, f) - if post.is_obj(path): + if is_codefile(path): res.append(f) return res diff --git a/conda_build/os_utils/macho.py b/conda_build/os_utils/macho.py index 56d1aad9bb..87ac5660c8 100644 --- a/conda_build/os_utils/macho.py +++ b/conda_build/os_utils/macho.py @@ -1,8 +1,10 @@ from __future__ import absolute_import, division, print_function +import re import sys -from subprocess import Popen, check_output, PIPE +from subprocess import Popen, check_output, PIPE, STDOUT, CalledProcessError from os.path import islink, isfile +from conda_build.os_utils.pyldd import inspect_rpaths from itertools import islice NO_EXT = ( @@ -157,7 +159,11 @@ def otool(path, cb_filter=is_dylib_info): Any key values that can be converted to integers are converted to integers, the rest are strings. """ - lines = check_output(['otool', '-l', path]).decode('utf-8').splitlines() + lines = check_output(['otool', '-l', path], stderr=STDOUT).decode('utf-8') + # llvm-objdump returns 0 for some things that are anything but successful completion. + if (re.match('.*(is not a Mach-O|invalid|expected|unexpected).*', lines, re.MULTILINE)): + raise CalledProcessError + lines = lines.splitlines() return _get_matching_load_commands(lines, cb_filter) @@ -178,8 +184,12 @@ def get_id(path): def get_rpaths(path): """Return a list of the dylib rpaths""" - rpaths = otool(path, is_rpath) - return [rpath['path'] for rpath in rpaths] + # rpaths = otool(path, is_rpath) + # res_otool = [rpath['path'] for rpath in rpaths] + res_pyldd = inspect_rpaths(path, resolve_dirnames=False, use_os_varnames=True) + # if set(res_otool) != set(res_pyldd): + # print("disagreement about get_rpaths {} vs {}".format(set(res_otool), set(res_pyldd))) + return res_pyldd def add_rpath(path, rpath, verbose=False): diff --git a/conda_build/os_utils/pyldd.py b/conda_build/os_utils/pyldd.py index e2da315290..6c3092018d 100644 --- a/conda_build/os_utils/pyldd.py +++ b/conda_build/os_utils/pyldd.py @@ -5,12 +5,58 @@ import re import struct import sys - import logging + +from conda_build.utils import ensure_list + logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) +''' +# Detect security flags via readelf (from https://github.com/hugsy/gef) +# .. spawning out to readelf is not something we intend to do though .. +@lru_cache(32) +def checksec(filename): + """Check the security property of the ELF binary. The following properties are: + - Canary + - NX + - PIE + - Fortify + - Partial/Full RelRO. + Return a Python dict() with the different keys mentioned above, and the boolean + associated whether the protection was found.""" + + try: + readelf = which("readelf") + except IOError: + err("Missing `readelf`") + return + + def __check_security_property(opt, filename, pattern): + cmd = [readelf,] + cmd += opt.split() + cmd += [filename,] + lines = gef_execute_external(cmd, as_list=True) + for line in lines: + if re.search(pattern, line): + return True + return False + + results = collections.OrderedDict() + results["Canary"] = __check_security_property("-s", filename, r"__stack_chk_fail") is True + has_gnu_stack = __check_security_property("-W -l", filename, r"GNU_STACK") is True + if has_gnu_stack: + results["NX"] = __check_security_property("-W -l", filename, r"GNU_STACK.*RWE") is False + else: + results["NX"] = False + results["PIE"] = __check_security_property("-h", filename, r"Type:.*EXEC") is False + results["Fortify"] = __check_security_property("-s", filename, r"_chk@GLIBC") is True + results["Partial RelRO"] = __check_security_property("-l", filename, r"GNU_RELRO") is True + results["Full RelRO"] = __check_security_property("-d", filename, r"BIND_NOW") is True + return results +''' + ''' Eventual goal is to become a full replacement for `ldd` `otool -L` and `ntldd' For now only works with ELF and Mach-O files and command-line execution is not @@ -84,8 +130,7 @@ LC_LOAD_UPWARD_DYLIB = 0x23 LC_REEXPORT_DYLIB = 0x1f LC_LAZY_LOAD_DYLIB = 0x20 -LC_LOAD_DYLIBS = (LC_ID_DYLIB, - LC_LOAD_DYLIB, +LC_LOAD_DYLIBS = (LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, LC_LOAD_UPWARD_DYLIB, LC_LAZY_LOAD_DYLIB, @@ -156,6 +201,20 @@ def read(self, size=maxint): return bytes +class UnixExecutable(object): + def get_rpaths_transitive(self): + return self.rpaths_transitive + + def get_rpaths_nontransitive(self): + return self.rpaths_nontransitive + + def get_shared_libraries(self): + return self.shared_libraries + + def is_executable(self): + return True + + def read_data(file, endian, num=1): """ Read a given number of 32-bits unsigned integers from the given file @@ -296,38 +355,84 @@ def mach_o_find_rpaths(ofile, arch): return results -def _get_resolved_location(codefile, so, exedir, selfdir, sysroot='', resolved_rpath=None): - '''Returns a tuple of resolved location, ''' +def _get_resolved_location(codefile, + unresolved, + exedir, + selfdir, + LD_LIBRARY_PATH='', + default_paths=None, + sysroot='', + resolved_rpath=None): + ''' + From `man ld.so` + + When resolving shared object dependencies, the dynamic linker first inspects each dependency + string to see if it contains a slash (this can occur if a shared object pathname containing + slashes was specified at link time). If a slash is found, then the dependency string is + interpreted as a (relative or absolute) pathname, and the shared object is loaded using that + pathname. + + If a shared object dependency does not contain a slash, then it is searched for in the + following order: + + o Using the directories specified in the DT_RPATH dynamic section attribute of the binary + if present and DT_RUNPATH attribute does not exist. Use of DT_RPATH is deprecated. + + o Using the environment variable LD_LIBRARY_PATH (unless the executable is being run in + secure-execution mode; see below). in which case it is ignored. + + o Using the directories specified in the DT_RUNPATH dynamic section attribute of the + binary if present. Such directories are searched only to find those objects required + by DT_NEEDED (direct dependencies) entries and do not apply to those objects' children, + which must themselves have their own DT_RUNPATH entries. This is unlike DT_RPATH, + which is applied to searches for all children in the dependency tree. + + o From the cache file /etc/ld.so.cache, which contains a compiled list of candidate + shared objects previously found in the augmented library path. If, however, the binary + was linked with the -z nodeflib linker option, shared objects in the default paths are + skipped. Shared objects installed in hardware capability directories (see below) are + preferred to other shared objects. + + o In the default path /lib, and then /usr/lib. (On some 64-bit architectures, the default + paths for 64-bit shared objects are /lib64, and then /usr/lib64.) If the binary was + linked with the -z nodeflib linker option, this step is skipped. + + Returns a tuple of resolved location, rpath_used, in_sysroot + ''' rpath_result = None found = False - if so.startswith('$RPATH'): + ld_library_paths = [] if not LD_LIBRARY_PATH else LD_LIBRARY_PATH.split(':') + if unresolved.startswith('$RPATH'): these_rpaths = [resolved_rpath] if resolved_rpath else \ - codefile.get_rpaths_transitive() + \ - codefile.get_rpaths_nontransitive() + codefile.get_rpaths_transitive() + \ + ld_library_paths + \ + codefile.get_rpaths_nontransitive() + \ + [dp.replace('$SYSROOT', sysroot) for dp in ensure_list(default_paths)] for rpath in these_rpaths: - resolved = so.replace('$RPATH', rpath) \ - .replace('$SELFDIR', selfdir) \ - .replace('$EXEDIR', exedir) + resolved = unresolved.replace('$RPATH', rpath) \ + .replace('$SELFDIR', selfdir) \ + .replace('$EXEDIR', exedir) exists = os.path.exists(resolved) - exists_sysroot = os.path.exists(os.path.join(sysroot, resolved.lstrip('/'))) + exists_sysroot = exists and sysroot and resolved.startswith(sysroot) if resolved_rpath or exists or exists_sysroot: rpath_result = rpath found = True break if not found: # Return the so name so that it can be warned about as missing. - return so, None, False - else: - resolved = so.replace('$SELFDIR', selfdir) \ - .replace('$EXEDIR', exedir) + return unresolved, None, False + elif any(a in unresolved for a in ('$SELFDIR', '$EXEDIR')): + resolved = unresolved.replace('$SELFDIR', selfdir) \ + .replace('$EXEDIR', exedir) exists = os.path.exists(resolved) - exists_sysroot = os.path.exists(os.path.join(sysroot, resolved.lstrip('/'))) - if exists_sysroot and sysroot != '': - if exists: - log.warning('shared library exists in both the system and also the sysroot, picking non-sysroot') # noqa + exists_sysroot = exists and sysroot and resolved.startswith(sysroot) + else: + if unresolved.startswith('/'): + return unresolved, None, False else: - return os.path.join(sysroot, resolved.lstrip('/')), rpath_result, True - return resolved, rpath_result, False + return os.path.join(selfdir, unresolved), None, False + + return resolved, rpath_result, exists_sysroot def _get_resolved_relocated_location(codefile, so, src_exedir, src_selfdir, @@ -340,7 +445,7 @@ def _get_resolved_relocated_location(codefile, so, src_exedir, src_selfdir, return src_resolved, dst_resolved, in_sysroot -class machofile(object): +class machofile(UnixExecutable): def __init__(self, file, arch, initial_rpaths_transitive=[]): self.shared_libraries = [] results = mach_o_find_dylibs(file, arch) @@ -350,28 +455,28 @@ def __init__(self, file, arch, initial_rpaths_transitive=[]): file.seek(0) self.rpaths_transitive = initial_rpaths_transitive filetypes, rpaths = zip(*mach_o_find_rpaths(file, arch)) - self.rpaths_transitive.extend( - [rpath.replace('@loader_path', '$SELFDIR') - .replace('@executable_path', '$EXEDIR') - .replace('@rpath', '$RPATH') - for rpath in rpaths[0] if rpath]) - self.rpaths_nontransitive = [] + local_rpaths = [self.from_os_varnames(rpath) + for rpath in rpaths[0] if rpath] + self.rpaths_transitive.extend(local_rpaths) + self.rpaths_nontransitive = local_rpaths self.shared_libraries.extend( - [(so, so.replace('@loader_path', '$SELFDIR') - .replace('@executable_path', '$EXEDIR') - .replace('@rpath', '$RPATH')) for so in sos[0] if so]) + [(so, self.from_os_varnames(so)) for so in sos[0] if so]) file.seek(0) # Not actually used .. self.selfdir = os.path.dirname(file.name) + self.filename = file.name - def get_rpaths_transitive(self): - return self.rpaths_transitive - - def get_rpaths_nontransitive(self): - return [] + def to_os_varnames(self, input_): + """Don't make these functions - they are methods to match the API for elffiles.""" + return input_.replace('$SELFDIR', '@loader_path') \ + .replace('$EXEDIR', '@executable_path') \ + .replace('$RPATH', '@rpath') - def get_shared_libraries(self): - return self.shared_libraries + def from_os_varnames(self, input_): + """Don't make these functions - they are methods to match the API for elffiles.""" + return input_.replace('@loader_path', '$SELFDIR') \ + .replace('@executable_path', '$EXEDIR') \ + .replace('@rpath', '$RPATH') def get_resolved_shared_libraries(self, src_exedir, src_selfdir, sysroot=''): result = [] @@ -395,9 +500,8 @@ def get_relocated_shared_libraries(self, src_exedir, src_selfdir, result.append((so, resolved, dst_resolved, in_sysroot)) return result - def is_executable(self): - # TODO :: Write this. - return True + def uniqueness_key(self): + return self.filename ########################################### @@ -430,8 +534,12 @@ def is_executable(self): PT_NOTE = 4 PT_SHLIB = 5 PT_PHDR = 6 +PT_LOOS = 0x60000000 PT_LOPROC = 0x70000000 PT_HIPROC = 0x7fffffff +PT_GNU_EH_FRAME = (PT_LOOS + 0x474e550) +PT_GNU_STACK = (PT_LOOS + 0x474e551) +PT_GNU_RELRO = (PT_LOOS + 0x474e552) SHT_PROGBITS = 0x1 SHT_SYMTAB = 0x2 @@ -630,6 +738,7 @@ def postprocess(self, elffile, file): dt_needed = [] dt_rpath = [] dt_runpath = [] + dt_soname = '$EXECUTABLE' for m in range(int(self.sh_size / self.sh_entsize)): file.seek(self.sh_offset + (m * self.sh_entsize)) d_tag, = struct.unpack(endian + ptr_type, file.read(sz_ptr)) @@ -638,10 +747,12 @@ def postprocess(self, elffile, file): dt_needed.append(d_val_ptr) elif d_tag == DT_RPATH: dt_rpath.append(d_val_ptr) - elif d_tag == DT_RPATH: + elif d_tag == DT_RUNPATH: dt_runpath.append(d_val_ptr) elif d_tag == DT_STRTAB: dt_strtab_ptr = d_val_ptr + elif d_tag == DT_SONAME: + dt_soname = d_val_ptr if dt_strtab_ptr: strsec, offset = elffile.find_section_and_offset(dt_strtab_ptr) if strsec and strsec.sh_type == SHT_STRTAB: @@ -662,6 +773,10 @@ def postprocess(self, elffile, file): elffile.dt_runpath.extend([rp if rp.endswith(os.sep) else rp + os.sep for rp in rpaths]) + if dt_soname != '$EXECUTABLE': + end = dt_soname + strsec.table[dt_soname:].index('\0') + elffile.dt_soname = strsec.table[dt_soname:end] + # runpath always takes precedence. if len(elffile.dt_runpath): elffile.dt_rpath = [] @@ -690,7 +805,7 @@ def postprocess(self, elffile, file): elffile.program_interpreter = file.read(self.p_filesz - 1).decode() -class elffile(object): +class elffile(UnixExecutable): def __init__(self, file, initial_rpaths_transitive=[]): self.ehdr = elfheader(file) self.dt_needed = [] @@ -699,10 +814,12 @@ def __init__(self, file, initial_rpaths_transitive=[]): self.programheaders = [] self.elfsections = [] self.program_interpreter = None + self.dt_soname = '$EXECUTABLE' # Not actually used .. self.selfdir = os.path.dirname(file.name) for n in range(self.ehdr.phnum): + file.seek(self.ehdr.phoff + (n * self.ehdr.phentsize)) self.programheaders.append(programheader(self.ehdr, file)) for n in range(self.ehdr.shnum): file.seek(self.ehdr.shoff + (n * self.ehdr.shentsize)) @@ -712,6 +829,7 @@ def __init__(self, file, initial_rpaths_transitive=[]): ph.postprocess(self, file) for es in self.elfsections: es.postprocess(self, file) + # TODO :: If we have a program_interpreter we need to run it as: # TODO :: LD_DEBUG=all self.program_interpreter --inhibit-cache --list file.name # TODO :: then process the output line e.g.: @@ -720,18 +838,33 @@ def __init__(self, file, initial_rpaths_transitive=[]): # TODO :: when run through QEMU also, so in that case, # TODO :: we must run os.path.join(sysroot,self.program_interpreter) # TODO :: Interesting stuff: https://www.cs.virginia.edu/~dww4s/articles/ld_linux.html - self.rpaths_transitive = [rpath.replace('$ORIGIN', '$SELFDIR') - .replace('$LIB', '/usr/lib') - for rpath in (self.dt_rpath + - initial_rpaths_transitive)] - self.rpaths_nontransitive = [rpath.replace('$ORIGIN', '$SELFDIR') - .replace('$LIB', '/usr/lib') - for rpath in self.dt_runpath] - # This is implied. Making it explicit allows sharing the - # same _get_resolved_location() function with macho-o - self.shared_libraries = [(needed, '$RPATH/' + needed) + + dt_rpath = [p.rstrip("/") for p in self.dt_rpath] + dt_runpath = [p.rstrip("/") for p in self.dt_runpath] + self.rpaths_transitive = [self.from_os_varnames(rpath) + for rpath in (initial_rpaths_transitive + dt_rpath)] + self.rpaths_nontransitive = [self.from_os_varnames(rpath) + for rpath in dt_runpath] + # Lookup must be avoided when DT_NEEDED contains any '/'s + self.shared_libraries = [(needed, needed if '/' in needed else '$RPATH/' + needed) for needed in self.dt_needed] + def to_os_varnames(self, input): + if self.ehdr.sz_ptr == 8: + libdir = '/lib64' + else: + libdir = '/lib' + return input.replace('$SELFDIR', '$ORIGIN') \ + .replace(libdir, '$LIB') + + def from_os_varnames(self, input): + if self.ehdr.sz_ptr == 8: + libdir = '/lib64' + else: + libdir = '/lib' + return input.replace('$ORIGIN', '$SELFDIR') \ + .replace('$LIB', libdir) + def find_section_and_offset(self, addr): 'Can be called immediately after the elfsections have been constructed' for es in self.elfsections: @@ -739,46 +872,40 @@ def find_section_and_offset(self, addr): return es, addr - es.sh_addr return None, None - def get_rpaths_transitive(self): - return self.rpaths_transitive - - def get_rpaths_nontransitive(self): - return self.rpaths_nontransitive - - def get_shared_libraries(self): - return self.shared_libraries - def get_resolved_shared_libraries(self, src_exedir, src_selfdir, sysroot=''): result = [] for so_orig, so in self.shared_libraries: resolved, rpath, in_sysroot = \ - _get_resolved_location(self, so, src_exedir, src_selfdir, sysroot) + _get_resolved_location(self, + so, + src_exedir, + src_selfdir, + LD_LIBRARY_PATH='', + default_paths=['$SYSROOT/lib64', '$SYSROOT/usr/lib64'], + sysroot=sysroot) result.append((so_orig, resolved, rpath, in_sysroot)) return result - def is_executable(self): - return True - def selfdir(self): return None + def uniqueness_key(self): + return self.dt_soname -class inscrutablefile(object): - def __init__(self, file, initial_rpaths_transitive=[]): - return +class inscrutablefile(UnixExecutable): def get_rpaths_transitive(self): return [] - def get_resolved_shared_libraries(self, src_exedir, src_selfdir, sysroot=''): + def get_resolved_shared_libraries(self, *args, **kw): return [] - def is_executable(self): - return True - def selfdir(self): return None + def uniqueness_key(self): + return 'unknown' + def codefile(file, arch='any', initial_rpaths_transitive=[]): magic, = struct.unpack(BIG_ENDIAN + 'L', file.read(4)) @@ -821,42 +948,87 @@ def is_codefile(filename, skip_symlinks=True): return True -# TODO :: Consider memoizing instead of repeatedly scanning -# TODO :: libc.so/libSystem.dylib when inspect_linkages(recurse=True) -def _inspect_linkages_this(filename, sysroot='', arch='native'): +def _trim_sysroot(sysroot): while sysroot.endswith('/') or sysroot.endswith('\\'): sysroot = sysroot[:-1] + return sysroot + + +def _get_arch_if_native(arch): if arch == 'native': _, _, _, _, arch = os.uname() + return arch + + +# TODO :: Consider memoizing instead of repeatedly scanning +# TODO :: libc.so/libSystem.dylib when inspect_linkages(recurse=True) +def _inspect_linkages_this(filename, sysroot='', arch='native'): + ''' + + :param filename: + :param sysroot: + :param arch: + :return: + ''' + if not os.path.exists(filename): - return [], [] + return None, [], [] + sysroot = _trim_sysroot(sysroot) + arch = _get_arch_if_native(arch) with open(filename, 'rb') as f: # TODO :: Problems here: # TODO :: 1. macOS can modify RPATH for children in each .so - # TODO :: 2. Linux can identify the program interpreter which can change the initial RPATHs - cf = codefile(f, arch, ['/lib', '/usr/lib']) + # TODO :: 2. Linux can identify the program interpreter which can change the default_paths + cf = codefile(f, arch) dirname = os.path.dirname(filename) results = cf.get_resolved_shared_libraries(dirname, dirname, sysroot) if not results: - return [], [] + return cf.uniqueness_key(), [], [] orig_names, resolved_names, _, in_sysroot = map(list, zip(*results)) - return orig_names, resolved_names + return cf.uniqueness_key(), orig_names, resolved_names + + +def inspect_rpaths(filename, resolve_dirnames=True, use_os_varnames=True, + sysroot='', arch='native'): + if not os.path.exists(filename): + return [], [] + sysroot = _trim_sysroot(sysroot) + arch = _get_arch_if_native(arch) + with open(filename, 'rb') as f: + # TODO :: Problems here: + # TODO :: 1. macOS can modify RPATH for children in each .so + # TODO :: 2. Linux can identify the program interpreter which can change the initial RPATHs + # TODO :: Should '/lib', '/usr/lib' not include (or be?!) `sysroot`(s) instead? + cf = codefile(f, arch, ['/lib', '/usr/lib']) + if resolve_dirnames: + return [_get_resolved_location(cf, rpath, os.path.dirname(filename), + os.path.dirname(filename), sysroot)[0] + for rpath in cf.rpaths_nontransitive] + else: + if use_os_varnames: + return [cf.to_os_varnames(rpath) for rpath in cf.rpaths_nontransitive] + else: + return cf.rpaths_nontransitive # TODO :: Consider returning a tree structure or a dict when recurse is True? def inspect_linkages(filename, resolve_filenames=True, recurse=True, sysroot='', arch='native'): + already_seen = set() todo = set([filename]) done = set() results = set() while todo != done: filename = next(iter(todo - done)) - these_orig, these_resolved = _inspect_linkages_this(filename, sysroot=sysroot, arch=arch) - if resolve_filenames: - results.update(these_resolved) - else: - results.update(these_orig) - if recurse: - todo.update(these_resolved) + uniqueness_key, these_orig, these_resolved = _inspect_linkages_this( + filename, sysroot=sysroot, arch=arch) + if uniqueness_key not in already_seen: + if resolve_filenames: + results.update(these_resolved) + else: + results.update(these_orig) + if recurse: + todo.update(these_resolved) + already_seen.add(uniqueness_key) done.add(filename) return results diff --git a/conda_build/post.py b/conda_build/post.py index 7a0d003dd0..91a035c192 100644 --- a/conda_build/post.py +++ b/conda_build/post.py @@ -3,7 +3,7 @@ from collections import defaultdict import fnmatch from functools import partial -from glob import glob +from glob2 import glob import io import locale import re @@ -17,14 +17,15 @@ readlink = False from conda_build.os_utils import external -from .conda_interface import lchmod -from .conda_interface import walk_prefix -from .conda_interface import md5_file -from .conda_interface import PY3 -from .conda_interface import TemporaryDirectory +from conda_build.conda_interface import lchmod +from conda_build.conda_interface import walk_prefix +from conda_build.conda_interface import md5_file +from conda_build.conda_interface import PY3 +from conda_build.conda_interface import TemporaryDirectory from conda_build import utils -from conda_build.os_utils.pyldd import is_codefile +from conda_build.os_utils.pyldd import is_codefile, inspect_linkages +from conda_build.inspect_pkg import which_package if sys.platform == 'darwin': from conda_build.os_utils import macho @@ -287,6 +288,8 @@ def osx_ch_link(path, link_dict, host_prefix, build_prefix, files): print(".. fixing linking of %s in %s instead" % (link, path)) link_loc = find_lib(link, host_prefix, files, path) + print("New link location is %s" % (link_loc)) + if not link_loc: return @@ -402,16 +405,139 @@ def assert_relative_osx(path, prefix): assert not name.startswith(prefix), path -def mk_relative(m, f, files, config): - assert sys.platform != 'win32' - path = os.path.join(config.host_prefix, f) +def print_msg(errors, text): + if text.startswith(" ERROR"): + errors.append(text) + print(text) + + +def _find_needed_dso_in_prefix(m, needed_dso, f, files, errors, run_reqs, + msg_prelude, info_prelude): + in_prefix_dso = os.path.normpath(needed_dso.replace(m.config.host_prefix + '/', '')) + n_dso_p = "Needed DSO {} in {}".format(in_prefix_dso, f) + and_also = " (and also in this package)" if in_prefix_dso in files else "" + pkgs = list(which_package(in_prefix_dso, m.config.host_prefix)) + if len(pkgs) == 1: + if pkgs[0].quad[0] not in run_reqs: + print_msg(errors, '{}: {} found in {}{}'.format(msg_prelude, + n_dso_p, + pkgs[0], + and_also)) + print_msg(errors, '{}: .. but {} not in reqs/run, i.e. it is overlinked' + ' (likely) or a missing dependency (less likely)'. + format(msg_prelude, pkgs[0].quad[0])) + elif m.config.verbose: + print_msg(errors, '{}: {} found in {}{}'.format(info_prelude, + n_dso_p, + pkgs[0], + and_also)) + elif len(pkgs) > 1: + print_msg(errors, '{}: {} found in multiple packages:{}'.format(msg_prelude, + in_prefix_dso, + and_also)) + for pkg in pkgs: + print_msg(errors, '{}: {}'.format(msg_prelude, pkg)) + if pkg.dist_name not in m.meta.requirements.host: + print_msg(errors, '{}: .. but {} not in reqs/host (is transitive)'. + format(msg_prelude, pkg.dist_name)) + else: + if in_prefix_dso not in files: + print_msg(errors, '{}: {} not found in any packages'.format(msg_prelude, + in_prefix_dso)) + elif m.config.verbose: + print_msg(errors, '{}: {} found in this package'.format(info_prelude, + in_prefix_dso)) + + +def _find_needed_dso_in_system(m, needed_dso, errors, sysroots, msg_prelude, + info_prelude, warn_prelude): + # A system dependency then. We should be able to find it in one of the CDT o + # compiler packages on linux or at in a sysroot folder on other OSes. + # + if m.config.verbose and len(sysroots): + # Check id we have a CDT package. + dso_fname = os.path.basename(needed_dso) + sysroot_files = [] + for sysroot in sysroots: + sysroot_files.extend(glob(os.path.join(sysroot, '**', dso_fname))) + if len(sysroot_files): + # Removing config.build_prefix is only *really* for Linux, though we could + # use CONDA_BUILD_SYSROOT for macOS. We should figure out what to do about + # /opt/X11 too. + in_prefix_dso = os.path.normpath(sysroot_files[0].replace( + m.config.build_prefix + '/', '')) + n_dso_p = "Needed DSO {}".format(in_prefix_dso) + pkgs = list(which_package(in_prefix_dso, m.config.build_prefix)) + if len(pkgs): + print_msg(errors, '{}: {} found in CDT/compiler package {}'. + format(info_prelude, n_dso_p, pkgs[0])) + else: + print_msg(errors, '{}: {} not found in any CDT/compiler package?!'. + format(info_prelude, n_dso_p)) + else: + prelude = warn_prelude if needed_dso.startswith('$RPATH') else msg_prelude + print_msg(errors, "{}: {} not found in sysroot, is this binary repackaging?" + " .. do you need to use install_name_tool/patchelf?". + format(prelude, needed_dso)) + else: + # When a needed_dso begins with $RPATH it means we are making a CDT package + # (in any other case this would be a problem), but I should verify it is ok + # for CDT packages too. + if needed_dso.startswith('$RPATH'): + print_msg(errors, "{}: {} returned by pyldd. A CDT package?". + format(warn_prelude, needed_dso)) + else: + print_msg(errors, "{}: did not find - or even know where to look for: {}". + format(msg_prelude, needed_dso)) + + +def _inspect_file_linking(m, path, files, errors, pkg_name, run_reqs, sysroots): + f = os.path.basename(path) + warn_prelude = "WARNING ({},{})".format(pkg_name, f) + err_prelude = " ERROR ({},{})".format(pkg_name, f) + info_prelude = " INFO ({},{})".format(pkg_name, f) + msg_prelude = err_prelude if m.config.error_overlinking else warn_prelude + + needed = inspect_linkages(path, resolve_filenames=True, recurse=False) + for needed_dso in needed: + if needed_dso.startswith(m.config.host_prefix): + _find_needed_dso_in_prefix(m, needed_dso, f, files, errors, run_reqs, + msg_prelude, info_prelude) + elif needed_dso.startswith(m.config.build_prefix): + print_msg(errors, "ERROR: {} found in build prefix; should never happen".format( + needed_dso)) + else: + _find_needed_dso_in_system(m, needed_dso, errors, sysroots, msg_prelude, info_prelude, + warn_prelude) + + +def check_overlinking(m, files): + errors = [] + pkg_name = m.get_value('package/name') + + run_reqs = [req.split(' ')[0] for req in m.meta.get('requirements', {}).get('run', [])] + sysroots = glob(os.path.join(m.config.build_prefix, '**', 'sysroot')) + if not len(sysroots): + if m.config.variant.get('target_platform') == 'osx-64': + sysroots = ['/usr/lib', '/opt/X11', '/System/Library/Frameworks'] + + for f in files: + path = os.path.join(m.config.host_prefix, f) + if not is_obj(path): + continue + _inspect_file_linking(m, path, files, errors, pkg_name, run_reqs, sysroots) + if len(errors): + sys.exit(1) + + +def post_process_shared_lib(m, f, files): + path = os.path.join(m.config.host_prefix, f) if not is_obj(path): return - if sys.platform.startswith('linux'): - mk_relative_linux(f, config.host_prefix, rpaths=m.get_value('build/rpaths', ['lib'])) + mk_relative_linux(f, m.config.host_prefix, rpaths=m.get_value('build/rpaths', ['lib'])) elif sys.platform == 'darwin': - mk_relative_osx(path, config.host_prefix, config.build_prefix, files=files) + mk_relative_osx(path, m.config.host_prefix, m.config.build_prefix, files=files) def fix_permissions(files, prefix): @@ -438,12 +564,12 @@ def fix_permissions(files, prefix): log.warn(str(e)) -def post_build(m, files, build_python, config): +def post_build(m, files, build_python): print('number of files:', len(files)) - fix_permissions(files, config.host_prefix) + fix_permissions(files, m.config.host_prefix) for f in files: - make_hardlink_copy(f, config.host_prefix) + make_hardlink_copy(f, m.config.host_prefix) if sys.platform == 'win32': return @@ -453,16 +579,17 @@ def post_build(m, files, build_python, config): print("Skipping binary relocation logic") osx_is_app = bool(m.get_value('build/osx_is_app', False)) and sys.platform == 'darwin' - check_symlinks(files, config.host_prefix, config.croot) - prefix_files = utils.prefix_files(config.host_prefix) + check_symlinks(files, m.config.host_prefix, m.config.croot) + prefix_files = utils.prefix_files(m.config.host_prefix) for f in files: if f.startswith('bin/'): - fix_shebang(f, prefix=config.host_prefix, build_python=build_python, + fix_shebang(f, prefix=m.config.host_prefix, build_python=build_python, osx_is_app=osx_is_app) if binary_relocation is True or (isinstance(binary_relocation, list) and f in binary_relocation): - mk_relative(m, f, prefix_files, config) + post_process_shared_lib(m, f, prefix_files) + check_overlinking(m, files) def check_symlinks(files, prefix, croot): diff --git a/conda_build/skeletons/cran.py b/conda_build/skeletons/cran.py index d9f2683d07..8039e1777d 100644 --- a/conda_build/skeletons/cran.py +++ b/conda_build/skeletons/cran.py @@ -49,8 +49,10 @@ {patches} build: + merge_build_host: True # [win] # If this is a new build for the same version, increment the build number. number: {build_number} + {noarch_generic} # This is required to make R link correctly on Linux. rpaths: @@ -799,6 +801,11 @@ def skeletonize(in_packages, output_dir=".", output_suffix="", add_maintainer=No else: d['homeurl'] = ' https://CRAN.R-project.org/package={}'.format(package) + if cran_package.get("NeedsCompilation", 'no') == 'yes': + d['noarch_generic'] = '' + else: + d['noarch_generic'] = 'noarch: generic' + if 'Description' in cran_package: d['summary_comment'] = '' d['summary'] = ' ' + yaml_quote_string(cran_package['Description']) diff --git a/tests/test-recipes/metadata/_overlinkage_detection/0001-Add-lib-to-CMAKE_FIND_LIBRARY_PREFIXES-for-lzma.patch b/tests/test-recipes/metadata/_overlinkage_detection/0001-Add-lib-to-CMAKE_FIND_LIBRARY_PREFIXES-for-lzma.patch new file mode 100644 index 0000000000..8dd5f54ce9 --- /dev/null +++ b/tests/test-recipes/metadata/_overlinkage_detection/0001-Add-lib-to-CMAKE_FIND_LIBRARY_PREFIXES-for-lzma.patch @@ -0,0 +1,13 @@ +--- work/CMakeLists.txt.orig 2017-12-15 16:52:54.709341000 -0600 ++++ work/CMakeLists.txt 2017-12-15 16:54:40.943948000 -0600 +@@ -454,7 +454,10 @@ + # Find LZMA + # + IF(ENABLE_LZMA) ++ SET(OLD_CMAKE_FIND_LIBRARY_PREFIXES "${CMAKE_FIND_LIBRARY_PREFIXES}") ++ SET(CMAKE_FIND_LIBRARY_PREFIXES ";lib") + FIND_PACKAGE(LibLZMA) ++ SET(CMAKE_FIND_LIBRARY_PREFIXES "${OLD_CMAKE_FIND_LIBRARY_PREFIXES}") + ELSE() + SET(LIBZMA_FOUND FALSE) # Override cached value + ENDIF() diff --git a/tests/test-recipes/metadata/_overlinkage_detection/build_scripts/default.sh b/tests/test-recipes/metadata/_overlinkage_detection/build_scripts/default.sh new file mode 100644 index 0000000000..9f82c94a66 --- /dev/null +++ b/tests/test-recipes/metadata/_overlinkage_detection/build_scripts/default.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +autoreconf -vfi +mkdir build-${HOST} && pushd build-${HOST} +${SRC_DIR}/configure --prefix=${PREFIX} \ + --with-zlib \ + --with-bz2lib \ + --with-iconv \ + --with-lz4 \ + --with-lzma \ + --with-lzo2 \ + --without-cng \ + --with-openssl \ + --without-nettle \ + --with-xml2 \ + --without-expat +make -j${CPU_COUNT} ${VERBOSE_AT} +make install +popd diff --git a/tests/test-recipes/metadata/_overlinkage_detection/build_scripts/no_as_needed.sh b/tests/test-recipes/metadata/_overlinkage_detection/build_scripts/no_as_needed.sh new file mode 100644 index 0000000000..dcf914a27b --- /dev/null +++ b/tests/test-recipes/metadata/_overlinkage_detection/build_scripts/no_as_needed.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# this recipe will overlink libraries without the --as-needed linker arg +re='^(.*)-Wl,--as-needed(.*)$' +if [[ ${LDFLAGS} =~ $re ]]; then + export LDFLAGS="${BASH_REMATCH[1]}${BASH_REMATCH[2]}" +fi + +autoreconf -vfi +mkdir build-${HOST} && pushd build-${HOST} +${SRC_DIR}/configure --prefix=${PREFIX} \ + --with-zlib \ + --with-bz2lib \ + --with-iconv \ + --with-lz4 \ + --with-lzma \ + --with-lzo2 \ + --without-cng \ + --with-openssl \ + --without-nettle \ + --with-xml2 \ + --without-expat +make -j${CPU_COUNT} ${VERBOSE_AT} +make install +popd diff --git a/tests/test-recipes/metadata/_overlinkage_detection/meta.yaml b/tests/test-recipes/metadata/_overlinkage_detection/meta.yaml new file mode 100644 index 0000000000..4d6fd1bf17 --- /dev/null +++ b/tests/test-recipes/metadata/_overlinkage_detection/meta.yaml @@ -0,0 +1,66 @@ +{% set version = "3.3.2" %} + +package: + name: libarchive + version: {{ version }} + +source: + fn: libarchive-{{ version }}.tar.gz + url: https://github.com/libarchive/libarchive/archive/v{{ version }}.tar.gz + sha256: 22560b89d420b11a4a724877bc6672b7d7c0f239e77d42a39dcc5a9c3b28db82 + patches: + - 0001-Add-lib-to-CMAKE_FIND_LIBRARY_PREFIXES-for-lzma.patch + +build: + number: 3 + skip: True # [win and py2k] + +requirements: + build: + - {{ compiler('c') }} + - autoconf # [unix] + - automake # [unix] + - libtool # [unix] + - pkg-config # [unix] + - cmake # [win] + host: + - bzip2 + - libiconv # [osx] + - lz4-c + - xz + - lzo + - openssl + - libxml2 + - zlib + +test: + files: + - test/hello_world.xar + commands: + # Verify pkg-config file is in place. + - test -f "${PREFIX}/lib/pkgconfig/libarchive.pc" # [unix] + + # Verify headers are in place. + - test -f "${PREFIX}/include/archive.h" # [unix] + - test -f "${PREFIX}/include/archive_entry.h" # [unix] + + # Verify libraries are in place. + - test -f "${PREFIX}/lib/libarchive.a" # [unix] + - test -f "${PREFIX}/lib/libarchive.so" # [linux] + - test -f "${PREFIX}/lib/libarchive.dylib" # [osx] + + # Check for commands + - bsdcat --version + - bsdcpio --version + - bsdtar --version + - bsdtar -tf test/hello_world.xar + +about: + home: http://www.libarchive.org/ + summary: Multi-format archive and compression library + license: BSD 2-Clause + +extra: + recipe-maintainers: + - jakirkham + - mingwandroid diff --git a/tests/test-recipes/metadata/_overlinkage_detection/test/hello_world.xar b/tests/test-recipes/metadata/_overlinkage_detection/test/hello_world.xar new file mode 100644 index 0000000000..64d81ecf46 Binary files /dev/null and b/tests/test-recipes/metadata/_overlinkage_detection/test/hello_world.xar differ diff --git a/tests/test_api_build.py b/tests/test_api_build.py index 5158c61a4c..54c395e512 100644 --- a/tests/test_api_build.py +++ b/tests/test_api_build.py @@ -30,7 +30,7 @@ from conda_build.build import VersionOrder from conda_build.render import finalize_metadata from conda_build.utils import (copy_into, on_win, check_call_env, convert_path_for_cygwin_or_msys2, - package_has_file, check_output_env, get_conda_operation_locks) + package_has_file, check_output_env, get_conda_operation_locks, rm_rf) from conda_build.os_utils.external import find_executable from conda_build.exceptions import DependencyNeedsBuildingError @@ -914,7 +914,8 @@ def test_run_exports(testing_metadata, testing_config, testing_workdir): testing_metadata.meta['requirements']['build'] = ['test_has_run_exports', '{{ compiler("c") }}'] testing_metadata.meta['requirements']['host'] = ['test_has_run_exports_implicit_weak'] api.output_yaml(testing_metadata, 'host_present_strong/meta.yaml') - m = api.render(os.path.join(testing_workdir, 'host_present_strong'), config=testing_config)[0][0] + m = api.render(os.path.join(testing_workdir, 'host_present_strong'), + config=testing_config)[0][0] assert any('strong_pinned_package 1.0' in req for req in m.meta['requirements']['host']) assert 'strong_pinned_package 1.0.*' in m.meta['requirements']['run'] # weak one from test_has_run_exports should be excluded, since it is a build dep @@ -1183,7 +1184,8 @@ def test_pin_depends(testing_config): """ recipe = os.path.join(metadata_dir, '_pin_depends_record') m = api.render(recipe, config=testing_config)[0][0] - # the recipe python is not pinned, and having pin_depends set to record will not show it in record + # the recipe python is not pinned, and having pin_depends set to record + # will not show it in record assert not any(re.search('python\s+[23]\.', dep) for dep in m.meta['requirements']['run']) output = api.build(m, config=testing_config)[0] requires = package_has_file(output, 'info/requires') @@ -1217,3 +1219,17 @@ def test_provides_features_metadata(testing_config): assert index['requires_features'] == {'test': 'ok'} assert 'provides_features' in index assert index['provides_features'] == {'test2': 'also_ok'} + + +@pytest.mark.skipif(not sys.platform.startswith('linux'), + reason="Not implemented outside linux for now") +def test_overlinking_detection(testing_config): + testing_config.activate = True + recipe = os.path.join(metadata_dir, '_overlinkage_detection') + dest_file = os.path.join(recipe, 'build.sh') + copy_into(os.path.join(recipe, 'build_scripts', 'default.sh'), dest_file) + api.build(recipe, config=testing_config) + copy_into(os.path.join(recipe, 'build_scripts', 'no_as_needed.sh'), dest_file) + with pytest.raises(SystemExit): + api.build(recipe, config=testing_config) + rm_rf(dest_file)