diff --git a/compiler_gym/envs/llvm/__init__.py b/compiler_gym/envs/llvm/__init__.py index ebbeaff004..56c275966b 100644 --- a/compiler_gym/envs/llvm/__init__.py +++ b/compiler_gym/envs/llvm/__init__.py @@ -9,7 +9,7 @@ from compiler_gym.envs.llvm.compute_observation import compute_observation from compiler_gym.envs.llvm.llvm_benchmark import ( ClangInvocation, - get_system_includes, + get_system_library_flags, make_benchmark, ) from compiler_gym.envs.llvm.llvm_env import LlvmEnv @@ -24,7 +24,7 @@ __all__ = [ "ClangInvocation", "compute_observation", - "get_system_includes", + "get_system_library_flags", "LLVM_SERVICE_BINARY", "LlvmEnv", "make_benchmark", diff --git a/compiler_gym/envs/llvm/llvm_benchmark.py b/compiler_gym/envs/llvm/llvm_benchmark.py index 62e049d832..2cc251ecef 100644 --- a/compiler_gym/envs/llvm/llvm_benchmark.py +++ b/compiler_gym/envs/llvm/llvm_benchmark.py @@ -6,10 +6,13 @@ import logging import os import random +import shlex import subprocess +import sys import tempfile from concurrent.futures import as_completed from datetime import datetime +from functools import lru_cache from pathlib import Path from typing import Iterable, List, Optional, Union @@ -22,22 +25,31 @@ logger = logging.getLogger(__name__) -def get_compiler_includes(compiler: str) -> Iterable[Path]: - """Run the system compiler in verbose mode on a dummy input to get the - system header search path. +def _get_system_library_flags(compiler: str) -> Iterable[str]: + """Run the given compiler in verbose mode on a dummy input to extract the + set of system include paths, and on macOS, the location of + libclang_rt.osx.a. + + Returns an iterable sequence of command line flags. """ # Create a temporary directory to write the compiled 'binary' to, since # GNU assembler does not support piping to stdout. with tempfile.TemporaryDirectory() as d: try: + cmd = [compiler, "-xc++", "-v", "-", "-o", str(Path(d) / "a.out")] + # On macOS we need to compile a binary to invoke the linker.
+ if sys.platform != "darwin": + cmd.append("-c") with Popen( - [compiler, "-xc++", "-v", "-c", "-", "-o", str(Path(d) / "a.out")], + cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True, ) as process: - _, stderr = process.communicate(input="", timeout=30) + _, stderr = process.communicate( + input="int main(){return 0;}", timeout=30 + ) if process.returncode: raise OSError( f"Failed to invoke {compiler}. " @@ -59,18 +71,21 @@ def get_compiler_includes(compiler: str) -> Iterable[Path]: # /path/2 # End of search list in_search_list = False - for line in stderr.split("\n"): + lines = stderr.split("\n") + for line in lines: if in_search_list and line.startswith("End of search list"): break elif in_search_list: # We have an include path to return. path = Path(line.strip()) - yield path + yield "-isystem" + yield str(path) # Compatibility fix for compiling benchmark sources which use the # '#include <endian.h>' header, which on macOS is located in a # 'machine/endian.h' directory. if (path / "machine").is_dir(): - yield path / "machine" + yield "-isystem" + yield str(path / "machine") elif line.startswith("#include <...> search starts here:"): in_search_list = True else: @@ -80,32 +95,40 @@ def get_compiler_includes(compiler: str) -> Iterable[Path]: msg += f":\n{stderr}" raise OSError(msg) - -# Memoized search paths. Call get_system_includes() to access them. -_SYSTEM_INCLUDES = None + # On macOS we need to provide the location of the libclang_rt.osx.a library, + # which we can grab from the linker invocation. + if sys.platform == "darwin": + ld_invocation = shlex.split(lines[-1]) + for i in range(1, len(ld_invocation) - 1): + if ld_invocation[i] == "-lSystem": + yield "-lSystem" + yield ld_invocation[i + 1] -def get_system_includes() -> List[Path]: - """Determine the system include paths for C/C++ compilation jobs.
+@lru_cache(maxsize=16) +def get_system_library_flags(compiler: Optional[str] = None) -> List[str]: + """Determine the set of compilation flags needed to use the host system + libraries. This uses the system compiler to determine the search paths for C/C++ system - headers. By default, :code:`c++` is invoked. This can be overridden by - setting :code:`os.environ["CXX"]`. + headers, and on macOS, the location of libclang_rt.osx.a. By default, + :code:`c++` is invoked. This can be overridden by setting + :code:`os.environ["CXX"]` prior to calling this function. - :return: A list of paths to system header directories. - :raises OSError: If the compiler fails, or if the search paths cannot be - determined. + The results of this function are cached, so changes to CXX will have no + effect on subsequent calls. + + :return: A list of command line flags for a compiler. + + :raises OSError: If the compiler fails, or if the output of the compiler + cannot be understood. """ - # Memoize the system includes paths. - global _SYSTEM_INCLUDES - if _SYSTEM_INCLUDES is None: - system_compiler = os.environ.get("CXX", "c++") - try: - _SYSTEM_INCLUDES = list(get_compiler_includes(system_compiler)) - except OSError as e: - logger.warning("%s", e) - _SYSTEM_INCLUDES = [] - return _SYSTEM_INCLUDES + compiler = compiler or os.environ.get("CXX", "c++") + try: + return list(_get_system_library_flags(compiler)) + except OSError as e: + logger.warning("%s", e) + return [] class ClangInvocation: @@ -119,7 +142,7 @@ def __init__( :param args: The list of arguments to pass to clang. :param system_includes: Whether to include the system standard libraries during compilation jobs. This requires a system toolchain. See - :func:`get_system_includes`. + :func:`get_system_library_flags`. :param timeout: The maximum number of seconds to allow clang to run before terminating. 
""" @@ -130,8 +153,7 @@ def __init__( def command(self, outpath: Path) -> List[str]: cmd = [str(llvm.clang_path())] if self.system_includes: - for directory in get_system_includes(): - cmd += ["-isystem", str(directory)] + cmd += get_system_library_flags() cmd += [str(s) for s in self.args] cmd += ["-c", "-emit-llvm", "-o", str(outpath)] @@ -253,7 +275,7 @@ def make_benchmark( :param system_includes: Whether to include the system standard libraries during compilation jobs. This requires a system toolchain. See - :func:`get_system_includes`. + :func:`get_system_library_flags`. :param timeout: The maximum number of seconds to allow clang to run before terminating. diff --git a/compiler_gym/envs/llvm/llvm_env.py b/compiler_gym/envs/llvm/llvm_env.py index 05c401cadf..c8b5b9c455 100644 --- a/compiler_gym/envs/llvm/llvm_env.py +++ b/compiler_gym/envs/llvm/llvm_env.py @@ -424,7 +424,7 @@ def make_benchmark( :param system_includes: Whether to include the system standard libraries during compilation jobs. This requires a system toolchain. See - :func:`get_system_includes`. + :func:`get_system_library_flags`. :param timeout: The maximum number of seconds to allow clang to run before terminating. 
diff --git a/examples/example_unrolling_service/__init__.py b/examples/example_unrolling_service/__init__.py index a6960ee782..4686e352e3 100644 --- a/examples/example_unrolling_service/__init__.py +++ b/examples/example_unrolling_service/__init__.py @@ -9,7 +9,7 @@ from compiler_gym.datasets import Benchmark, Dataset from compiler_gym.datasets.uri import BenchmarkUri -from compiler_gym.envs.llvm.llvm_benchmark import get_system_includes +from compiler_gym.envs.llvm.llvm_benchmark import get_system_library_flags from compiler_gym.spaces import Reward from compiler_gym.third_party import llvm from compiler_gym.util.registration import register @@ -114,9 +114,7 @@ def preprocess(src: Path) -> bytes: "-I", str(NEURO_VECTORIZER_HEADER.parent), src, - ] - for directory in get_system_includes(): - cmd += ["-isystem", str(directory)] + ] + get_system_library_flags() return subprocess.check_output( cmd, timeout=300, diff --git a/examples/example_unrolling_service/example_without_bazel.py b/examples/example_unrolling_service/example_without_bazel.py index 7abef82306..9cf0799313 100644 --- a/examples/example_unrolling_service/example_without_bazel.py +++ b/examples/example_unrolling_service/example_without_bazel.py @@ -21,7 +21,7 @@ import compiler_gym from compiler_gym.datasets import Benchmark, Dataset -from compiler_gym.envs.llvm.llvm_benchmark import get_system_includes +from compiler_gym.envs.llvm.llvm_benchmark import get_system_library_flags from compiler_gym.spaces import Reward from compiler_gym.third_party import llvm from compiler_gym.util.registration import register @@ -124,8 +124,7 @@ def preprocess(src: Path) -> bytes: str(NEURO_VECTORIZER_HEADER.parent), src, ] - for directory in get_system_includes(): - cmd += ["-isystem", str(directory)] + cmd += get_system_library_flags() return subprocess.check_output( cmd, timeout=300, diff --git a/examples/loop_optimizations_service/__init__.py b/examples/loop_optimizations_service/__init__.py index 
41195cfc2c..45ab1055a5 100644 --- a/examples/loop_optimizations_service/__init__.py +++ b/examples/loop_optimizations_service/__init__.py @@ -8,7 +8,7 @@ from typing import Iterable from compiler_gym.datasets import Benchmark, Dataset -from compiler_gym.envs.llvm.llvm_benchmark import get_system_includes +from compiler_gym.envs.llvm.llvm_benchmark import get_system_library_flags from compiler_gym.spaces import Reward from compiler_gym.third_party import llvm from compiler_gym.util.registration import register @@ -115,8 +115,7 @@ def preprocess(src: Path) -> bytes: str(NEURO_VECTORIZER_HEADER.parent), src, ] - for directory in get_system_includes(): - cmd += ["-isystem", str(directory)] + cmd += get_system_library_flags() return subprocess.check_output( cmd, timeout=300, diff --git a/examples/loop_optimizations_service/example_without_bazel.py b/examples/loop_optimizations_service/example_without_bazel.py index 0dc1eac3c5..db6c5b7c4b 100644 --- a/examples/loop_optimizations_service/example_without_bazel.py +++ b/examples/loop_optimizations_service/example_without_bazel.py @@ -20,7 +20,7 @@ import compiler_gym from compiler_gym.datasets import Benchmark, Dataset -from compiler_gym.envs.llvm.llvm_benchmark import get_system_includes +from compiler_gym.envs.llvm.llvm_benchmark import get_system_library_flags from compiler_gym.spaces import Reward from compiler_gym.third_party import llvm from compiler_gym.util.registration import register @@ -125,9 +125,7 @@ def preprocess(src: Path) -> bytes: "-I", str(NEURO_VECTORIZER_HEADER.parent), src, - ] - for directory in get_system_includes(): - cmd += ["-isystem", str(directory)] + ] + get_system_library_flags() return subprocess.check_output( cmd, timeout=300, diff --git a/tests/llvm/custom_benchmarks_test.py b/tests/llvm/custom_benchmarks_test.py index 047494fce7..67363bf9e9 100644 --- a/tests/llvm/custom_benchmarks_test.py +++ b/tests/llvm/custom_benchmarks_test.py @@ -13,6 +13,7 @@ from compiler_gym.datasets import 
Benchmark, BenchmarkInitError from compiler_gym.envs import LlvmEnv, llvm +from compiler_gym.envs.llvm.llvm_benchmark import get_system_library_flags from compiler_gym.service.proto import Benchmark as BenchmarkProto from compiler_gym.service.proto import File from compiler_gym.util.runfiles_path import runfiles_path @@ -286,26 +287,29 @@ def test_two_custom_benchmarks_reset(env: LlvmEnv): assert env.benchmark == benchmark2.uri -def test_get_compiler_includes_not_found(): - with pytest.raises(OSError, match=r"Failed to invoke not-a-real-binary"): - list(llvm.llvm_benchmark.get_compiler_includes("not-a-real-binary")) +def test_get_system_library_flags_not_found(caplog): + assert get_system_library_flags("not-a-real-binary") == [] + logging_message = caplog.record_tuples[-1][-1] + assert "Failed to invoke not-a-real-binary" in logging_message -def test_get_compiler_includes_nonzero_exit_status(): +def test_get_system_library_flags_nonzero_exit_status(caplog): """Test that setting the $CXX to an invalid binary raises an error.""" - with pytest.raises(OSError, match=r"Failed to invoke false"): - list(llvm.llvm_benchmark.get_compiler_includes("false")) + assert get_system_library_flags("false") == [] + logging_message = caplog.record_tuples[-1][-1] + assert "Failed to invoke false" in logging_message -def test_get_compiler_includes_output_parse_failure(): +def test_get_system_library_flags_output_parse_failure(caplog): """Test that setting the $CXX to an invalid binary raises an error.""" old_cxx = os.environ.get("CXX") - os.environ["CXX"] = "echo" try: - with pytest.raises( - OSError, match="Failed to parse '#include <...>' search paths from echo" - ): - list(llvm.llvm_benchmark.get_compiler_includes("echo")) + os.environ["CXX"] = "echo" + assert get_system_library_flags("echo") == [] + logging_message = caplog.record_tuples[-1][-1] + assert ( + "Failed to parse '#include <...>' search paths from echo" in logging_message + ) finally: if old_cxx: os.environ["CXX"] = 
old_cxx