Skip to content

Commit

Permalink
[llvm] Compute and return -lSystem location on macOS.
Browse files Browse the repository at this point in the history
This extends the logic for extracting #include search paths to also
extract the location of the -lSystem library from the host target.

This is to enable compiler binary support on macOS.
  • Loading branch information
ChrisCummins committed Feb 22, 2022
1 parent 60218c2 commit b9413dc
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 61 deletions.
4 changes: 2 additions & 2 deletions compiler_gym/envs/llvm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from compiler_gym.envs.llvm.compute_observation import compute_observation
from compiler_gym.envs.llvm.llvm_benchmark import (
ClangInvocation,
get_system_includes,
get_system_library_flags,
make_benchmark,
)
from compiler_gym.envs.llvm.llvm_env import LlvmEnv
Expand All @@ -24,7 +24,7 @@
__all__ = [
"ClangInvocation",
"compute_observation",
"get_system_includes",
"get_system_library_flags",
"LLVM_SERVICE_BINARY",
"LlvmEnv",
"make_benchmark",
Expand Down
86 changes: 54 additions & 32 deletions compiler_gym/envs/llvm/llvm_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@
import logging
import os
import random
import shlex
import subprocess
import sys
import tempfile
from concurrent.futures import as_completed
from datetime import datetime
from functools import lru_cache
from pathlib import Path
from typing import Iterable, List, Optional, Union

Expand All @@ -22,22 +25,31 @@
logger = logging.getLogger(__name__)


def get_compiler_includes(compiler: str) -> Iterable[Path]:
"""Run the system compiler in verbose mode on a dummy input to get the
system header search path.
def _get_system_library_flags(compiler: str) -> Iterable[str]:
"""Run the given compiler in verbose mode on a dummy input to extract the
set of system include paths, and on macOS, the location of
libclang_rt.osx.a.
Returns an iterable sequence of compiler command line flags.
"""
# Create a temporary directory to write the compiled 'binary' to, since
# GNU assembler does not support piping to stdout.
with tempfile.TemporaryDirectory() as d:
try:
cmd = [compiler, "-xc++", "-v", "-", "-o", str(Path(d) / "a.out")]
# On macOS we need to compile a binary to invoke the linker.
if sys.platform != "darwin":
cmd.append("-c")
with Popen(
[compiler, "-xc++", "-v", "-c", "-", "-o", str(Path(d) / "a.out")],
cmd,
stdout=subprocess.DEVNULL,
stderr=subprocess.PIPE,
stdin=subprocess.PIPE,
universal_newlines=True,
) as process:
_, stderr = process.communicate(input="", timeout=30)
_, stderr = process.communicate(
input="int main(){return 0;}", timeout=30
)
if process.returncode:
raise OSError(
f"Failed to invoke {compiler}. "
Expand All @@ -59,18 +71,21 @@ def get_compiler_includes(compiler: str) -> Iterable[Path]:
# /path/2
# End of search list
in_search_list = False
for line in stderr.split("\n"):
lines = stderr.split("\n")
for line in lines:
if in_search_list and line.startswith("End of search list"):
break
elif in_search_list:
# We have an include path to return.
path = Path(line.strip())
yield path
yield "-isystem"
yield str(path)
# Compatibility fix for compiling benchmark sources which use the
# '#include <endian.h>' header, which on macOS is located in a
# 'machine/endian.h' directory.
if (path / "machine").is_dir():
yield path / "machine"
yield "-isystem"
yield str(path / "machine")
elif line.startswith("#include <...> search starts here:"):
in_search_list = True
else:
Expand All @@ -80,32 +95,40 @@ def get_compiler_includes(compiler: str) -> Iterable[Path]:
msg += f":\n{stderr}"
raise OSError(msg)


# Memoized search paths. Call get_system_includes() to access them.
_SYSTEM_INCLUDES = None
# On macOS we need to provide the location of the libclang_rt.osx.a library,
# which we can grab from the linker invocation.
if sys.platform == "darwin":
ld_invocation = shlex.split(lines[-1])
for i in range(1, len(ld_invocation) - 1):
if ld_invocation[i] == "-lSystem":
yield "-lSystem"
yield ld_invocation[i + 1]


def get_system_includes() -> List[Path]:
"""Determine the system include paths for C/C++ compilation jobs.
@lru_cache(maxsize=16)
def get_system_library_flags(compiler: Optional[str] = None) -> List[str]:
"""Determine the set of compilation flags needed to use the host system
libraries.
This uses the system compiler to determine the search paths for C/C++ system
headers. By default, :code:`c++` is invoked. This can be overridden by
setting :code:`os.environ["CXX"]`.
headers, and on macOS, the location of libclang_rt.osx.a. By default,
:code:`c++` is invoked. This can be overridden by setting
:code:`os.environ["CXX"]` prior to calling this function.
:return: A list of paths to system header directories.
:raises OSError: If the compiler fails, or if the search paths cannot be
determined.
The results of this function are cached, so changes to CXX will have no
effect on subsequent calls.
:return: A list of command line flags for a compiler.
:raises OSError: If the compiler fails, or if the output of the compiler
cannot be understood.
"""
# Memoize the system includes paths.
global _SYSTEM_INCLUDES
if _SYSTEM_INCLUDES is None:
system_compiler = os.environ.get("CXX", "c++")
try:
_SYSTEM_INCLUDES = list(get_compiler_includes(system_compiler))
except OSError as e:
logger.warning("%s", e)
_SYSTEM_INCLUDES = []
return _SYSTEM_INCLUDES
compiler = compiler or os.environ.get("CXX", "c++")
try:
return list(_get_system_library_flags(compiler))
except OSError as e:
logger.warning("%s", e)
return []


class ClangInvocation:
Expand All @@ -119,7 +142,7 @@ def __init__(
:param args: The list of arguments to pass to clang.
:param system_includes: Whether to include the system standard libraries
during compilation jobs. This requires a system toolchain. See
:func:`get_system_includes`.
:func:`get_system_library_flags`.
:param timeout: The maximum number of seconds to allow clang to run
before terminating.
"""
Expand All @@ -130,8 +153,7 @@ def __init__(
def command(self, outpath: Path) -> List[str]:
cmd = [str(llvm.clang_path())]
if self.system_includes:
for directory in get_system_includes():
cmd += ["-isystem", str(directory)]
cmd += get_system_library_flags()

cmd += [str(s) for s in self.args]
cmd += ["-c", "-emit-llvm", "-o", str(outpath)]
Expand Down Expand Up @@ -253,7 +275,7 @@ def make_benchmark(
:param system_includes: Whether to include the system standard libraries
during compilation jobs. This requires a system toolchain. See
:func:`get_system_includes`.
:func:`get_system_library_flags`.
:param timeout: The maximum number of seconds to allow clang to run before
terminating.
Expand Down
2 changes: 1 addition & 1 deletion compiler_gym/envs/llvm/llvm_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ def make_benchmark(
:param system_includes: Whether to include the system standard libraries
during compilation jobs. This requires a system toolchain. See
:func:`get_system_includes`.
:func:`get_system_library_flags`.
:param timeout: The maximum number of seconds to allow clang to run
before terminating.
Expand Down
6 changes: 2 additions & 4 deletions examples/example_unrolling_service/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from compiler_gym.datasets import Benchmark, Dataset
from compiler_gym.datasets.uri import BenchmarkUri
from compiler_gym.envs.llvm.llvm_benchmark import get_system_includes
from compiler_gym.envs.llvm.llvm_benchmark import get_system_library_flags
from compiler_gym.spaces import Reward
from compiler_gym.third_party import llvm
from compiler_gym.util.registration import register
Expand Down Expand Up @@ -114,9 +114,7 @@ def preprocess(src: Path) -> bytes:
"-I",
str(NEURO_VECTORIZER_HEADER.parent),
src,
]
for directory in get_system_includes():
cmd += ["-isystem", str(directory)]
] + get_system_library_flags()
return subprocess.check_output(
cmd,
timeout=300,
Expand Down
5 changes: 2 additions & 3 deletions examples/example_unrolling_service/example_without_bazel.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

import compiler_gym
from compiler_gym.datasets import Benchmark, Dataset
from compiler_gym.envs.llvm.llvm_benchmark import get_system_includes
from compiler_gym.envs.llvm.llvm_benchmark import get_system_library_flags
from compiler_gym.spaces import Reward
from compiler_gym.third_party import llvm
from compiler_gym.util.registration import register
Expand Down Expand Up @@ -124,8 +124,7 @@ def preprocess(src: Path) -> bytes:
str(NEURO_VECTORIZER_HEADER.parent),
src,
]
for directory in get_system_includes():
cmd += ["-isystem", str(directory)]
cmd += get_system_library_flags()
return subprocess.check_output(
cmd,
timeout=300,
Expand Down
5 changes: 2 additions & 3 deletions examples/loop_optimizations_service/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing import Iterable

from compiler_gym.datasets import Benchmark, Dataset
from compiler_gym.envs.llvm.llvm_benchmark import get_system_includes
from compiler_gym.envs.llvm.llvm_benchmark import get_system_library_flags
from compiler_gym.spaces import Reward
from compiler_gym.third_party import llvm
from compiler_gym.util.registration import register
Expand Down Expand Up @@ -115,8 +115,7 @@ def preprocess(src: Path) -> bytes:
str(NEURO_VECTORIZER_HEADER.parent),
src,
]
for directory in get_system_includes():
cmd += ["-isystem", str(directory)]
cmd += get_system_library_flags()
return subprocess.check_output(
cmd,
timeout=300,
Expand Down
6 changes: 2 additions & 4 deletions examples/loop_optimizations_service/example_without_bazel.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

import compiler_gym
from compiler_gym.datasets import Benchmark, Dataset
from compiler_gym.envs.llvm.llvm_benchmark import get_system_includes
from compiler_gym.envs.llvm.llvm_benchmark import get_system_library_flags
from compiler_gym.spaces import Reward
from compiler_gym.third_party import llvm
from compiler_gym.util.registration import register
Expand Down Expand Up @@ -125,9 +125,7 @@ def preprocess(src: Path) -> bytes:
"-I",
str(NEURO_VECTORIZER_HEADER.parent),
src,
]
for directory in get_system_includes():
cmd += ["-isystem", str(directory)]
] + get_system_library_flags()
return subprocess.check_output(
cmd,
timeout=300,
Expand Down
28 changes: 16 additions & 12 deletions tests/llvm/custom_benchmarks_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from compiler_gym.datasets import Benchmark, BenchmarkInitError
from compiler_gym.envs import LlvmEnv, llvm
from compiler_gym.envs.llvm.llvm_benchmark import get_system_library_flags
from compiler_gym.service.proto import Benchmark as BenchmarkProto
from compiler_gym.service.proto import File
from compiler_gym.util.runfiles_path import runfiles_path
Expand Down Expand Up @@ -286,26 +287,29 @@ def test_two_custom_benchmarks_reset(env: LlvmEnv):
assert env.benchmark == benchmark2.uri


def test_get_compiler_includes_not_found():
with pytest.raises(OSError, match=r"Failed to invoke not-a-real-binary"):
list(llvm.llvm_benchmark.get_compiler_includes("not-a-real-binary"))
def test_get_system_library_flags_not_found(caplog):
assert get_system_library_flags("not-a-real-binary") == []
logging_message = caplog.record_tuples[-1][-1]
assert "Failed to invoke not-a-real-binary" in logging_message


def test_get_compiler_includes_nonzero_exit_status():
def test_get_system_library_flags_nonzero_exit_status(caplog):
"""Test that a compiler which exits with a nonzero status yields no flags and logs a warning."""
with pytest.raises(OSError, match=r"Failed to invoke false"):
list(llvm.llvm_benchmark.get_compiler_includes("false"))
assert get_system_library_flags("false") == []
logging_message = caplog.record_tuples[-1][-1]
assert "Failed to invoke false" in logging_message


def test_get_compiler_includes_output_parse_failure():
def test_get_system_library_flags_output_parse_failure(caplog):
"""Test that compiler output which cannot be parsed yields no flags and logs a warning."""
old_cxx = os.environ.get("CXX")
os.environ["CXX"] = "echo"
try:
with pytest.raises(
OSError, match="Failed to parse '#include <...>' search paths from echo"
):
list(llvm.llvm_benchmark.get_compiler_includes("echo"))
os.environ["CXX"] = "echo"
assert get_system_library_flags("echo") == []
logging_message = caplog.record_tuples[-1][-1]
assert (
"Failed to parse '#include <...>' search paths from echo" in logging_message
)
finally:
if old_cxx:
os.environ["CXX"] = old_cxx
Expand Down

0 comments on commit b9413dc

Please sign in to comment.