Skip to content

Commit

Permalink
[llvm] Add utility functions to split, merge, and construct benchmarks.
Browse files Browse the repository at this point in the history
This adds three utility functions:

    def make_benchmark_from_source(
        source: str,
        copt: Optional[List[str]] = None,
        lang: str = "c++",
        system_includes: bool = True,
        timeout: int = 600,
    ) -> Benchmark:
        ...

    def split_benchmark_by_function(
        benchmark: Benchmark, timeout: float = 300
    ) -> List[Benchmark]:
        ...

    def merge_benchmarks(
        benchmarks: List[Benchmark], timeout: float = 300
    ) -> Benchmark:
       ...

Credit to @hughleat for llvm-extract-one, an extension of LLVM's
llvm-extract that enables functions to be extracted by an integer
index rather than by name. This enables extracting anonymous
functions.
  • Loading branch information
ChrisCummins committed Nov 2, 2022
1 parent e659791 commit edc5348
Show file tree
Hide file tree
Showing 10 changed files with 639 additions and 38 deletions.
6 changes: 6 additions & 0 deletions compiler_gym/envs/llvm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
ClangInvocation,
get_system_library_flags,
make_benchmark,
make_benchmark_from_source,
merge_benchmarks,
split_benchmark_by_function,
)
from compiler_gym.envs.llvm.llvm_env import LlvmEnv

Expand All @@ -30,8 +33,11 @@
"LLVM_SERVICE_BINARY",
"LlvmEnv",
"make_benchmark",
"make_benchmark_from_source",
"merge_benchmarks",
"observation_spaces",
"reward_spaces",
"split_benchmark_by_function",
]

LLVM_SERVICE_BINARY = runfiles_path(
Expand Down
261 changes: 239 additions & 22 deletions compiler_gym/envs/llvm/llvm_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import sys
import tempfile
from concurrent.futures import as_completed
from copy import deepcopy
from datetime import datetime
from functools import lru_cache
from pathlib import Path
Expand Down Expand Up @@ -175,6 +176,27 @@ def command(self, outpath: Path) -> List[str]:

return cmd

# NOTE(cummins): There is some discussion about the best way to create a
# bitcode that is unoptimized yet does not hinder downstream
# optimization opportunities. Here we are using a configuration based on
# -O1 in which we prevent the -O1 optimization passes from running. This
# is because LLVM produces different function attributes depending on
# the optimization level. E.g. "-O0 -Xclang -disable-llvm-optzns -Xclang
# -disable-O0-optnone" will generate code with "noinline" attributes set
# on the functions, whereas "-Oz -Xclang -disable-llvm-optzns" will
# generate functions with "minsize" and "optsize" attributes set.
#
# See also:
# <https://lists.llvm.org/pipermail/llvm-dev/2018-August/thread.html#125365>
# <https://github.com/facebookresearch/CompilerGym/issues/110>
#
# These flags are prepended to any user-supplied copt when building a clang
# command line, so user flags come later on the command line.
DEFAULT_COPT = [
"-O1",
"-Xclang",
"-disable-llvm-passes",
"-Xclang",
"-disable-llvm-optzns",
]

@classmethod
def from_c_file(
cls,
Expand All @@ -184,29 +206,8 @@ def from_c_file(
timeout: int = 600,
) -> "ClangInvocation":
copt = copt or []
# NOTE(cummins): There is some discussion about the best way to create a
# bitcode that is unoptimized yet does not hinder downstream
# optimization opportunities. Here we are using a configuration based on
# -O1 in which we prevent the -O1 optimization passes from running. This
# is because LLVM produces different function attributes dependening on
# the optimization level. E.g. "-O0 -Xclang -disable-llvm-optzns -Xclang
# -disable-O0-optnone" will generate code with "noinline" attributes set
# on the functions, wheras "-Oz -Xclang -disable-llvm-optzns" will
# generate functions with "minsize" and "optsize" attributes set.
#
# See also:
# <https://lists.llvm.org/pipermail/llvm-dev/2018-August/thread.html#125365>
# <https://github.com/facebookresearch/CompilerGym/issues/110>
DEFAULT_COPT = [
"-O1",
"-Xclang",
"-disable-llvm-passes",
"-Xclang",
"-disable-llvm-optzns",
]

return cls(
DEFAULT_COPT + copt + [str(path)],
cls.DEFAULT_COPT + copt + [str(path)],
system_includes=system_includes,
timeout=timeout,
)
Expand Down Expand Up @@ -422,3 +423,219 @@ def _add_path(path: Path):
timestamp = datetime.now().strftime("%Y%m%HT%H%M%S")
uri = f"benchmark://user-v0/{timestamp}-{random.randrange(16**4):04x}"
return Benchmark.from_file_contents(uri, bitcode)


def make_benchmark_from_source(
    source: str,
    copt: Optional[List[str]] = None,
    lang: str = "c++",
    system_includes: bool = True,
    timeout: int = 600,
) -> Benchmark:
    """Create a benchmark from a string of source code.

    This function takes a string of source code and generates a benchmark that
    can be passed to :meth:`compiler_gym.envs.LlvmEnv.reset`.

    Example usage:

        >>> benchmark = make_benchmark_from_source("int A() {return 0;}")
        >>> env = gym.make("llvm-v0")
        >>> env.reset(benchmark=benchmark)

    The source is piped to clang on stdin and the bitcode is read back from
    stdout. The clang invocation used is roughly equivalent to:

    .. code-block::

        $ clang -x<lang> - -o - -c -emit-llvm <default flags> <copt>

    where :code:`<default flags>` is :code:`ClangInvocation.DEFAULT_COPT`
    (an :code:`-O1` configuration with the LLVM passes disabled), followed by
    the system library flags when :code:`system_includes` is set.

    Additional compile-time arguments to clang can be provided using the
    :code:`copt` argument. They are appended after the default flags:

        >>> benchmark = make_benchmark_from_source("...", copt=['-O2'])

    :param source: A string of source code.

    :param copt: A list of command line options to pass to clang when compiling
        the source.

    :param lang: The source language, passed to clang via the :code:`-x`
        argument. Defaults to C++.

    :param system_includes: Whether to include the system standard libraries
        during compilation jobs. This requires a system toolchain. See
        :func:`get_system_library_flags`.

    :param timeout: The maximum number of seconds to allow clang to run before
        terminating.

    :return: A :code:`Benchmark` instance.

    :raises OSError: If a suitable compiler cannot be found.

    :raises BenchmarkInitError: If the compilation job fails.

    :raises TimeoutExpired: If the compilation job exceeds :code:`timeout`
        seconds.
    """
    cmd = [
        str(llvm.clang_path()),
        f"-x{lang}",
        "-",  # Read the source from stdin.
        "-o",
        "-",  # Write the bitcode to stdout.
        "-c",
        "-emit-llvm",
        *ClangInvocation.DEFAULT_COPT,
    ]
    if system_includes:
        cmd += get_system_library_flags()
    # User options go last on the command line, after the defaults.
    cmd += copt or []

    with Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE
    ) as clang:
        bitcode, stderr = clang.communicate(source.encode("utf-8"), timeout=timeout)
        if clang.returncode:
            # Decode leniently so that undecodable bytes in the compiler output
            # cannot mask the real failure with a UnicodeDecodeError.
            error = stderr.decode("utf-8", errors="replace")
            raise BenchmarkInitError(
                f"Failed to make benchmark with compiler error: {error}"
            )

    # Year, month, day, then time of day. The random suffix disambiguates
    # benchmarks created within the same second.
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    uri = f"benchmark://user-v0/{timestamp}-{random.randrange(16**4):04x}"
    return Benchmark.from_file_contents(uri, bitcode)


def split_benchmark_by_function(
    benchmark: Benchmark, maximum_function_count: int = 0, timeout: float = 300
) -> List[Benchmark]:
    """Split a benchmark into single-function benchmarks.

    This function takes a benchmark as input and divides it into a set of
    independent benchmarks, where each benchmark contains a single function from
    the input.

    Under the hood, this uses an extension to `llvm-extract
    <https://llvm.org/docs/CommandGuide/llvm-extract.html>`__ to pull out
    individual parts of programs.

    In pseudo code, this is roughly equivalent to:

    .. code-block:: py

        for i in number_of_functions_in_benchmark(benchmark):
            yield llvm_extract(benchmark, function_number=i)

    Each returned benchmark uses the URI of the input benchmark with an added
    :code:`function` parameter set to the index of the extracted function.

    :param benchmark: A benchmark to split.

    :param maximum_function_count: If a positive integer, this specifies the
        maximum number of single-function benchmarks to extract from the input.
        If the input contains more than this number of functions, the remainder
        are ignored. Zero or a negative value means no limit.

    :param timeout: The maximum number of seconds to allow llvm-extract to run
        before terminating.

    :return: A list of :code:`Benchmark` instances.

    :raises ValueError: If the input benchmark contains no functions, or if
        llvm-extract fails.

    :raises TimeoutExpired: If any llvm-extract job exceeds :code:`timeout`
        seconds.
    """
    original_bitcode = benchmark.proto.program.contents
    llvm_extract_one = str(llvm.llvm_extract_one_path())

    # Count the number of functions in the benchmark.
    with Popen(
        [llvm_extract_one, "-", "-count-only", "-o", "-"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdin=subprocess.PIPE,
    ) as p:
        stdout, stderr = p.communicate(original_bitcode, timeout=timeout)
        if p.returncode:
            raise ValueError(
                "Failed to count number of functions in benchmark: "
                f"{stderr.decode('utf-8', errors='replace')}"
            )
    number_of_functions = int(stdout.decode("utf-8"))
    if number_of_functions <= 0:
        raise ValueError("No functions found!")

    # Only a positive maximum limits the number of functions extracted. A
    # negative value must not produce an empty range.
    if maximum_function_count > 0:
        n = min(number_of_functions, maximum_function_count)
    else:
        n = number_of_functions

    # Iterate over the number of functions, extracting each one in turn.
    split_benchmarks: List[Benchmark] = []
    for i in range(n):
        with Popen(
            [llvm_extract_one, "-", "-n", str(i), "-o", "-"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            stdin=subprocess.PIPE,
        ) as p:
            stdout, stderr = p.communicate(original_bitcode, timeout=timeout)
            if p.returncode:
                raise ValueError(
                    f"Failed to extract function {i}: "
                    f"{stderr.decode('utf-8', errors='replace')}"
                )

        # Use a fresh copy of the URI for every function so that the input
        # benchmark is never modified and no two outputs share a mutable URI.
        uri = deepcopy(benchmark.uri)
        uri.params["function"] = str(i)
        split_benchmarks.append(Benchmark.from_file_contents(uri=uri, data=stdout))
        logger.debug("Extracted %s", uri)

    return split_benchmarks


def merge_benchmarks(benchmarks: List[Benchmark], timeout: float = 300) -> Benchmark:
    """Merge a list of benchmarks into a single benchmark.

    Under the hood, this uses `llvm-link
    <https://llvm.org/docs/CommandGuide/llvm-link.html>`__ to combine each of
    the bitcodes of the input benchmarks into a single bitcode.

    :param benchmarks: A list of benchmarks to merge.

    :param timeout: The maximum number of seconds to allow llvm-link to run
        before terminating.

    :return: A :code:`Benchmark` instance.

    :raises ValueError: If the input contains no benchmarks, or if llvm-link
        fails.

    :raises TimeoutExpired: If llvm-link exceeds :code:`timeout` seconds.
    """
    if not benchmarks:
        raise ValueError("No benchmarks!")

    transient_cache = transient_cache_path(".")
    transient_cache.mkdir(parents=True, exist_ok=True)
    with tempfile.TemporaryDirectory(dir=transient_cache, prefix="llvm-link") as d:
        tmpdir = Path(d)

        # Write each of the benchmark bitcodes to a temporary file. llvm-link
        # takes its inputs as file paths, in the order given.
        cmd = [str(llvm.llvm_link_path()), "-o", "-", "-f"]
        for i, benchmark in enumerate(benchmarks):
            bitcode_path = tmpdir / f"{i}.bc"
            bitcode_path.write_bytes(benchmark.proto.program.contents)
            cmd.append(str(bitcode_path))

        # Run llvm-link on the temporary files, reading the merged bitcode
        # from stdout while the temporary directory still exists.
        with Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p:
            stdout, stderr = p.communicate(timeout=timeout)
            if p.returncode:
                raise ValueError(
                    "Failed to merge benchmarks: "
                    f"{stderr.decode('utf-8', errors='replace')}"
                )

    # Year, month, day, then time of day. The random suffix disambiguates
    # benchmarks created within the same second.
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    uri = f"benchmark://llvm-link-v0/{timestamp}-{random.randrange(16**4):04x}"
    return Benchmark.from_file_contents(uri=uri, data=stdout)
44 changes: 44 additions & 0 deletions compiler_gym/envs/llvm/service/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,50 @@ cc_library(
],
)

# Publicly visible bundle of the files for the llvm-extract-one tool: the
# :llvm-extract-one binary and, on every platform except darwin, the
# :libLLVMPolly target as well.
filegroup(
name = "llvm-extract-one-files",
srcs = [
":llvm-extract-one",
] + select({
"@llvm//:darwin": [],
"//conditions:default": [
":libLLVMPolly",
],
}),
visibility = ["//visibility:public"],
)

# Compiles LlvmExtractOne.cc against LLVM 10.0.0 as C++17 with RTTI disabled.
# The "-prelinked" binary is not used directly; the :llvm-extract-one-bin
# genrule copies it to its final name.
cc_binary(
name = "llvm-extract-one-prelinked",
srcs = ["LlvmExtractOne.cc"],
copts = [
"-DGOOGLE_PROTOBUF_NO_RTTI",
"-fno-rtti",
"-std=c++17",
],
deps = [
"@llvm//10.0.0",
],
)

# Produces the final "llvm-extract-one" binary from the prelinked build.
#
# On darwin the prelinked binary is copied as-is. On all other platforms the
# copy is made writable (chmod 666), its rpath is set to '$ORIGIN' using
# patchelf, and it is then made read-only and executable again (chmod 555).
# NOTE(review): the '$ORIGIN' rpath presumably lets the binary locate shared
# libraries that sit in its own directory at runtime -- confirm.
genrule(
name = "llvm-extract-one-bin",
srcs = [":llvm-extract-one-prelinked"],
outs = ["llvm-extract-one"],
cmd = select({
"@llvm//:darwin": (
"cp $(location :llvm-extract-one-prelinked) $@"
),
"//conditions:default": (
"cp $(location :llvm-extract-one-prelinked) $@ && " +
"chmod 666 $@ && " +
"patchelf --set-rpath '$$ORIGIN' $@ && " +
"chmod 555 $@"
),
}),
visibility = ["//visibility:public"],
)

cc_library(
name = "LlvmSession",
srcs = ["LlvmSession.cc"],
Expand Down
14 changes: 14 additions & 0 deletions compiler_gym/envs/llvm/service/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,20 @@ cg_cc_library(
PUBLIC
)

# Build the llvm-extract-one tool from LlvmExtractOne.cc with RTTI disabled,
# linking only the LLVM core, support, and irreader components.
# _LLVM_LIBS is reassigned by the next llvm_map_components_to_libnames() call
# below, so this binary must be declared before that call.
llvm_map_components_to_libnames(_LLVM_LIBS core support irreader)
cg_cc_binary(
NAME llvm-extract-one
SRCS LlvmExtractOne.cc
COPTS
"-fno-rtti"
ABS_DEPS
${_LLVM_LIBS}
INCLUDES
${LLVM_INCLUDE_DIRS}
DEFINES
${LLVM_DEFINITIONS}
)

llvm_map_components_to_libnames(_LLVM_LIBS
core analysis coroutines objcarcopts target codegen
x86codegen x86asmparser #TODO(boian): can these be found programmatically
Expand Down
Loading

0 comments on commit edc5348

Please sign in to comment.