Skip to content

Commit

Permalink
Add semantics validation for cBench benchmarks.
Browse files Browse the repository at this point in the history
Add semantics validation for cBench benchmarks. This is achieved by
adding a new validation callback mechanism that, when invoked,
compiles the given cBench benchmark to a binary and executes it using
prepared datasets. The output of the program, along with any generated
output files, is differentially tested against a copy of the program
compiled without optimizations. A change in program behavior that is
detected by this mechanism is reported.

Calling `compiler_gym.validate_state()` on a benchmark that supports
semantics validation will automatically run it.

The core of the implementation is in
compiler_gym/envs/llvm/datasets.py. It defines a set of library
functions so that these validation callbacks can be defined ad-hoc for
cBench in quite a succinct form, e.g.:

    validator(
        benchmark="benchmark://cBench-v0/ghostscript",
        cmd="$BIN -sDEVICE=ppm -dNOPAUSE -dQUIET -sOutputFile=output.ppm -- 1.ps",
        data=["office_data/1.ps"],
        outs=["output.ppm"],
        linkopts=["-lm", "-lz"],
        pre_execution_callback=setup_ghostscript_library_files,
    )

As part of #45 we may want to make a public API similar to this and
move it into the dataset definitions.

Multiple validation callbacks can be defined for a single
benchmark. Where a benchmark matches multiple validators, they are
executed in parallel.

Compiling binaries from cBench benchmarks requires that the bitcodes
be compiled against the system-specific standard library, so this
patch also splits the cBench dataset into macOS and Linux versions.
  • Loading branch information
ChrisCummins committed Jan 29, 2021
1 parent 377461b commit c520919
Show file tree
Hide file tree
Showing 101 changed files with 1,113 additions and 349 deletions.
86 changes: 84 additions & 2 deletions WORKSPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
workspace(name = "CompilerGym")

load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file")

# === Google test ===

Expand Down Expand Up @@ -201,11 +201,93 @@ filegroup(
)

# Snapshot of the ctuning/ai GitHub repository, pinned to commit
# afaba3e2900523a0813693fd614a114b4a6f6729 and verified by SHA-256.
# The inline BUILD content exposes two public filegroups:
#   - "all":    every file in the archive (glob(["**"])).
#   - "readme": just README.md.
http_archive(
name = "ctuning-ai",
build_file_content = """
filegroup(
name = "all",
srcs = glob(["**"]),
visibility = ["//visibility:public"],
)
filegroup(
name = "readme",
srcs = ["README.md"],
visibility = ["//visibility:public"],
)
""",
sha256 = "2e12233a3d898a96a2ce279123ee256bee357f649e3c579f974628b6c35ea53d",
strip_prefix = "ai-afaba3e2900523a0813693fd614a114b4a6f6729",
urls = ["https://github.com/ctuning/ai/archive/afaba3e2900523a0813693fd614a114b4a6f6729.tar.gz"],
)

# Datasets.

# cBench V1.1 consumer_tiff_data dataset tarball, pinned by SHA-256.
http_file(
    name = "cBench_consumer_tiff_data",
    urls = [
        "https://downloads.sourceforge.net/project/cbenchmark/cDatasets/V1.1/cDatasets_V1.1_consumer_tiff_data.tar.gz",
    ],
    sha256 = "779abb7b7fee8733313e462e6066c16375e9209a9f7ff692fd06c7598946939a",
)

# cBench V1.1 office_data dataset tarball, pinned by SHA-256.
http_file(
    name = "cBench_office_data",
    urls = [
        "https://downloads.sourceforge.net/project/cbenchmark/cDatasets/V1.1/cDatasets_V1.1_office_data.tar.gz",
    ],
    sha256 = "cfa09cd37cb93aba57415033905dc6308653c7b833feba5a25067bfb62999f32",
)

# cBench V1.1 telecom_data dataset tarball, pinned by SHA-256.
http_file(
    name = "cBench_telecom_data",
    urls = [
        "https://downloads.sourceforge.net/project/cbenchmark/cDatasets/V1.1/cDatasets_V1.1_telecom_data.tar.gz",
    ],
    sha256 = "e5cb6663beefe32fd12f90c8f533f8e1bce2f05ee4e3836efb5556d5e1089df0",
)

# cBench V1.1 consumer_jpeg_data dataset tarball, pinned by SHA-256.
http_file(
    name = "cBench_consumer_jpeg_data",
    urls = [
        "https://downloads.sourceforge.net/project/cbenchmark/cDatasets/V1.1/cDatasets_V1.1_consumer_jpeg_data.tar.gz",
    ],
    sha256 = "bec5ffc15cd2f952d9a786f3cd31d90955c318a5e4f69c5ba472f79d5a3e8f0b",
)

# cBench V1.1 telecom_gsm_data dataset tarball, pinned by SHA-256.
http_file(
    name = "cBench_telecom_gsm_data",
    urls = [
        "https://downloads.sourceforge.net/project/cbenchmark/cDatasets/V1.1/cDatasets_V1.1_telecom_gsm_data.tar.gz",
    ],
    sha256 = "52545d3a0ce15021131c62d96d3a3d7e6670e2d6c34226ac9a3d5191a1ee214a",
)

# cBench V1.1 consumer_data dataset tarball, pinned by SHA-256.
http_file(
    name = "cBench_consumer_data",
    urls = [
        "https://downloads.sourceforge.net/project/cbenchmark/cDatasets/V1.1/cDatasets_V1.1_consumer_data.tar.gz",
    ],
    sha256 = "a4d40344af3022bfd7b4c6fcf6d59d598825b07d9e37769dbf1b3effa39aa445",
)

# cBench V1.1 bzip2_data dataset tarball, pinned by SHA-256.
http_file(
    name = "cBench_bzip2_data",
    urls = [
        "https://downloads.sourceforge.net/project/cbenchmark/cDatasets/V1.1/cDatasets_V1.1_bzip2_data.tar.gz",
    ],
    sha256 = "46e5760eeef77e6b0c273af92de971bc45f33a59e0efc183073d9aa6b716c302",
)

# cBench V1.1 network_patricia_data dataset tarball, pinned by SHA-256.
http_file(
    name = "cBench_network_patricia_data",
    urls = [
        "https://downloads.sourceforge.net/project/cbenchmark/cDatasets/V1.1/cDatasets_V1.1_network_patricia_data.tar.gz",
    ],
    sha256 = "72dae0e670d93ef929e50aca7a138463e0915502281ccafe793e378cb2a85dfb",
)

# cBench V1.1 network_dijkstra_data dataset tarball, pinned by SHA-256.
# http_file downloads a single file and has no `build_file_content`
# attribute (that attribute belongs to http_archive), so none is passed here.
http_file(
    name = "cBench_network_dijkstra_data",
    sha256 = "41c13f59cdfbc772081cd941f499b030370bc570fc2ba60a5c4b7194bc36ca5f",
    urls = ["https://downloads.sourceforge.net/project/cbenchmark/cDatasets/V1.1/cDatasets_V1.1_network_dijkstra_data.tar.gz"],
)

# cBench V1.1 automotive_susan_data dataset tarball, pinned by SHA-256.
http_file(
    name = "cBench_automotive_susan_data",
    urls = [
        "https://downloads.sourceforge.net/project/cbenchmark/cDatasets/V1.1/cDatasets_V1.1_automotive_susan_data.tar.gz",
    ],
    sha256 = "df56e1e44ccc560072381cdb001d770003ac74f92593dd5dbdfdd4ff9332a8e6",
)

# cBench V1.1 automotive_qsort_data dataset tarball, pinned by SHA-256.
http_file(
    name = "cBench_automotive_qsort_data",
    urls = [
        "https://downloads.sourceforge.net/project/cbenchmark/cDatasets/V1.1/cDatasets_V1.1_automotive_qsort_data.tar.gz",
    ],
    sha256 = "510b4225021408ac190f6f793e7d7171d3553c9916cfa8b2fb4ace005105e768",
)

# === C++ cpuinfo ===

http_archive(
Expand Down
4 changes: 4 additions & 0 deletions compiler_gym/envs/llvm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,12 @@ py_library(
# Python library target for the LLVM environment's datasets module.
py_library(
    name = "datasets",
    srcs = ["datasets.py"],
    # Python dependencies.
    deps = [
        "//compiler_gym/datasets:dataset",
        "//compiler_gym/util",
    ],
    # Runtime data dependency on the third_party LLVM clang target.
    data = [
        "//compiler_gym/third_party/llvm:clang",
    ],
)

Expand Down
Loading

0 comments on commit c520919

Please sign in to comment.