From 0ed344700f4f1e39dc52be2b6dc67ce20e7db091 Mon Sep 17 00:00:00 2001
From: canesche
Date: Sat, 4 Jun 2022 17:14:02 -0300
Subject: [PATCH 01/14] Creating the Jotai benchmark.

---
 compiler_gym/envs/llvm/datasets/BUILD         |   1 +
 .../envs/llvm/datasets/CMakeLists.txt         |   1 +
 compiler_gym/envs/llvm/datasets/__init__.py   |  18 ++
 compiler_gym/envs/llvm/datasets/jotaibench.py | 243 ++++++++++++++++++
 compiler_gym/test_jotai.py                    |  36 +++
 tests/llvm/datasets/jotaibench_test.py        |  69 +++++
 6 files changed, 368 insertions(+)
 create mode 100644 compiler_gym/envs/llvm/datasets/jotaibench.py
 create mode 100644 compiler_gym/test_jotai.py
 create mode 100644 tests/llvm/datasets/jotaibench_test.py

diff --git a/compiler_gym/envs/llvm/datasets/BUILD b/compiler_gym/envs/llvm/datasets/BUILD
index 3fe83022d..39391e937 100644
--- a/compiler_gym/envs/llvm/datasets/BUILD
+++ b/compiler_gym/envs/llvm/datasets/BUILD
@@ -9,6 +9,7 @@ py_library(
     srcs = [
         "__init__.py",
         "anghabench.py",
+        "jotaibench.py",
        "cbench.py",
         "chstone.py",
         "clgen.py",
diff --git a/compiler_gym/envs/llvm/datasets/CMakeLists.txt b/compiler_gym/envs/llvm/datasets/CMakeLists.txt
index 3dd710eb6..bb776d1e6 100644
--- a/compiler_gym/envs/llvm/datasets/CMakeLists.txt
+++ b/compiler_gym/envs/llvm/datasets/CMakeLists.txt
@@ -11,6 +11,7 @@ cg_py_library(
   SRCS
     "__init__.py"
     "anghabench.py"
+    "jotaibench.py"
     "cbench.py"
     "chstone.py"
     "clgen.py"
diff --git a/compiler_gym/envs/llvm/datasets/__init__.py b/compiler_gym/envs/llvm/datasets/__init__.py
index 31d714b32..55e460f69 100644
--- a/compiler_gym/envs/llvm/datasets/__init__.py
+++ b/compiler_gym/envs/llvm/datasets/__init__.py
@@ -8,6 +8,7 @@
 
 from compiler_gym.datasets import Dataset, TarDatasetWithManifest
 from compiler_gym.envs.llvm.datasets.anghabench import AnghaBenchDataset
+from compiler_gym.envs.llvm.datasets.jotaibench import JotaiBenchDataset
 from compiler_gym.envs.llvm.datasets.cbench import (
     CBenchDataset,
     CBenchLegacyDataset,
@@ -261,6 +262,22 @@ def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset
         manifest_sha256=anghabench_v0_manifest_sha256,
         deprecated="Please use anghabench-v1",
     )
+    yield JotaiBenchDataset(site_data_base=site_data_base, sort_order=0)
+    # Add legacy version of Jotaibench using an old manifest.
+    jotaibench_v0_manifest_url, jotaibench_v0_manifest_sha256 = {
+        "linux": (
+            "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-linux-manifest.bz2",
+            "a038d25d39ee9472662a9704dfff19c9e3512ff6a70f1067af85c5cb3784b477",
+        ),
+    }[sys.platform]
+    yield JotaiBenchDataset(
+        name="benchmark://jotaibench-v0",
+        site_data_base=site_data_base,
+        sort_order=0,
+        manifest_url=jotaibench_v0_manifest_url,
+        manifest_sha256=jotaibench_v0_manifest_sha256,
+        deprecated="Please use jotaibench-v1",
+    )
     yield BlasDataset(site_data_base=site_data_base, sort_order=0)
     yield CLgenDataset(site_data_base=site_data_base, sort_order=0)
     yield CBenchDataset(site_data_base=site_data_base)
@@ -294,6 +311,7 @@ def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset
 
 __all__ = [
     "AnghaBenchDataset",
+    "JotaiBenchDataset"
     "BlasDataset",
     "CBenchDataset",
     "CBenchLegacyDataset",
diff --git a/compiler_gym/envs/llvm/datasets/jotaibench.py b/compiler_gym/envs/llvm/datasets/jotaibench.py
new file mode 100644
index 000000000..e1140f572
--- /dev/null
+++ b/compiler_gym/envs/llvm/datasets/jotaibench.py
@@ -0,0 +1,243 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+import subprocess
+import sys
+from concurrent.futures import as_completed
+from pathlib import Path
+from typing import Optional
+
+from compiler_gym.datasets import Benchmark, TarDataset, TarDatasetWithManifest
+from compiler_gym.datasets.benchmark import BenchmarkWithSource
+from compiler_gym.datasets.uri import BenchmarkUri
+from compiler_gym.envs.llvm.llvm_benchmark import (
+    ClangInvocation,
+    get_system_library_flags,
+)
+from compiler_gym.service.proto import BenchmarkDynamicConfig, Command
+from compiler_gym.util import thread_pool
+from compiler_gym.util.filesystem import atomic_file_write
+
+
+
+class JotaiBenchDataset(TarDatasetWithManifest):
+    """A dataset of C programs curated from GitHub source code.
+
+    The dataset is from:
+
+        da Silva, Anderson Faustino, Bruno Conde Kind, José Wesley de Souza
+        Magalhaes, Jerônimo Nunes Rocha, Breno Campos Ferreira Guimaraes, and
+        Fernando Magno Quinão Pereira. "ANGHABENCH: A Suite with One Million
+        Compilable C Benchmarks for Code-Size Reduction." In 2021 IEEE/ACM
+        International Symposium on Code Generation and Optimization (CGO),
+        pp. 378-390. IEEE, 2021.
+
+    And is available at:
+
+        http://cuda.dcc.ufmg.br/Jotai/src/
+
+    Installation
+    ------------
+
+    The AnghaBench dataset consists of C functions that are compiled to LLVM-IR
+    on-demand and cached. The first time each benchmark is used there is an
+    overhead of compiling it from C to bitcode. This is a one-off cost.
+    """
+
+    def __init__(
+        self,
+        site_data_base: Path,
+        sort_order: int = 0,
+        manifest_url: Optional[str] = None,
+        manifest_sha256: Optional[str] = None,
+        deprecated: Optional[str] = None,
+        name: Optional[str] = None,
+    ):
+        manifest_url_, manifest_sha256_ = {
+            "linux": (
+                "http://cuda.dcc.ufmg.br/Jotai/src/Jotai_printRetVal.tar.bz2",
+                "7d2c6326036d87a02318e81a29560f9bb4ead3dc33ffbd43e4fb2e95e09dd621",
+            ),
+        }[sys.platform]
+        super().__init__(
+            name=name or "benchmark://jotai-v1",
+            description="Compile-only C/C++ functions extracted from GitHub",
+            references={
+                "Paper": "https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf",
+                "Homepage": "http://cuda.dcc.ufmg.br/angha/",
+            },
+            license="Unknown. See: https://github.com/brenocfg/AnghaBench/issues/1",
+            site_data_base=site_data_base,
+            manifest_urls=[manifest_url or manifest_url_],
+            manifest_sha256=manifest_sha256 or manifest_sha256_,
+            tar_urls=[
+                "http://cuda.dcc.ufmg.br/Jotai/src/Jotai_printRetVal.tar.bz2"
+            ],
+            tar_sha256="7d2c6326036d87a02318e81a29560f9bb4ead3dc33ffbd43e4fb2e95e09dd621",
+            strip_prefix="programs_no-ub_printableRetVal",
+            tar_compression="bz2",
+            benchmark_file_suffix=".bc",
+            sort_order=sort_order,
+            deprecated=deprecated,
+        )
+
+    def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark:
+        self.install()
+
+        benchmark_name = uri.path[1:]
+        if not benchmark_name:
+            raise LookupError(f"No benchmark specified: {uri}")
+
+        # The absolute path of the file, without an extension.
+        path_stem = self.dataset_root / benchmark_name
+
+        bitcode_abspath = Path(f"{path_stem}.bc")
+        c_file_abspath = Path(f"{path_stem}.c")
+
+        # If the file does not exist, compile it on-demand.
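+        # The bitcode is produced with an atomic file write and cached next
+        # to the C source, so only the first use of a benchmark pays for the
+        # clang invocation below; later lookups reuse the cached .bc file.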
+        if not bitcode_abspath.is_file():
+            if not c_file_abspath.is_file():
+                raise LookupError(
+                    f"Benchmark not found: {uri} (file not found: {c_file_abspath})"
+                )
+
+            with atomic_file_write(bitcode_abspath) as tmp_path:
+                compile_cmd = ClangInvocation.from_c_file(
+                    c_file_abspath,
+                    copt=[
+                        "-ferror-limit=1",  # Stop on first error.
+                        "-w",  # No warnings.
+                    ],
+                ).command(outpath=tmp_path)
+                subprocess.check_call(compile_cmd, timeout=300)
+
+        return BenchmarkWithSource.create(
+            uri, bitcode_abspath, "function.c", c_file_abspath
+        )
+
+    def compile_all(self):
+        n = self.size
+        executor = thread_pool.get_thread_pool_executor()
+        # Since the dataset is lazily compiled, simply iterating over the full
+        # set of URIs will compile everything. Do this in parallel.
+        futures = (
+            executor.submit(self.benchmark, uri) for uri in self.benchmark_uris()
+        )
+        for i, future in enumerate(as_completed(futures), start=1):
+            future.result()
+            print(
+                f"\r\033[KCompiled {i} of {n} programs ({i/n:.1%} complete)",
+                flush=True,
+                end="",
+            )
+
+
+class JotaiBenchRunnableDataset(TarDataset):
+    """TODO."""
+
+    def __init__(
+        self,
+        site_data_base: Path,
+    ):
+        super().__init__(
+            name="benchmark://jotai-runnable-v0",
+            description="Runnable C/C++ functions extracted from GitHub",
+            references={
+                # TODO: Update these as necessary:
+                "Paper": "https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf",
+                "Homepage": "http://cuda.dcc.ufmg.br/angha/",
+            },
+            license="",  # TODO: License name.
+            site_data_base=site_data_base,
+            tar_urls=[
+                # TODO: URL of where to download a tarball that contains the
+                # benchmarks. For debugging, you could use something like
+                # Dropbox or similar. For eventual production we can host them
+                # in our S3 bucket for you.
+                "http://cuda.dcc.ufmg.br/Jotai/src/Jotai_printRetVal.tar.bz2"
+            ],
+            tar_sha256="7d2c6326036d87a02318e81a29560f9bb4ead3dc33ffbd43e4fb2e95e09dd621",  # TODO: sha256sum of the above tarfile.
+            strip_prefix="programs_no-ub_printableRetVal",  # TODO: If there is a subdirectory to strip, specify it here.
+            tar_compression="bz2",
+            # TODO: The file extension that is used to automatically enumerate
+            # the benchmarks.
+            benchmark_file_suffix=".c",
+        )
+
+    def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark:
+        self.install()
+
+        benchmark_name = uri.path[1:]
+        if not benchmark_name:
+            raise LookupError(f"No benchmark specified: {uri}")
+
+        # The absolute path of the file, without an extension.
+        path_stem = self.dataset_root / benchmark_name
+
+        bitcode_abspath = Path(f"{path_stem}.bc")
+        c_file_abspath = Path(f"{path_stem}.c")
+
+        # If the file does not exist, compile it to a bitcode file on-demand.
+        if not bitcode_abspath.is_file():
+            if not c_file_abspath.is_file():
+                raise LookupError(
+                    f"Benchmark not found: {uri} (file not found: {c_file_abspath})"
+                )
+
+            with atomic_file_write(bitcode_abspath) as tmp_path:
+                compile_cmd = ClangInvocation.from_c_file(
+                    c_file_abspath,
+                    copt=[
+                        "-ferror-limit=1",  # Stop on first error.
+                        "-w",  # No warnings.
+                    ],
+                ).command(outpath=tmp_path)
+                subprocess.check_call(compile_cmd, timeout=300)
+
+        benchmark = BenchmarkWithSource.create(
+            uri, bitcode_abspath, "function.c", c_file_abspath
+        )
+
+        # TODO: Here is where we specify how to build and run the benchmark.
+        # This is what makes a benchmark "runnable".
+        benchmark.proto.dynamic_config.MergeFrom(
+            BenchmarkDynamicConfig(
+                build_cmd=Command(
+                    # TODO: Here is where you specify the command to build the
+                    # benchmark. Assuming no deps, this should be fine.
+                    argument=["$CC", "$IN"] + get_system_library_flags(),
+                    timeout_seconds=60,
+                    outfile=["a.out"],
+                ),
+                run_cmd=Command(
+                    # TODO: Here is where you specify the command to build the
+                    # benchmark. Assuming no deps, this should be fine.
+                    argument=["./a.out 0"],
+                    timeout_seconds=60,
+                    # TODO: If the benchmark needs any input files, specify it here.
+                    infile=[],
+                    # TODO: If the benchmark produces any output files, specify it
+                    # here.
+                    outfile=[],
+                ),
+            )
+        )
+
+        return benchmark
+
+    def compile_all(self):
+        n = self.size
+        executor = thread_pool.get_thread_pool_executor()
+        # Since the dataset is lazily compiled, simply iterating over the full
+        # set of URIs will compile everything. Do this in parallel.
+        futures = (
+            executor.submit(self.benchmark, uri) for uri in self.benchmark_uris()
+        )
+        for i, future in enumerate(as_completed(futures), start=1):
+            future.result()
+            print(
+                f"\r\033[KCompiled {i} of {n} programs ({i/n:.1%} complete)",
+                flush=True,
+                end="",
+            )
diff --git a/compiler_gym/test_jotai.py b/compiler_gym/test_jotai.py
new file mode 100644
index 000000000..a5f5ab6d3
--- /dev/null
+++ b/compiler_gym/test_jotai.py
@@ -0,0 +1,36 @@
+import gym
+import compiler_gym
+
+env = compiler_gym.make(
+    "llvm-v0",
+    benchmark="jotai-v1/extr_anypixelfirmwarecontrollersrcfifo.c_FIFO_available_Final",
+    observation_space="Autophase",  # selects the observation space
+    reward_space="IrInstructionCountOz",  # selects the optimization target
+)
+
+env.reset()
+#env.render()
+
+#env1 = compiler_gym.make( # creates a new environment (same as gym.make)
+#    "llvm-v0", # selects the compiler to use
+#    benchmark="cbench-v1/qsort", # selects the program to compile
+#    observation_space="Autophase", # selects the observation space
+#    reward_space="IrInstructionCountOz", # selects the optimization target
+#)
+
+#for dataset in env.datasets:
+#    print(dataset.name)
+
+#env.reset(benchmark="benchmark://jotai-v1/extr_anypixelfirmwarecontrollersrcfifo.c_FIFO_available_Final")
+
+#info = env.step(env.action_space.sample())
+#print(info)
+
+episode_reward = 0
+for i in range(1, 101):
+    observation, reward, done, info = env.step(env.action_space.sample())
+    if done:
+        break
+    episode_reward += reward
+    print(f"Step {i}, quality={episode_reward:.3%}")
+env.close()
\ No newline at end of file
diff --git a/tests/llvm/datasets/jotaibench_test.py b/tests/llvm/datasets/jotaibench_test.py
new file mode 100644
index 000000000..121e23e14
--- /dev/null
+++ b/tests/llvm/datasets/jotaibench_test.py
@@ -0,0 +1,69 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+"""Tests for the JotaiBench dataset.""" +import sys +from itertools import islice +from pathlib import Path + +import gym +import pytest + + +import compiler_gym.envs.llvm # noqa register environments +from compiler_gym.envs.llvm import LlvmEnv +from compiler_gym.envs.llvm.datasets import JotaiBenchDataset +from tests.pytest_plugins.common import skip_on_ci +from tests.test_main import main + +pytest_plugins = ["tests.pytest_plugins.common", "tests.pytest_plugins.llvm"] + + +@pytest.fixture(scope="module") +def jotaibench_dataset() -> JotaiBenchDataset: + with gym.make("llvm-v0") as env: + ds = env.datasets["jotaibench-v1"] + yield ds + + +def test_jotaibench_size(jotaibench_dataset: JotaiBenchDataset): + if sys.platform == "darwin": + assert jotaibench_dataset.size == 1041265 + else: + assert jotaibench_dataset.size == 1041333 + + +def test_missing_benchmark_name(jotaibench_dataset: JotaiBenchDataset, mocker): + # Mock install() so that on CI it doesn't download and unpack the tarfile. + mocker.patch.object(jotaibench_dataset, "install") + + with pytest.raises( + LookupError, match=r"^No benchmark specified: benchmark://jotaibench-v1$" + ): + jotaibench_dataset.benchmark("benchmark://jotaibench-v1") + jotaibench_dataset.install.assert_called_once() + + with pytest.raises( + LookupError, match=r"^No benchmark specified: benchmark://jotaibench-v1/$" + ): + jotaibench_dataset.benchmark("benchmark://jotaibench-v1/") + assert jotaibench_dataset.install.call_count == 2 + + +@skip_on_ci +@pytest.mark.parametrize("index", range(250)) +def test_jotaibench_random_select( + env: LlvmEnv, jotaibench_dataset: JotaiBenchDataset, index: int, tmpwd: Path +): + uri = next(islice(jotaibench_dataset.benchmark_uris(), index, None)) + benchmark = jotaibench_dataset.benchmark(uri) + env.reset(benchmark=benchmark) + + assert benchmark.source + benchmark.write_sources_to_directory(tmpwd) + assert (tmpwd / "function.c").is_file() + + +if __name__ == "__main__": + main() From 7f808c3e0030a83b8dca4b2061978d422e5714ec Mon Sep 17 00:00:00 2001 From: canesche Date: Sat, 4 Jun 2022 17:26:52 -0300 Subject: [PATCH 02/14] Creating the Jotai benchmark. 
---
 tests/llvm/datasets/BUILD          | 14 ++++++++++++++
 tests/llvm/datasets/CMakeLists.txt | 13 +++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/tests/llvm/datasets/BUILD b/tests/llvm/datasets/BUILD
index 880afaae3..5c59969b5 100644
--- a/tests/llvm/datasets/BUILD
+++ b/tests/llvm/datasets/BUILD
@@ -18,6 +18,20 @@ py_test(
     ],
 )
 
+py_test(
+    name = "jotaibench_test",
+    timeout = "long",
+    srcs = ["jotaibench_test.py"],
+    shard_count = 8,
+    deps = [
+        "//compiler_gym/envs/llvm",
+        "//compiler_gym/envs/llvm/datasets",
+        "//tests:test_main",
+        "//tests/pytest_plugins:common",
+        "//tests/pytest_plugins:llvm",
+    ],
+)
+
 py_test(
     name = "cbench_test",
     timeout = "long",
diff --git a/tests/llvm/datasets/CMakeLists.txt b/tests/llvm/datasets/CMakeLists.txt
index e07084a4c..17a37e388 100644
--- a/tests/llvm/datasets/CMakeLists.txt
+++ b/tests/llvm/datasets/CMakeLists.txt
@@ -18,6 +18,19 @@ cg_py_test(
     tests::test_main
 )
 
+cg_py_test(
+  NAME
+    jotaibench_test
+  SRCS
+    "jotaibench_test.py"
+  DEPS
+    compiler_gym::envs::llvm::llvm
+    compiler_gym::envs::llvm::datasets::datasets
+    tests::pytest_plugins::common
+    tests::pytest_plugins::llvm
+    tests::test_main
+)
+
 cg_py_test(
   NAME
     cbench_test

From f76263a7e3a5eefbfb833904b3f6ecea7e8a8e0a Mon Sep 17 00:00:00 2001
From: canesche
Date: Thu, 16 Jun 2022 16:45:19 -0300
Subject: [PATCH 03/14] removing test_jotai.py

---
 compiler_gym/test_jotai.py | 36 ------------------------------------
 1 file changed, 36 deletions(-)
 delete mode 100644 compiler_gym/test_jotai.py

diff --git a/compiler_gym/test_jotai.py b/compiler_gym/test_jotai.py
deleted file mode 100644
index a5f5ab6d3..000000000
--- a/compiler_gym/test_jotai.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import gym
-import compiler_gym
-
-env = compiler_gym.make(
-    "llvm-v0",
-    benchmark="jotai-v1/extr_anypixelfirmwarecontrollersrcfifo.c_FIFO_available_Final",
-    observation_space="Autophase",  # selects the observation space
-    reward_space="IrInstructionCountOz",  # selects the optimization target
-)
-
-env.reset()
-#env.render()
-
-#env1 = compiler_gym.make( # creates a new environment (same as gym.make)
-#    "llvm-v0", # selects the compiler to use
-#    benchmark="cbench-v1/qsort", # selects the program to compile
-#    observation_space="Autophase", # selects the observation space
-#    reward_space="IrInstructionCountOz", # selects the optimization target
-#)
-
-#for dataset in env.datasets:
-#    print(dataset.name)
-
-#env.reset(benchmark="benchmark://jotai-v1/extr_anypixelfirmwarecontrollersrcfifo.c_FIFO_available_Final")
-
-#info = env.step(env.action_space.sample())
-#print(info)
-
-episode_reward = 0
-for i in range(1, 101):
-    observation, reward, done, info = env.step(env.action_space.sample())
-    if done:
-        break
-    episode_reward += reward
-    print(f"Step {i}, quality={episode_reward:.3%}")
-env.close()
\ No newline at end of file

From 4c300eb18c0dc1a5a245747fa9e3aa17fd5e8be3 Mon Sep 17 00:00:00 2001
From: canesche
Date: Thu, 16 Jun 2022 16:48:26 -0300
Subject: [PATCH 04/14] removing TODOs

---
 compiler_gym/envs/llvm/datasets/jotaibench.py | 23 +++----------------
 1 file changed, 3 insertions(+), 20 deletions(-)

diff --git a/compiler_gym/envs/llvm/datasets/jotaibench.py b/compiler_gym/envs/llvm/datasets/jotaibench.py
index e1140f572..6f683c9a8 100644
--- a/compiler_gym/envs/llvm/datasets/jotaibench.py
+++ b/compiler_gym/envs/llvm/datasets/jotaibench.py
@@ -134,8 +134,6 @@ def compile_all(self):
 
 
 class JotaiBenchRunnableDataset(TarDataset):
-    """TODO."""
-
     def __init__(
         self,
         site_data_base: Path,
@@ -144,24 +142,17 @@ def __init__(
             name="benchmark://jotai-runnable-v0",
             description="Runnable C/C++ functions extracted from GitHub",
             references={
-                # TODO: Update these as necessary:
                 "Paper": "https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf",
                 "Homepage": "http://cuda.dcc.ufmg.br/angha/",
             },
-            license="",  # TODO: License name.
+            license="",
             site_data_base=site_data_base,
             tar_urls=[
-                # TODO: URL of where to download a tarball that contains the
-                # benchmarks. For debugging, you could use something like
-                # Dropbox or similar. For eventual production we can host them
-                # in our S3 bucket for you.
                 "http://cuda.dcc.ufmg.br/Jotai/src/Jotai_printRetVal.tar.bz2"
             ],
-            tar_sha256="7d2c6326036d87a02318e81a29560f9bb4ead3dc33ffbd43e4fb2e95e09dd621",  # TODO: sha256sum of the above tarfile.
-            strip_prefix="programs_no-ub_printableRetVal",  # TODO: If there is a subdirectory to strip, specify it here.
+            tar_sha256="7d2c6326036d87a02318e81a29560f9bb4ead3dc33ffbd43e4fb2e95e09dd621",
+            strip_prefix="programs_no-ub_printableRetVal",
             tar_compression="bz2",
-            # TODO: The file extension that is used to automatically enumerate
-            # the benchmarks.
             benchmark_file_suffix=".c",
         )
@@ -199,26 +190,18 @@ def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark:
             uri, bitcode_abspath, "function.c", c_file_abspath
         )
 
-        # TODO: Here is where we specify how to build and run the benchmark.
         # This is what makes a benchmark "runnable".
         benchmark.proto.dynamic_config.MergeFrom(
             BenchmarkDynamicConfig(
                 build_cmd=Command(
-                    # TODO: Here is where you specify the command to build the
-                    # benchmark. Assuming no deps, this should be fine.
                     argument=["$CC", "$IN"] + get_system_library_flags(),
                     timeout_seconds=60,
                     outfile=["a.out"],
                 ),
                 run_cmd=Command(
-                    # TODO: Here is where you specify the command to build the
-                    # benchmark. Assuming no deps, this should be fine.
                     argument=["./a.out 0"],
                     timeout_seconds=60,
-                    # TODO: If the benchmark needs any input files, specify it here.
                     infile=[],
-                    # TODO: If the benchmark produces any output files, specify it
-                    # here.
                     outfile=[],
                 ),
             )
         )

From 79e6a0bf90f99a79c1220a6eb94564a3a1905062 Mon Sep 17 00:00:00 2001
From: canesche
Date: Tue, 19 Jul 2022 06:33:19 -0300
Subject: [PATCH 05/14] Correct bugs and update tests

---
 compiler_gym/envs/llvm/datasets/jotaibench.py | 2 +-
 tests/llvm/datasets/llvm_datasets_test.py     | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/compiler_gym/envs/llvm/datasets/jotaibench.py b/compiler_gym/envs/llvm/datasets/jotaibench.py
index 6f683c9a8..d03494b79 100644
--- a/compiler_gym/envs/llvm/datasets/jotaibench.py
+++ b/compiler_gym/envs/llvm/datasets/jotaibench.py
@@ -61,7 +61,7 @@ def __init__(
             ),
         }[sys.platform]
         super().__init__(
-            name=name or "benchmark://jotai-v1",
+            name=name or "benchmark://jotaibench-v1",
             description="Compile-only C/C++ functions extracted from GitHub",
             references={
                 "Paper": "https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf",
diff --git a/tests/llvm/datasets/llvm_datasets_test.py b/tests/llvm/datasets/llvm_datasets_test.py
index 61251f649..95ff5ddf4 100644
--- a/tests/llvm/datasets/llvm_datasets_test.py
+++ b/tests/llvm/datasets/llvm_datasets_test.py
@@ -12,6 +12,7 @@
 def test_default_dataset_list():
     with gym.make("llvm-v0") as env:
         assert list(d.name for d in env.datasets) == [
+            "benchmark://jotaibench-v1",
             "benchmark://cbench-v1",
             "benchmark://anghabench-v1",
             "benchmark://blas-v0",

From 9b19da4fd9b3281218f6ef31aa54669e5809373b Mon Sep 17 00:00:00 2001
From: canesche
Date: Tue, 19 Jul 2022 13:15:23 -0300
Subject: [PATCH 06/14] Update jotaibench with some corrections

---
 compiler_gym/envs/llvm/datasets/jotaibench.py | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/compiler_gym/envs/llvm/datasets/jotaibench.py b/compiler_gym/envs/llvm/datasets/jotaibench.py
index d03494b79..53be063a4 100644
--- a/compiler_gym/envs/llvm/datasets/jotaibench.py
+++ b/compiler_gym/envs/llvm/datasets/jotaibench.py
@@ -40,7 +40,7 @@ class JotaiBenchDataset(TarDatasetWithManifest):
     Installation
     ------------
 
-    The AnghaBench dataset consists of C functions that are compiled to LLVM-IR
+    The JotaiBench dataset consists of C functions that are compiled to LLVM-IR
     on-demand and cached. The first time each benchmark is used there is an
     overhead of compiling it from C to bitcode. This is a one-off cost.
     """
@@ -56,8 +56,8 @@ def __init__(
     ):
         manifest_url_, manifest_sha256_ = {
             "linux": (
-                "http://cuda.dcc.ufmg.br/Jotai/src/Jotai_printRetVal.tar.bz2",
-                "7d2c6326036d87a02318e81a29560f9bb4ead3dc33ffbd43e4fb2e95e09dd621",
+                "https://github.com/lac-dcc/jotai-benchmarks/raw/main/benchmarks/programs_no-ub_printableRetVal.bz2",
+                "3657a36b129d462c11d451a5bc9365e73f404a814e8726b383a2d7e64faa3d73",
             ),
         }[sys.platform]
         super().__init__(
@@ -67,15 +67,15 @@ def __init__(
                 "Paper": "https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf",
                 "Homepage": "http://cuda.dcc.ufmg.br/angha/",
             },
-            license="Unknown. See: https://github.com/brenocfg/AnghaBench/issues/1",
+            license="GNU General Public License v3.0 (GPLv3)",
             site_data_base=site_data_base,
             manifest_urls=[manifest_url or manifest_url_],
             manifest_sha256=manifest_sha256 or manifest_sha256_,
             tar_urls=[
-                "http://cuda.dcc.ufmg.br/Jotai/src/Jotai_printRetVal.tar.bz2"
+                "https://github.com/lac-dcc/jotai-benchmarks/raw/main/benchmarks/programs_no-ub_printableRetVal.bz2"
             ],
-            tar_sha256="7d2c6326036d87a02318e81a29560f9bb4ead3dc33ffbd43e4fb2e95e09dd621",
-            strip_prefix="programs_no-ub_printableRetVal",
+            tar_sha256="3657a36b129d462c11d451a5bc9365e73f404a814e8726b383a2d7e64faa3d73",
+            strip_prefix="",
             tar_compression="bz2",
             benchmark_file_suffix=".bc",
             sort_order=sort_order,
@@ -145,13 +145,13 @@ def __init__(
                 "Paper": "https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf",
                 "Homepage": "http://cuda.dcc.ufmg.br/angha/",
             },
-            license="",
+            license="GNU General Public License v3.0 (GPLv3)", 
             site_data_base=site_data_base,
             tar_urls=[
-                "http://cuda.dcc.ufmg.br/Jotai/src/Jotai_printRetVal.tar.bz2"
+                "https://github.com/lac-dcc/jotai-benchmarks/raw/main/benchmarks/programs_no-ub_printableRetVal.bz2"
             ],
-            tar_sha256="7d2c6326036d87a02318e81a29560f9bb4ead3dc33ffbd43e4fb2e95e09dd621",
-            strip_prefix="programs_no-ub_printableRetVal",
+            tar_sha256="3657a36b129d462c11d451a5bc9365e73f404a814e8726b383a2d7e64faa3d73", 
+            strip_prefix="", 
             tar_compression="bz2",
             benchmark_file_suffix=".c",
         )

From d5441b163ecb7627702023b8bc42715d42497d6e Mon Sep 17 00:00:00 2001
From: canesche
Date: Tue, 19 Jul 2022 14:11:50 -0300
Subject: [PATCH 07/14] Correct benchmark strip_prefix

---
 compiler_gym/envs/llvm/datasets/jotaibench.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler_gym/envs/llvm/datasets/jotaibench.py b/compiler_gym/envs/llvm/datasets/jotaibench.py
index 53be063a4..ccdd13219 100644
--- a/compiler_gym/envs/llvm/datasets/jotaibench.py
+++ b/compiler_gym/envs/llvm/datasets/jotaibench.py
@@ -75,7 +75,7 @@ def __init__(
                 "https://github.com/lac-dcc/jotai-benchmarks/raw/main/benchmarks/programs_no-ub_printableRetVal.bz2"
             ],
             tar_sha256="3657a36b129d462c11d451a5bc9365e73f404a814e8726b383a2d7e64faa3d73",
-            strip_prefix="",
+            strip_prefix="programs_no-ub_printableRetVal",
             tar_compression="bz2",
             benchmark_file_suffix=".bc",
             sort_order=sort_order,
@@ -151,7 +151,7 @@ def __init__(
                 "https://github.com/lac-dcc/jotai-benchmarks/raw/main/benchmarks/programs_no-ub_printableRetVal.bz2"
             ],
             tar_sha256="3657a36b129d462c11d451a5bc9365e73f404a814e8726b383a2d7e64faa3d73", 
-            strip_prefix="",
+            strip_prefix="programs_no-ub_printableRetVal", 
             tar_compression="bz2",
             benchmark_file_suffix=".c",
         )

From 1deb2dfb61556137ebb33d305dbc414332abebc6 Mon Sep 17 00:00:00 2001
From: canesche
Date: Tue, 19 Jul 2022 17:36:01 +0000
Subject: [PATCH 08/14] files formatted with pre-commit run --all-files

---
 compiler_gym/envs/llvm/datasets/__init__.py   | 5 ++---
 compiler_gym/envs/llvm/datasets/jotaibench.py | 7 +++----
 tests/llvm/datasets/jotaibench_test.py        | 1 -
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/compiler_gym/envs/llvm/datasets/__init__.py b/compiler_gym/envs/llvm/datasets/__init__.py
index 55e460f69..014fe4301 100644
--- a/compiler_gym/envs/llvm/datasets/__init__.py
+++ b/compiler_gym/envs/llvm/datasets/__init__.py
@@ -8,7 +8,6 @@
 
 from compiler_gym.datasets import Dataset, TarDatasetWithManifest
 from compiler_gym.envs.llvm.datasets.anghabench import AnghaBenchDataset
-from compiler_gym.envs.llvm.datasets.jotaibench import JotaiBenchDataset
 from compiler_gym.envs.llvm.datasets.cbench import (
     CBenchDataset,
     CBenchLegacyDataset,
@@ -17,6 +16,7 @@
 from compiler_gym.envs.llvm.datasets.chstone import CHStoneDataset
 from compiler_gym.envs.llvm.datasets.clgen import CLgenDataset
 from compiler_gym.envs.llvm.datasets.csmith import CsmithBenchmark, CsmithDataset
+from compiler_gym.envs.llvm.datasets.jotaibench import JotaiBenchDataset
 from compiler_gym.envs.llvm.datasets.llvm_stress import LlvmStressDataset
 from compiler_gym.envs.llvm.datasets.poj104 import POJ104Dataset, POJ104LegacyDataset
 from compiler_gym.util.runfiles_path import site_data_path
@@ -311,8 +311,7 @@ def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset
 
 __all__ = [
     "AnghaBenchDataset",
-    "JotaiBenchDataset"
-    "BlasDataset",
+    "JotaiBenchDataset" "BlasDataset",
     "CBenchDataset",
     "CBenchLegacyDataset",
diff --git a/compiler_gym/envs/llvm/datasets/jotaibench.py b/compiler_gym/envs/llvm/datasets/jotaibench.py
index ccdd13219..723b7a53f 100644
--- a/compiler_gym/envs/llvm/datasets/jotaibench.py
+++ b/compiler_gym/envs/llvm/datasets/jotaibench.py
@@ -20,7 +20,6 @@
 from compiler_gym.util.filesystem import atomic_file_write
 
 
-
 class JotaiBenchDataset(TarDatasetWithManifest):
     """A dataset of C programs curated from GitHub source code.
 
@@ -145,13 +144,13 @@ def __init__(
                 "Paper": "https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf",
                 "Homepage": "http://cuda.dcc.ufmg.br/angha/",
             },
-            license="GNU General Public License v3.0 (GPLv3)", 
+            license="GNU General Public License v3.0 (GPLv3)",
             site_data_base=site_data_base,
             tar_urls=[
                 "https://github.com/lac-dcc/jotai-benchmarks/raw/main/benchmarks/programs_no-ub_printableRetVal.bz2"
             ],
-            tar_sha256="3657a36b129d462c11d451a5bc9365e73f404a814e8726b383a2d7e64faa3d73", 
-            strip_prefix="programs_no-ub_printableRetVal", 
+            tar_sha256="3657a36b129d462c11d451a5bc9365e73f404a814e8726b383a2d7e64faa3d73",
+            strip_prefix="programs_no-ub_printableRetVal",
             tar_compression="bz2",
             benchmark_file_suffix=".c",
         )
diff --git a/tests/llvm/datasets/jotaibench_test.py b/tests/llvm/datasets/jotaibench_test.py
index 121e23e14..e10bd87f7 100644
--- a/tests/llvm/datasets/jotaibench_test.py
+++ b/tests/llvm/datasets/jotaibench_test.py
@@ -10,7 +10,6 @@
 import gym
 import pytest
 
-
 import compiler_gym.envs.llvm  # noqa register environments
 from compiler_gym.envs.llvm import LlvmEnv
 from compiler_gym.envs.llvm.datasets import JotaiBenchDataset

From 97d3d7dddd2086c0bb421bbe8e340b510049e7e3 Mon Sep 17 00:00:00 2001
From: canesche
Date: Mon, 8 Aug 2022 14:02:47 +0000
Subject: [PATCH 09/14] Correct format with Bazel buildifier

---
 compiler_gym/envs/llvm/datasets/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler_gym/envs/llvm/datasets/BUILD b/compiler_gym/envs/llvm/datasets/BUILD
index 39391e937..4f2577db0 100644
--- a/compiler_gym/envs/llvm/datasets/BUILD
+++ b/compiler_gym/envs/llvm/datasets/BUILD
@@ -9,11 +9,11 @@ py_library(
     srcs = [
         "__init__.py",
         "anghabench.py",
-        "jotaibench.py",
         "cbench.py",
         "chstone.py",
         "clgen.py",
         "csmith.py",
+        "jotaibench.py",
         "llvm_stress.py",
         "poj104.py",
     ],

From b0cf6cf6d82c6e5e370c5150f915b716db3c6563 Mon Sep 17 00:00:00 2001
From: canesche
Date: Sun, 14 Aug 2022 20:56:18 +0000
Subject: [PATCH 10/14] Updating files with CLI corrections and tests for the
 Jotai benchmarks.
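
Points the dataset at the consolidated jotaibench.bz2 tarball, switches
benchmark enumeration to the C sources, documents the dataset, and
updates the tests. A minimal smoke-test sketch of the updated dataset,
assuming an installed CompilerGym with the LLVM environment (the
benchmark URI below is the one used by the earlier test_jotai.py
example):

    import compiler_gym

    with compiler_gym.make("llvm-v0") as env:
        env.reset(
            benchmark="benchmark://jotaibench-v1/"
            "extr_anypixelfirmwarecontrollersrcfifo.c_FIFO_available_Final"
        )
        print(env.observation["IrInstructionCount"])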
---
 compiler_gym/envs/llvm/datasets/__init__.py   | 11 +++++--
 compiler_gym/envs/llvm/datasets/jotaibench.py | 18 +++++------
 docs/source/llvm/api.rst                      |  2 ++
 docs/source/llvm/index.rst                    |  4 ++-
 tests/llvm/datasets/CMakeLists.txt            | 26 ++++++++--------
 tests/llvm/datasets/jotaibench_test.py        | 30 +++++++------------
 tests/llvm/datasets/llvm_datasets_test.py     |  2 +-
 7 files changed, 46 insertions(+), 47 deletions(-)

diff --git a/compiler_gym/envs/llvm/datasets/__init__.py b/compiler_gym/envs/llvm/datasets/__init__.py
index 014fe4301..982378392 100644
--- a/compiler_gym/envs/llvm/datasets/__init__.py
+++ b/compiler_gym/envs/llvm/datasets/__init__.py
@@ -265,9 +265,13 @@ def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset
     yield JotaiBenchDataset(site_data_base=site_data_base, sort_order=0)
     # Add legacy version of Jotaibench using an old manifest.
     jotaibench_v0_manifest_url, jotaibench_v0_manifest_sha256 = {
+        "darwin": (
+            "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2",
+            "39464256405aacefdb7550a7f990c9c578264c132804eec3daac091fa3c21bd1",
+        ),
         "linux": (
-            "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-linux-manifest.bz2",
-            "a038d25d39ee9472662a9704dfff19c9e3512ff6a70f1067af85c5cb3784b477",
+            "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2",
+            "3657a36b129d462c11d451a5bc9365e73f404a814e8726b383a2d7e64faa3d73",
         ),
     }[sys.platform]
     yield JotaiBenchDataset(
@@ -311,7 +315,7 @@ def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset
 
 __all__ = [
     "AnghaBenchDataset",
-    "JotaiBenchDataset" "BlasDataset",
+    "BlasDataset",
     "CBenchDataset",
     "CBenchLegacyDataset",
     "CLgenDataset",
@@ -319,6 +323,7 @@ def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset
     "CsmithDataset",
     "get_llvm_datasets",
     "GitHubDataset",
+    "JotaiBenchDataset",
     "LinuxDataset",
     "LlvmStressDataset",
     "MibenchDataset",
diff --git a/compiler_gym/envs/llvm/datasets/jotaibench.py b/compiler_gym/envs/llvm/datasets/jotaibench.py
index 723b7a53f..33b8ac852 100644
--- a/compiler_gym/envs/llvm/datasets/jotaibench.py
+++ b/compiler_gym/envs/llvm/datasets/jotaibench.py
@@ -55,7 +55,7 @@ def __init__(
     ):
         manifest_url_, manifest_sha256_ = {
             "linux": (
-                "https://github.com/lac-dcc/jotai-benchmarks/raw/main/benchmarks/programs_no-ub_printableRetVal.bz2",
+                "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2",
                 "3657a36b129d462c11d451a5bc9365e73f404a814e8726b383a2d7e64faa3d73",
             ),
         }[sys.platform]
@@ -71,12 +71,12 @@ def __init__(
             manifest_urls=[manifest_url or manifest_url_],
             manifest_sha256=manifest_sha256 or manifest_sha256_,
             tar_urls=[
-                "https://github.com/lac-dcc/jotai-benchmarks/raw/main/benchmarks/programs_no-ub_printableRetVal.bz2"
+                "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2"
             ],
             tar_sha256="3657a36b129d462c11d451a5bc9365e73f404a814e8726b383a2d7e64faa3d73",
-            strip_prefix="programs_no-ub_printableRetVal",
+            strip_prefix="jotaibench-v1",
             tar_compression="bz2",
-            benchmark_file_suffix=".bc",
+            benchmark_file_suffix=".c",
             sort_order=sort_order,
             deprecated=deprecated,
         )
@@ -138,7 +138,7 @@ def __init__(
         site_data_base: Path,
     ):
         super().__init__(
-            name="benchmark://jotai-runnable-v0",
+            name="benchmark://jotai-runnable-v1",
             description="Runnable C/C++ functions extracted from GitHub",
             references={
                 "Paper": "https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf",
@@ -147,10 +147,10 @@ def __init__(
             license="GNU General Public License v3.0 (GPLv3)",
             site_data_base=site_data_base,
             tar_urls=[
-                "https://github.com/lac-dcc/jotai-benchmarks/raw/main/benchmarks/programs_no-ub_printableRetVal.bz2"
+                "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2"
             ],
             tar_sha256="3657a36b129d462c11d451a5bc9365e73f404a814e8726b383a2d7e64faa3d73",
-            strip_prefix="programs_no-ub_printableRetVal",
+            strip_prefix="jotaibench-v1",
             tar_compression="bz2",
             benchmark_file_suffix=".c",
         )
@@ -194,12 +194,12 @@ def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark:
             BenchmarkDynamicConfig(
                 build_cmd=Command(
                     argument=["$CC", "$IN"] + get_system_library_flags(),
-                    timeout_seconds=60,
+                    timeout_seconds=30,
                     outfile=["a.out"],
                 ),
                 run_cmd=Command(
                     argument=["./a.out 0"],
-                    timeout_seconds=60,
+                    timeout_seconds=30,
                     infile=[],
                     outfile=[],
                 ),
diff --git a/docs/source/llvm/api.rst b/docs/source/llvm/api.rst
index 36c3adc5c..06af7fffd 100644
--- a/docs/source/llvm/api.rst
+++ b/docs/source/llvm/api.rst
@@ -45,6 +45,8 @@ Datasets
 
 .. autoclass:: GitHubDataset
 
+.. autoclass:: JotaiBenchDataset
+
 .. autoclass:: LinuxDataset
 
 .. autoclass:: LlvmStressDataset
diff --git a/docs/source/llvm/index.rst b/docs/source/llvm/index.rst
index 704a2f5c7..216cc8ddb 100644
--- a/docs/source/llvm/index.rst
+++ b/docs/source/llvm/index.rst
@@ -40,6 +40,8 @@ We provide several datasets of open-source LLVM-IR benchmarks for use:
 +----------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+
 | benchmark://github-v0 | 49,738 | Compile-only C/C++ objects from GitHub [`Paper `__] | No |
 +----------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+
+| benchmark://jotaibench-v1 | 18,761 | Compile-only C/C++ functions extracted from GitHub [`Homepage `__, `Paper`__] | No |
++----------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+
 | benchmark://linux-v0 | 13,894 | Compile-only object files from C Linux kernel [`Homepage `__] | No |
 +----------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+
 | benchmark://mibench-v1 | 40 | C benchmarks [`Paper `__] | No |
@@ -56,7 +58,7 @@ We provide several datasets of open-source LLVM-IR benchmarks for use:
 +----------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+
 | generator://llvm-stress-v0 | ∞ | Randomly generated LLVM-IR [`Documentation `__] | No |
 +----------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+
-| Total | 1,158,701 | | |
+| Total | 1,177,462 | | |
 +----------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+
 
 .. [#f1] Values are for the Linux datasets. Some of the datasets contain fewer
diff --git a/tests/llvm/datasets/CMakeLists.txt b/tests/llvm/datasets/CMakeLists.txt
index 17a37e388..f0cd0a736 100644
--- a/tests/llvm/datasets/CMakeLists.txt
+++ b/tests/llvm/datasets/CMakeLists.txt
@@ -18,19 +18,6 @@ cg_py_test(
     tests::test_main
 )
 
-cg_py_test(
-  NAME
-    jotaibench_test
-  SRCS
-    "jotaibench_test.py"
-  DEPS
-    compiler_gym::envs::llvm::llvm
-    compiler_gym::envs::llvm::datasets::datasets
-    tests::pytest_plugins::common
-    tests::pytest_plugins::llvm
-    tests::test_main
-)
-
 cg_py_test(
   NAME
     cbench_test
@@ -110,6 +97,19 @@ cg_py_test(
     tests::test_main
 )
 
+cg_py_test(
+  NAME
+    jotaibench_test
+  SRCS
+    "jotaibench_test.py"
+  DEPS
+    compiler_gym::envs::llvm::llvm
+    compiler_gym::envs::llvm::datasets::datasets
+    tests::pytest_plugins::common
+    tests::pytest_plugins::llvm
+    tests::test_main
+)
+
 cg_py_test(
   NAME
     llvm_datasets_test
diff --git a/tests/llvm/datasets/jotaibench_test.py b/tests/llvm/datasets/jotaibench_test.py
index e10bd87f7..cd325c52f 100644
--- a/tests/llvm/datasets/jotaibench_test.py
+++ b/tests/llvm/datasets/jotaibench_test.py
@@ -4,18 +4,22 @@
 # LICENSE file in the root directory of this source tree.
"""Tests for the JotaiBench dataset.""" import sys -from itertools import islice -from pathlib import Path import gym import pytest import compiler_gym.envs.llvm # noqa register environments -from compiler_gym.envs.llvm import LlvmEnv + +# from compiler_gym.envs.llvm import LlvmEnv from compiler_gym.envs.llvm.datasets import JotaiBenchDataset -from tests.pytest_plugins.common import skip_on_ci + +# from tests.pytest_plugins.common import skip_on_ci from tests.test_main import main +# from itertools import islice +# from pathlib import Path + + pytest_plugins = ["tests.pytest_plugins.common", "tests.pytest_plugins.llvm"] @@ -28,9 +32,9 @@ def jotaibench_dataset() -> JotaiBenchDataset: def test_jotaibench_size(jotaibench_dataset: JotaiBenchDataset): if sys.platform == "darwin": - assert jotaibench_dataset.size == 1041265 + assert jotaibench_dataset.size == 2138885 else: - assert jotaibench_dataset.size == 1041333 + assert jotaibench_dataset.size == 2138885 def test_missing_benchmark_name(jotaibench_dataset: JotaiBenchDataset, mocker): @@ -50,19 +54,5 @@ def test_missing_benchmark_name(jotaibench_dataset: JotaiBenchDataset, mocker): assert jotaibench_dataset.install.call_count == 2 -@skip_on_ci -@pytest.mark.parametrize("index", range(250)) -def test_jotaibench_random_select( - env: LlvmEnv, jotaibench_dataset: JotaiBenchDataset, index: int, tmpwd: Path -): - uri = next(islice(jotaibench_dataset.benchmark_uris(), index, None)) - benchmark = jotaibench_dataset.benchmark(uri) - env.reset(benchmark=benchmark) - - assert benchmark.source - benchmark.write_sources_to_directory(tmpwd) - assert (tmpwd / "function.c").is_file() - - if __name__ == "__main__": main() diff --git a/tests/llvm/datasets/llvm_datasets_test.py b/tests/llvm/datasets/llvm_datasets_test.py index 95ff5ddf4..bf2f34f58 100644 --- a/tests/llvm/datasets/llvm_datasets_test.py +++ b/tests/llvm/datasets/llvm_datasets_test.py @@ -12,13 +12,13 @@ def test_default_dataset_list(): with gym.make("llvm-v0") as env: assert list(d.name for d in env.datasets) == [ - "benchmark://jotaibench-v1", "benchmark://cbench-v1", "benchmark://anghabench-v1", "benchmark://blas-v0", "benchmark://chstone-v0", "benchmark://clgen-v0", "benchmark://github-v0", + "benchmark://jotaibench-v1", "benchmark://linux-v0", "benchmark://mibench-v1", "benchmark://npb-v0", From e66ab791c7f395d47c388fd5d33b56e23ee00c3f Mon Sep 17 00:00:00 2001 From: canesche Date: Fri, 19 Aug 2022 16:18:52 +0000 Subject: [PATCH 11/14] Correction link to download of benchmark and test --- compiler_gym/envs/llvm/datasets/jotaibench.py | 12 ++++---- tests/llvm/datasets/jotaibench_test.py | 30 ++++++++++++------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/compiler_gym/envs/llvm/datasets/jotaibench.py b/compiler_gym/envs/llvm/datasets/jotaibench.py index 33b8ac852..f1a011180 100644 --- a/compiler_gym/envs/llvm/datasets/jotaibench.py +++ b/compiler_gym/envs/llvm/datasets/jotaibench.py @@ -55,8 +55,8 @@ def __init__( ): manifest_url_, manifest_sha256_ = { "linux": ( - "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2", - "3657a36b129d462c11d451a5bc9365e73f404a814e8726b383a2d7e64faa3d73", + "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2?raw=true", + "202d14b0f3f78210c7472b7d4ef7c33d828174c30a3bced6950fc1ca88773983", ), }[sys.platform] super().__init__( @@ -71,9 +71,9 @@ def __init__( manifest_urls=[manifest_url or manifest_url_], manifest_sha256=manifest_sha256 or manifest_sha256_, tar_urls=[ - 
"https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2" + "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2?raw=true" ], - tar_sha256="3657a36b129d462c11d451a5bc9365e73f404a814e8726b383a2d7e64faa3d73", + tar_sha256="202d14b0f3f78210c7472b7d4ef7c33d828174c30a3bced6950fc1ca88773983", strip_prefix="jotaibench-v1", tar_compression="bz2", benchmark_file_suffix=".c", @@ -147,9 +147,9 @@ def __init__( license="GNU General Public License v3.0 (GPLv3)", site_data_base=site_data_base, tar_urls=[ - "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2" + "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2?raw=true" ], - tar_sha256="3657a36b129d462c11d451a5bc9365e73f404a814e8726b383a2d7e64faa3d73", + tar_sha256="202d14b0f3f78210c7472b7d4ef7c33d828174c30a3bced6950fc1ca88773983", strip_prefix="jotaibench-v1", tar_compression="bz2", benchmark_file_suffix=".c", diff --git a/tests/llvm/datasets/jotaibench_test.py b/tests/llvm/datasets/jotaibench_test.py index cd325c52f..db837b93c 100644 --- a/tests/llvm/datasets/jotaibench_test.py +++ b/tests/llvm/datasets/jotaibench_test.py @@ -4,22 +4,18 @@ # LICENSE file in the root directory of this source tree. """Tests for the JotaiBench dataset.""" import sys +from itertools import islice +from pathlib import Path import gym import pytest import compiler_gym.envs.llvm # noqa register environments - -# from compiler_gym.envs.llvm import LlvmEnv +from compiler_gym.envs.llvm import LlvmEnv from compiler_gym.envs.llvm.datasets import JotaiBenchDataset - -# from tests.pytest_plugins.common import skip_on_ci +from tests.pytest_plugins.common import skip_on_ci from tests.test_main import main -# from itertools import islice -# from pathlib import Path - - pytest_plugins = ["tests.pytest_plugins.common", "tests.pytest_plugins.llvm"] @@ -32,9 +28,9 @@ def jotaibench_dataset() -> JotaiBenchDataset: def test_jotaibench_size(jotaibench_dataset: JotaiBenchDataset): if sys.platform == "darwin": - assert jotaibench_dataset.size == 2138885 + assert jotaibench_dataset.size == 2138894 else: - assert jotaibench_dataset.size == 2138885 + assert jotaibench_dataset.size == 2138894 def test_missing_benchmark_name(jotaibench_dataset: JotaiBenchDataset, mocker): @@ -54,5 +50,19 @@ def test_missing_benchmark_name(jotaibench_dataset: JotaiBenchDataset, mocker): assert jotaibench_dataset.install.call_count == 2 +@skip_on_ci +@pytest.mark.parametrize("index", range(250)) +def test_anghabench_random_select( + env: LlvmEnv, jotaibench_dataset: JotaiBenchDataset, index: int, tmpwd: Path +): + uri = next(islice(jotaibench_dataset.benchmark_uris(), index, None)) + benchmark = jotaibench_dataset.benchmark(uri) + env.reset(benchmark=benchmark) + + assert benchmark.source + benchmark.write_sources_to_directory(tmpwd) + assert (tmpwd / "function.c").is_file() + + if __name__ == "__main__": main() From a65d25c6be4c183a9d637ba7d379a82f4b031db8 Mon Sep 17 00:00:00 2001 From: canesche Date: Fri, 19 Aug 2022 18:25:02 +0000 Subject: [PATCH 12/14] adding darwin manifest url --- compiler_gym/envs/llvm/datasets/jotaibench.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compiler_gym/envs/llvm/datasets/jotaibench.py b/compiler_gym/envs/llvm/datasets/jotaibench.py index f1a011180..033c5fffe 100644 --- a/compiler_gym/envs/llvm/datasets/jotaibench.py +++ b/compiler_gym/envs/llvm/datasets/jotaibench.py @@ -54,6 +54,10 @@ def __init__( name: Optional[str] = None, ): manifest_url_, manifest_sha256_ = 
{ + "darwin": ( + "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2?raw=true", + "202d14b0f3f78210c7472b7d4ef7c33d828174c30a3bced6950fc1ca88773983", + ), "linux": ( "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2?raw=true", "202d14b0f3f78210c7472b7d4ef7c33d828174c30a3bced6950fc1ca88773983", From 8299a58ab93a97433002c42dbda3b4e0978a77a5 Mon Sep 17 00:00:00 2001 From: canesche Date: Tue, 23 Aug 2022 16:31:45 +0000 Subject: [PATCH 13/14] removing deprecated dataset in the same PR --- compiler_gym/envs/llvm/datasets/__init__.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/compiler_gym/envs/llvm/datasets/__init__.py b/compiler_gym/envs/llvm/datasets/__init__.py index 982378392..b6d4deb7c 100644 --- a/compiler_gym/envs/llvm/datasets/__init__.py +++ b/compiler_gym/envs/llvm/datasets/__init__.py @@ -263,25 +263,6 @@ def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset deprecated="Please use anghabench-v1", ) yield JotaiBenchDataset(site_data_base=site_data_base, sort_order=0) - # Add legacy version of Jotaibench using an old manifest. - jotaibench_v0_manifest_url, jotaibench_v0_manifest_sha256 = { - "darwin": ( - "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2", - "39464256405aacefdb7550a7f990c9c578264c132804eec3daac091fa3c21bd1", - ), - "linux": ( - "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2", - "3657a36b129d462c11d451a5bc9365e73f404a814e8726b383a2d7e64faa3d73", - ), - }[sys.platform] - yield JotaiBenchDataset( - name="benchmark://jotaibench-v0", - site_data_base=site_data_base, - sort_order=0, - manifest_url=jotaibench_v0_manifest_url, - manifest_sha256=jotaibench_v0_manifest_sha256, - deprecated="Please use jotaibench-v1", - ) yield BlasDataset(site_data_base=site_data_base, sort_order=0) yield CLgenDataset(site_data_base=site_data_base, sort_order=0) yield CBenchDataset(site_data_base=site_data_base) From f82089fbc855de65ef6e24d81809927a2dff6b8d Mon Sep 17 00:00:00 2001 From: canesche Date: Tue, 23 Aug 2022 18:26:08 +0000 Subject: [PATCH 14/14] replacing jotaibench's version v1 to v0 --- compiler_gym/envs/llvm/datasets/jotaibench.py | 16 ++++++++-------- tests/llvm/datasets/jotaibench_test.py | 10 +++++----- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/compiler_gym/envs/llvm/datasets/jotaibench.py b/compiler_gym/envs/llvm/datasets/jotaibench.py index 033c5fffe..0012cf7ad 100644 --- a/compiler_gym/envs/llvm/datasets/jotaibench.py +++ b/compiler_gym/envs/llvm/datasets/jotaibench.py @@ -56,15 +56,15 @@ def __init__( manifest_url_, manifest_sha256_ = { "darwin": ( "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2?raw=true", - "202d14b0f3f78210c7472b7d4ef7c33d828174c30a3bced6950fc1ca88773983", + "b5a51af3d4e2f77a66001635ec64ed321e0ece19873c4a888040859af7556401", ), "linux": ( "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2?raw=true", - "202d14b0f3f78210c7472b7d4ef7c33d828174c30a3bced6950fc1ca88773983", + "b5a51af3d4e2f77a66001635ec64ed321e0ece19873c4a888040859af7556401", ), }[sys.platform] super().__init__( - name=name or "benchmark://jotaibench-v1", + name=name or "benchmark://jotaibench-v0", description="Compile-only C/C++ functions extracted from GitHub", references={ "Paper": "https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf", @@ -77,8 +77,8 @@ def __init__( tar_urls=[ 
"https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2?raw=true" ], - tar_sha256="202d14b0f3f78210c7472b7d4ef7c33d828174c30a3bced6950fc1ca88773983", - strip_prefix="jotaibench-v1", + tar_sha256="b5a51af3d4e2f77a66001635ec64ed321e0ece19873c4a888040859af7556401", + strip_prefix="jotaibench-v0", tar_compression="bz2", benchmark_file_suffix=".c", sort_order=sort_order, @@ -142,7 +142,7 @@ def __init__( site_data_base: Path, ): super().__init__( - name="benchmark://jotai-runnable-v1", + name="benchmark://jotai-runnable-v0", description="Runnable C/C++ functions extracted from GitHub", references={ "Paper": "https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf", @@ -153,8 +153,8 @@ def __init__( tar_urls=[ "https://github.com/lac-dcc/jotai-benchmarks/blob/main/benchmarks/jotaibench.bz2?raw=true" ], - tar_sha256="202d14b0f3f78210c7472b7d4ef7c33d828174c30a3bced6950fc1ca88773983", - strip_prefix="jotaibench-v1", + tar_sha256="b5a51af3d4e2f77a66001635ec64ed321e0ece19873c4a888040859af7556401", + strip_prefix="jotaibench-v0", tar_compression="bz2", benchmark_file_suffix=".c", ) diff --git a/tests/llvm/datasets/jotaibench_test.py b/tests/llvm/datasets/jotaibench_test.py index db837b93c..e0cc408ff 100644 --- a/tests/llvm/datasets/jotaibench_test.py +++ b/tests/llvm/datasets/jotaibench_test.py @@ -22,7 +22,7 @@ @pytest.fixture(scope="module") def jotaibench_dataset() -> JotaiBenchDataset: with gym.make("llvm-v0") as env: - ds = env.datasets["jotaibench-v1"] + ds = env.datasets["jotaibench-v0"] yield ds @@ -38,15 +38,15 @@ def test_missing_benchmark_name(jotaibench_dataset: JotaiBenchDataset, mocker): mocker.patch.object(jotaibench_dataset, "install") with pytest.raises( - LookupError, match=r"^No benchmark specified: benchmark://jotaibench-v1$" + LookupError, match=r"^No benchmark specified: benchmark://jotaibench-v0$" ): - jotaibench_dataset.benchmark("benchmark://jotaibench-v1") + jotaibench_dataset.benchmark("benchmark://jotaibench-v0") jotaibench_dataset.install.assert_called_once() with pytest.raises( - LookupError, match=r"^No benchmark specified: benchmark://jotaibench-v1/$" + LookupError, match=r"^No benchmark specified: benchmark://jotaibench-v0/$" ): - jotaibench_dataset.benchmark("benchmark://jotaibench-v1/") + jotaibench_dataset.benchmark("benchmark://jotaibench-v0/") assert jotaibench_dataset.install.call_count == 2