Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[llvm] Add the chstone-v0 dataset #284

Merged
merged 2 commits into from
May 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler_gym/envs/llvm/datasets/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ py_library(
"__init__.py",
"anghabench.py",
"cbench.py",
"chstone.py",
"clgen.py",
"csmith.py",
"llvm_stress.py",
Expand Down
2 changes: 2 additions & 0 deletions compiler_gym/envs/llvm/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from compiler_gym.datasets import Dataset, TarDatasetWithManifest
from compiler_gym.envs.llvm.datasets.anghabench import AnghaBenchDataset
from compiler_gym.envs.llvm.datasets.cbench import CBenchDataset, CBenchLegacyDataset
from compiler_gym.envs.llvm.datasets.chstone import CHStoneDataset
from compiler_gym.envs.llvm.datasets.clgen import CLgenDataset
from compiler_gym.envs.llvm.datasets.csmith import CsmithBenchmark, CsmithDataset
from compiler_gym.envs.llvm.datasets.llvm_stress import LlvmStressDataset
Expand Down Expand Up @@ -248,6 +249,7 @@ def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset
sort_order=100,
)
yield CBenchLegacyDataset(site_data_base=site_data_base)
yield CHStoneDataset(site_data_base=site_data_base)
yield CsmithDataset(site_data_base=site_data_base, sort_order=0)
yield GitHubDataset(site_data_base=site_data_base, sort_order=0)
yield LinuxDataset(site_data_base=site_data_base, sort_order=0)
Expand Down
135 changes: 135 additions & 0 deletions compiler_gym/envs/llvm/datasets/chstone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import subprocess
from concurrent.futures import as_completed
from pathlib import Path
from typing import Iterable

from compiler_gym.datasets import Benchmark, TarDatasetWithManifest
from compiler_gym.datasets.benchmark import BenchmarkWithSource
from compiler_gym.envs.llvm.llvm_benchmark import ClangInvocation
from compiler_gym.util import thread_pool
from compiler_gym.util.filesystem import atomic_file_write

# The CHStone programs exposed by this dataset, in URI order. Each entry is
# the name of a program directory inside the unpacked tarball.
_PROGRAM_NAMES = (
    "adpcm",
    "aes",
    "blowfish",
    "dfadd",
    "dfdiv",
    "dfmul",
    "dfsin",
    "gsm",
    "jpeg",
    "mips",
    "motion",
    "sha",
)

# Fully-qualified benchmark URIs, one per program.
URIS = ["benchmark://chstone-v0/" + program for program in _PROGRAM_NAMES]


class CHStoneDataset(TarDatasetWithManifest):
    """A dataset of C programs from the CHStone high-level synthesis suite.

    The dataset is from:

        Hara, Yuko, Hiroyuki Tomiyama, Shinya Honda, Hiroaki Takada, and Katsuya
        Ishii. "Chstone: A benchmark program suite for practical c-based
        high-level synthesis." In 2008 IEEE International Symposium on Circuits
        and Systems, pp. 1192-1195. IEEE, 2008.

    And is available at:

        http://www.ertl.jp/chstone/

    The C sources are distributed as a tarball. Each program is compiled to a
    ``.bc`` bitcode file on demand, the first time it is requested through
    :meth:`benchmark`.
    """

    def __init__(
        self,
        site_data_base: Path,
        sort_order: int = 0,
    ):
        """Construct the dataset.

        :param site_data_base: Forwarded unchanged to the parent constructor.
        :param sort_order: Forwarded unchanged to the parent constructor.
        """
        super().__init__(
            name="benchmark://chstone-v0",
            description="Benchmarks for C-based High-Level Synthesis",
            references={
                "Paper": "http://www.yxi.com/applications/iscas2008-300_1027.pdf",
                "Homepage": "http://www.ertl.jp/chstone/",
            },
            license="Mixture of open source and public domain licenses",
            site_data_base=site_data_base,
            tar_urls=[
                "https://github.com/ChrisCummins/patmos_HLS/archive/e62d878ceb91e5a18007ca2e0a9602ee44ff7d59.tar.gz"
            ],
            tar_sha256="f7acab9d3c3dc7b971e62c8454bc909d84bddb6d0a96378e41beb94231739acb",
            strip_prefix="patmos_HLS-e62d878ceb91e5a18007ca2e0a9602ee44ff7d59/benchmarks/CHStone",
            tar_compression="gz",
            benchmark_file_suffix=".bc",
            sort_order=sort_order,
            # We provide our own manifest.
            manifest_urls=[],
            manifest_sha256="",
        )

    def benchmark_uris(self) -> Iterable[str]:
        """Yield the URI of every benchmark, taken from the module-level URIS list."""
        yield from URIS

    def benchmark(self, uri: str) -> Benchmark:
        """Return the benchmark for ``uri``, compiling its bitcode if needed.

        :param uri: A URI of the form ``benchmark://chstone-v0/<name>``.
        :return: The result of ``BenchmarkWithSource.create()`` for the
            compiled bitcode file, with the C source passed under the name
            ``function.c``.
        :raises LookupError: If the URI does not belong to this dataset, does
            not name a benchmark, or the expected C source file is missing.
        :raises subprocess.CalledProcessError: If the compile command exits
            with a non-zero status.
        :raises subprocess.TimeoutExpired: If the compile command runs longer
            than 300 seconds.
        """
        self.install()

        # The benchmark name is recovered below by slicing off the dataset
        # prefix, so make sure the prefix is actually there. Without this a
        # URI for a different dataset would be cut at an arbitrary offset.
        if uri != self.name and not uri.startswith(f"{self.name}/"):
            raise LookupError(
                f"Benchmark not found: {uri} (expected a {self.name} URI)"
            )

        # Drop "<dataset name>/" from the front of the URI.
        benchmark_name = uri[len(self.name) + 1 :]
        if not benchmark_name:
            raise LookupError(f"No benchmark specified: {uri}")

        bitcode_abspath = self.dataset_root / f"{benchmark_name}.bc"

        # Most of the source files are named after the parent directory, but not
        # all.
        c_file_name = {
            "blowfish": "bf.c",
            "motion": "mpeg2.c",
            "sha": "sha_driver.c",
            "jpeg": "main.c",
        }.get(benchmark_name, f"{benchmark_name}.c")
        c_file_abspath = self.dataset_root / benchmark_name / c_file_name

        # If the file does not exist, compile it on-demand.
        if not bitcode_abspath.is_file():
            if not c_file_abspath.is_file():
                raise LookupError(
                    f"Benchmark not found: {uri} (file not found: {c_file_abspath})"
                )

            # Compile into a temporary path provided by atomic_file_write()
            # rather than directly into the final bitcode path.
            with atomic_file_write(bitcode_abspath) as tmp_path:
                compile_cmd = ClangInvocation.from_c_file(
                    c_file_abspath,
                    copt=[
                        "-ferror-limit=1",  # Stop on first error.
                        "-w",  # No warnings.
                    ],
                ).command(outpath=tmp_path)
                subprocess.check_call(compile_cmd, timeout=300)

        return BenchmarkWithSource.create(
            uri, bitcode_abspath, "function.c", c_file_abspath
        )

    @property
    def size(self) -> int:
        """The number of benchmarks in the dataset."""
        return len(URIS)

    def compile_all(self):
        """Compile every benchmark in the dataset, printing progress to stdout.

        Work is submitted to the executor returned by
        ``thread_pool.get_thread_pool_executor()``. An exception raised while
        building any benchmark is re-raised here by ``future.result()``.
        """
        n = self.size
        executor = thread_pool.get_thread_pool_executor()
        # Since the dataset is lazily compiled, simply iterating over the full
        # set of URIs will compile everything. Do this in parallel.
        futures = (
            executor.submit(self.benchmark, uri) for uri in self.benchmark_uris()
        )
        for i, future in enumerate(as_completed(futures), start=1):
            future.result()
            # "\r" plus the ANSI erase-line sequence rewrites the same terminal
            # line on every update instead of scrolling.
            print(
                f"\r\033[KCompiled {i} of {n} programs ({i/n:.1%} complete)",
                flush=True,
                end="",
            )
3 changes: 3 additions & 0 deletions compiler_gym/util/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ def _do_download_attempt(url: str, sha256: Optional[str]) -> bytes:


def _download(urls: List[str], sha256: Optional[str], max_retries: int) -> bytes:
if not urls:
raise ValueError("No URLs to download")

# Cache hit.
if sha256 and cache_path(f"downloads/{sha256}").is_file():
with open(str(cache_path(f"downloads/{sha256}")), "rb") as f:
Expand Down
13 changes: 13 additions & 0 deletions tests/llvm/datasets/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,19 @@ py_test(
],
)

# Test target for chstone_test.py. Depends on the LLVM environment package,
# the LLVM datasets package, the common test entry point, and the "common" and
# "llvm" pytest plugin targets.
py_test(
    name = "chstone_test",
    timeout = "moderate",
    srcs = ["chstone_test.py"],
    deps = [
        "//compiler_gym/envs/llvm",
        "//compiler_gym/envs/llvm/datasets",
        "//tests:test_main",
        "//tests/pytest_plugins:common",
        "//tests/pytest_plugins:llvm",
    ],
)

py_test(
name = "clgen_test",
timeout = "moderate",
Expand Down
57 changes: 57 additions & 0 deletions tests/llvm/datasets/chstone_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Tests for the AnghaBench dataset."""
import gym
import pytest

import compiler_gym.envs.llvm # noqa register environments
from compiler_gym.envs.llvm import LlvmEnv
from compiler_gym.envs.llvm.datasets import CHStoneDataset, chstone
from tests.test_main import main

pytest_plugins = ["tests.pytest_plugins.common", "tests.pytest_plugins.llvm"]


@pytest.fixture(scope="module")
def chstone_dataset() -> CHStoneDataset:
    """Provide the ``chstone-v0`` dataset, looked up once per test module.

    A temporary ``llvm-v0`` environment is created only to fetch the dataset
    object; the environment is closed before the dataset is handed to tests.
    """
    llvm_env = gym.make("llvm-v0")
    try:
        dataset = llvm_env.datasets["chstone-v0"]
    finally:
        # Always release the environment, even if the lookup fails.
        llvm_env.close()
    yield dataset


def test_anghabench_size(chstone_dataset: CHStoneDataset):
    """The chstone-v0 dataset reports exactly twelve benchmarks."""
    expected_benchmark_count = 12
    assert chstone_dataset.size == expected_benchmark_count


def test_missing_benchmark_name(chstone_dataset: CHStoneDataset, mocker):
    """A URI with no benchmark component raises LookupError after install()."""
    # Mock install() so that on CI it doesn't download and unpack the tarfile.
    mocker.patch.object(chstone_dataset, "install")

    # (URI without a benchmark name, regex the error message must match).
    cases = (
        (
            "benchmark://chstone-v0",
            r"^No benchmark specified: benchmark://chstone-v0$",
        ),
        (
            "benchmark://chstone-v0/",
            r"^No benchmark specified: benchmark://chstone-v0/$",
        ),
    )

    for calls_so_far, (bad_uri, message_pattern) in enumerate(cases, start=1):
        with pytest.raises(LookupError, match=message_pattern):
            chstone_dataset.benchmark(bad_uri)
        # install() runs once per lookup, even for a rejected URI.
        assert chstone_dataset.install.call_count == calls_so_far


@pytest.mark.parametrize("uri", chstone.URIS)
def test_chstone_benchmark_reset(
    env: LlvmEnv, chstone_dataset: CHStoneDataset, uri: str
):
    """Every CHStone benchmark can be loaded into an environment via reset()."""
    loaded_benchmark = chstone_dataset.benchmark(uri)
    env.reset(loaded_benchmark)
    assert env.benchmark == uri


if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions tests/llvm/datasets/llvm_datasets_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def test_default_dataset_list():
"benchmark://cbench-v1",
"benchmark://anghabench-v1",
"benchmark://blas-v0",
"benchmark://chstone-v0",
"benchmark://clgen-v0",
"benchmark://github-v0",
"benchmark://linux-v0",
Expand Down
5 changes: 5 additions & 0 deletions tests/util/download_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,10 @@ def patched_download(*args):
download.download("example", sha256="123")


def test_download_no_urls():
    """Calling download() with an empty URL list raises ValueError."""
    no_urls = []
    with pytest.raises(ValueError, match="No URLs to download"):
        download.download(urls=no_urls)


if __name__ == "__main__":
main()