Skip to content

Commit

Permalink
Rename Dataset tuple to LegacyDataset.
Browse files Browse the repository at this point in the history
In preparation for introducing a new Dataset class.

Issue #45.
  • Loading branch information
ChrisCummins committed Feb 26, 2021
1 parent 52e5f98 commit c8e6116
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 40 deletions.
10 changes: 8 additions & 2 deletions compiler_gym/bin/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,13 @@
import humanize
from absl import app, flags

from compiler_gym.datasets.dataset import Dataset, activate, deactivate, delete, require
from compiler_gym.datasets.dataset import (
LegacyDataset,
activate,
deactivate,
delete,
require,
)
from compiler_gym.util.flags.env_from_flags import env_from_flags
from compiler_gym.util.tabulate import tabulate

Expand Down Expand Up @@ -183,7 +189,7 @@ def enumerate_directory(name: str, path: Path):
for path in path.iterdir():
if not path.is_file() or not path.name.endswith(".json"):
continue
dataset = Dataset.from_json_file(path)
dataset = LegacyDataset.from_json_file(path)
rows.append(
(dataset.name, dataset.license, dataset.file_count, dataset.size_bytes)
)
Expand Down
10 changes: 8 additions & 2 deletions compiler_gym/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Manage datasets of benchmarks."""
from compiler_gym.datasets.dataset import Dataset, activate, deactivate, delete, require
from compiler_gym.datasets.dataset import (
LegacyDataset,
activate,
deactivate,
delete,
require,
)

__all__ = ["Dataset", "require", "activate", "deactivate", "delete"]
__all__ = ["LegacyDataset", "require", "activate", "deactivate", "delete"]
22 changes: 12 additions & 10 deletions compiler_gym/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from compiler_gym.util.download import download


class Dataset(NamedTuple):
class LegacyDataset(NamedTuple):
"""A collection of benchmarks for use by an environment."""

name: str
Expand Down Expand Up @@ -48,11 +48,11 @@ class Dataset(NamedTuple):
"""A list of platforms supported by this dataset. Allowed platforms 'macos' and 'linux'."""

@classmethod
def from_json_file(cls, path: Path) -> "Dataset":
def from_json_file(cls, path: Path) -> "LegacyDataset":
"""Construct a dataset from a JSON metadata file.
:param path: Path of the JSON metadata.
:return: A Dataset instance.
:return: A LegacyDataset instance.
"""
try:
with open(str(path), "rb") as f:
Expand Down Expand Up @@ -136,7 +136,7 @@ def deactivate(env, name: str) -> bool:
return True


def require(env, dataset: Union[str, Dataset]) -> bool:
def require(env, dataset: Union[str, LegacyDataset]) -> bool:
"""Require that the given dataset is available to the environment.
This will download and activate the dataset if it is not already installed.
Expand All @@ -151,12 +151,14 @@ def require(env, dataset: Union[str, Dataset]) -> bool:
:param env: The environment that this dataset is required for.
:param dataset: The name of the dataset to download, the URL of the dataset,
or a :class:`Dataset` instance.
or a :class:`LegacyDataset` instance.
:return: :code:`True` if the dataset was downloaded, or :code:`False` if the
dataset was already available.
"""

def download_and_unpack_archive(url: str, sha256: Optional[str] = None) -> Dataset:
def download_and_unpack_archive(
url: str, sha256: Optional[str] = None
) -> LegacyDataset:
json_files_before = {
f
for f in env.inactive_datasets_site_path.iterdir()
Expand All @@ -173,9 +175,9 @@ def download_and_unpack_archive(url: str, sha256: Optional[str] = None) -> Datas
new_json = json_files_after - json_files_before
if not len(new_json):
raise OSError(f"Downloaded dataset {url} contains no metadata JSON file")
return Dataset.from_json_file(list(new_json)[0])
return LegacyDataset.from_json_file(list(new_json)[0])

def unpack_local_archive(path: Path) -> Dataset:
def unpack_local_archive(path: Path) -> LegacyDataset:
if not path.is_file():
raise FileNotFoundError(f"File not found: {path}")
json_files_before = {
Expand All @@ -193,12 +195,12 @@ def unpack_local_archive(path: Path) -> Dataset:
new_json = json_files_after - json_files_before
if not len(new_json):
raise OSError(f"Downloaded dataset {url} contains no metadata JSON file")
return Dataset.from_json_file(list(new_json)[0])
return LegacyDataset.from_json_file(list(new_json)[0])

with fasteners.InterProcessLock(env.datasets_site_path / "LOCK"):
# Resolve the name and URL of the dataset.
sha256 = None
if isinstance(dataset, Dataset):
if isinstance(dataset, LegacyDataset):
name, url = dataset.name, dataset.url
elif isinstance(dataset, str):
# Check if we have already downloaded the dataset.
Expand Down
26 changes: 15 additions & 11 deletions compiler_gym/envs/compiler_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from gym.spaces import Space

from compiler_gym.compiler_env_state import CompilerEnvState
from compiler_gym.datasets.dataset import Dataset, require
from compiler_gym.datasets.dataset import LegacyDataset, require
from compiler_gym.service import (
CompilerGymServiceConnection,
ConnectionOpts,
Expand Down Expand Up @@ -103,9 +103,9 @@ class CompilerEnv(gym.Env):
to store benchmarks.
:vartype datasets_site_path: Optional[Path]
:ivar available_datasets: A mapping from dataset name to :class:`Dataset`
:ivar available_datasets: A mapping from dataset name to :class:`LegacyDataset`
objects that are available to download.
:vartype available_datasets: Dict[str, Dataset]
:vartype available_datasets: Dict[str, LegacyDataset]
:ivar observation: A view of the available observation spaces that permits
on-demand computation of observations.
Expand Down Expand Up @@ -196,7 +196,7 @@ def __init__(
self._service_endpoint: Union[str, Path] = service
self._connection_settings = connection_settings or ConnectionOpts()
self.datasets_site_path: Optional[Path] = None
self.available_datasets: Dict[str, Dataset] = {}
self.available_datasets: Dict[str, LegacyDataset] = {}

# The benchmark that is currently being used, and the benchmark that
# the user requested. Those do not always correlate, since the user
Expand Down Expand Up @@ -818,7 +818,7 @@ def _reward_view_type(self):
"""
return RewardView

def require_datasets(self, datasets: List[Union[str, Dataset]]) -> None:
def require_datasets(self, datasets: List[Union[str, LegacyDataset]]) -> None:
"""Require that the given datasets are available to the environment.
Example usage:
Expand All @@ -834,8 +834,11 @@ def require_datasets(self, datasets: List[Union[str, Dataset]]) -> None:
:param datasets: A list of datasets to require. Each dataset is the name
of an available dataset, the URL of a dataset to download, or a
:class:`Dataset` instance.
:class:`LegacyDataset` instance.
:return: Whether a new dataset was downloaded.
"""
self.logger.debug("Requiring datasets: %s", datasets)
dataset_installed = False
for dataset in datasets:
dataset_installed |= require(self, dataset)
Expand All @@ -849,15 +852,16 @@ def require_datasets(self, datasets: List[Union[str, Dataset]]) -> None:
),
)
self.make_manifest_file()
return dataset_installed

def require_dataset(self, dataset: Union[str, Dataset]) -> None:
def require_dataset(self, dataset: Union[str, LegacyDataset]) -> bool:
"""Require that the given dataset is available to the environment.
Alias for
:meth:`env.require_datasets([dataset]) <compiler_gym.envs.CompilerEnv.require_datasets>`.
:param dataset: The name of the dataset to download, the URL of the dataset, or a
:class:`Dataset` instance.
:class:`LegacyDataset` instance.
"""
return self.require_datasets([dataset])

Expand Down Expand Up @@ -885,7 +889,7 @@ def make_manifest_file(self) -> Path:
)
return manifest_path

def register_dataset(self, dataset: Dataset) -> bool:
def register_dataset(self, dataset: LegacyDataset) -> bool:
"""Register a new dataset.
After registering, the dataset name may be used by
Expand All @@ -894,13 +898,13 @@ def register_dataset(self, dataset: Dataset) -> bool:
Example usage:
>>> my_dataset = Dataset(name="my-dataset-v0", ...)
>>> my_dataset = LegacyDataset(name="my-dataset-v0", ...)
>>> env = gym.make("llvm-v0")
>>> env.register_dataset(my_dataset)
>>> env.require_dataset("my-dataset-v0")
>>> env.benchmark = "my-dataset-v0/1"
:param dataset: A :class:`Dataset` instance describing the new dataset.
:param dataset: A :class:`LegacyDataset` instance describing the new dataset.
:return: :code:`True` if the dataset was added, else :code:`False`.
:raises ValueError: If a dataset with this name is already registered.
"""
Expand Down
24 changes: 12 additions & 12 deletions compiler_gym/envs/llvm/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import fasteners

from compiler_gym.datasets.dataset import Dataset
from compiler_gym.datasets.dataset import LegacyDataset
from compiler_gym.util.download import download
from compiler_gym.util.runfiles_path import cache_path, runfiles_path, site_data_path
from compiler_gym.util.timer import Timer
Expand All @@ -42,7 +42,7 @@
_COMPILE_ARGS = []

LLVM_DATASETS = [
Dataset(
LegacyDataset(
name="blas-v0",
url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-blas-v0.tar.bz2",
license="BSD 3-Clause",
Expand All @@ -52,7 +52,7 @@
size_bytes=3969036,
sha256="e724a8114709f8480adeb9873d48e426e8d9444b00cddce48e342b9f0f2b096d",
),
Dataset(
LegacyDataset(
name="cBench-v0",
url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-cBench-v0-macos.tar.bz2",
license="BSD 3-Clause",
Expand All @@ -63,7 +63,7 @@
sha256="072a730c86144a07bba948c49afe543e4f06351f1cb17f7de77f91d5c1a1b120",
platforms=["macos"],
),
Dataset(
LegacyDataset(
name="cBench-v0",
url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-cBench-v0-linux.tar.bz2",
license="BSD 3-Clause",
Expand All @@ -74,7 +74,7 @@
sha256="9b5838a90895579aab3b9375e8eeb3ed2ae58e0ad354fec7eb4f8b31ecb4a360",
platforms=["linux"],
),
Dataset(
LegacyDataset(
name="github-v0",
url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-github-v0.tar.bz2",
license="CC BY 4.0",
Expand All @@ -84,7 +84,7 @@
size_bytes=725974100,
sha256="880269dd7a5c2508ea222a2e54c318c38c8090eb105c0a87c595e9dd31720764",
),
Dataset(
LegacyDataset(
name="linux-v0",
url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-linux-v0.tar.bz2",
license="GPL-2.0",
Expand All @@ -94,7 +94,7 @@
size_bytes=516031044,
sha256="a1ae5c376af30ab042c9e54dc432f89ce75f9ebaee953bc19c08aff070f12566",
),
Dataset(
LegacyDataset(
name="mibench-v0",
url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-mibench-v0.tar.bz2",
license="BSD 3-Clause",
Expand All @@ -104,7 +104,7 @@
size_bytes=238480,
sha256="128c090c40b955b99fdf766da167a5f642018fb35c16a1d082f63be2e977eb13",
),
Dataset(
LegacyDataset(
name="npb-v0",
url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-npb-v0.tar.bz2",
license="NASA Open Source Agreement v1.3",
Expand All @@ -114,7 +114,7 @@
size_bytes=2287444,
sha256="793ac2e7a4f4ed83709e8a270371e65b724da09eaa0095c52e7f4209f63bb1f2",
),
Dataset(
LegacyDataset(
name="opencv-v0",
url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-opencv-v0.tar.bz2",
license="Apache 2.0",
Expand All @@ -124,7 +124,7 @@
size_bytes=21903008,
sha256="003df853bd58df93572862ca2f934c7b129db2a3573bcae69a2e59431037205c",
),
Dataset(
LegacyDataset(
name="poj104-v0",
url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-poj104-v0.tar.bz2",
license="BSD 3-Clause",
Expand All @@ -134,7 +134,7 @@
size_bytes=304207752,
sha256="6254d629887f6b51efc1177788b0ce37339d5f3456fb8784415ed3b8c25cce27",
),
Dataset(
LegacyDataset(
name="polybench-v0",
url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-polybench-v0.tar.bz2",
license="BSD 3-Clause",
Expand All @@ -144,7 +144,7 @@
size_bytes=162624,
sha256="968087e68470e5b44dc687dae195143000c7478a23d6631b27055bb3bb3116b1",
),
Dataset(
LegacyDataset(
name="tensorflow-v0",
url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-tensorflow-v0.tar.bz2",
license="Apache 2.0",
Expand Down
6 changes: 3 additions & 3 deletions tests/compiler_env_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
import gym
import pytest

from compiler_gym.datasets import Dataset
from compiler_gym.datasets import LegacyDataset
from compiler_gym.envs import CompilerEnv
from tests.test_main import main

pytest_plugins = ["tests.pytest_plugins.llvm"]


def make_dataset(**kwargs) -> Dataset:
def make_dataset(**kwargs) -> LegacyDataset:
default_kwargs = {
"name": "test-dataset-v0",
"url": "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-blas-v0.tar.bz2",
Expand All @@ -27,7 +27,7 @@ def make_dataset(**kwargs) -> Dataset:
"sha256": "e724a8114709f8480adeb9873d48e426e8d9444b00cddce48e342b9f0f2b096d",
}
default_kwargs.update(kwargs)
return Dataset(**default_kwargs)
return LegacyDataset(**default_kwargs)


def test_register_dataset(env: CompilerEnv):
Expand Down

0 comments on commit c8e6116

Please sign in to comment.