Skip to content

Commit

Permalink
Add automated deprecation warnings to the Dataset class.
Browse files Browse the repository at this point in the history
This replaces the boolean `hidden` value with a `deprecated` message,
which is emitted automatically on a call to `install()`.

Issue facebookresearch#45. Fixes facebookresearch#219.
  • Loading branch information
ChrisCummins committed Apr 29, 2021
1 parent 5842023 commit 8f836c4
Show file tree
Hide file tree
Showing 10 changed files with 75 additions and 56 deletions.
36 changes: 23 additions & 13 deletions compiler_gym/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
import logging
import os
import shutil
import warnings
from pathlib import Path
from typing import Dict, Iterable, Optional, Union

from deprecated.sphinx import deprecated
from deprecated.sphinx import deprecated as mark_deprecated

from compiler_gym.datasets.benchmark import Benchmark
from compiler_gym.datasets.uri import DATASET_NAME_RE
Expand Down Expand Up @@ -39,7 +40,7 @@ def __init__(
site_data_base: Path,
benchmark_class=Benchmark,
references: Optional[Dict[str, str]] = None,
hidden: bool = False,
deprecated: Optional[str] = None,
sort_order: int = 0,
logger: Optional[logging.Logger] = None,
validatable: str = "No",
Expand All @@ -63,9 +64,11 @@ def __init__(
:param references: A dictionary containing URLs for this dataset, keyed
by their name. E.g. :code:`references["Paper"] = "https://..."`.
:param hidden: Whether the dataset should be excluded from the
:meth:`datasets() <compiler_gym.datasets.Datasets.dataset>` iterator
of any :class:`Datasets <compiler_gym.datasets.Datasets>` container.
:param deprecated: Mark the dataset as deprecated and issue a warning
when :meth:`install() <compiler_gym.datasets.Dataset.install>`,
including the given method. Deprecated datasets are excluded from
the :meth:`datasets() <compiler_gym.datasets.Datasets.dataset>`
iterator by default.
:param sort_order: An optional numeric value that should be used to
order this dataset relative to others. Lowest value sorts first.
Expand All @@ -91,7 +94,7 @@ def __init__(
self._protocol = components.group("dataset_protocol")
self._version = int(components.group("dataset_version"))
self._references = references or {}
self._hidden = hidden
self._deprecation_message = deprecated
self._validatable = validatable

self._logger = logger
Expand Down Expand Up @@ -170,14 +173,14 @@ def references(self) -> Dict[str, str]:
return self._references

@property
def hidden(self) -> str:
def deprecated(self) -> bool:
"""Whether the dataset is included in the iterable sequence of datasets
of a containing :class:`Datasets <compiler_gym.datasets.Datasets>`
collection.
:type: bool
"""
return self._hidden
return self._deprecation_message is not None

@property
def validatable(self) -> str:
Expand Down Expand Up @@ -257,12 +260,19 @@ def installed(self) -> bool:
def install(self) -> None:
"""Install this dataset locally.
Implementing this method is optional.
Implementing this method is optional. If implementing this method, you
must call :code:`super().install()` first.
This method should not perform redundant work - it should detect whether
any work needs to be done so that repeated calls to install will
complete quickly.
"""
if self.deprecated:
warnings.warn(
f"Dataset '{self.name}' is marked as deprecated. {self._deprecation_message}",
category=DeprecationWarning,
stacklevel=2,
)

def uninstall(self) -> None:
"""Remove any local data for this benchmark.
Expand Down Expand Up @@ -346,7 +356,7 @@ class DatasetInitError(OSError):
"""Base class for errors raised if a dataset fails to initialize."""


@deprecated(
@mark_deprecated(
version="0.1.4",
reason=(
"Datasets are now automatically activated. "
Expand All @@ -367,7 +377,7 @@ def activate(env, dataset: Union[str, Dataset]) -> bool:
return False


@deprecated(
@mark_deprecated(
version="0.1.4",
reason=(
"Please use :meth:`del env.datasets[dataset] <compiler_gym.datasets.Datasets.__delitem__>`. "
Expand All @@ -390,7 +400,7 @@ def delete(env, dataset: Union[str, Dataset]) -> bool:
return False


@deprecated(
@mark_deprecated(
version="0.1.4",
reason=(
"Please use :meth:`env.datasets.deactivate() <compiler_gym.datasets.Datasets.deactivate>`. "
Expand All @@ -413,7 +423,7 @@ def deactivate(env, dataset: Union[str, Dataset]) -> bool:
return False


@deprecated(
@mark_deprecated(
version="0.1.7",
reason=(
"Datasets are now installed automatically, there is no need to call :code:`require()`. "
Expand Down
6 changes: 3 additions & 3 deletions compiler_gym/datasets/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def __init__(
):
self._datasets: Dict[str, Dataset] = {d.name: d for d in datasets}
self._visible_datasets: Set[str] = set(
name for name, dataset in self._datasets.items() if not dataset.hidden
name for name, dataset in self._datasets.items() if not dataset.deprecated
)

def datasets(self, with_deprecated: bool = False) -> Iterable[Dataset]:
Expand All @@ -95,7 +95,7 @@ def datasets(self, with_deprecated: bool = False) -> Iterable[Dataset]:
"""
datasets = self._datasets.values()
if not with_deprecated:
datasets = (d for d in datasets if not d.hidden)
datasets = (d for d in datasets if not d.deprecated)
yield from sorted(datasets, key=lambda d: (d.sort_order, d.name))

def __iter__(self) -> Iterable[Dataset]:
Expand Down Expand Up @@ -147,7 +147,7 @@ def __setitem__(self, key: str, dataset: Dataset):
dataset_name = resolve_uri_protocol(key)

self._datasets[dataset_name] = dataset
if not dataset.hidden:
if not dataset.deprecated:
self._visible_datasets.add(dataset_name)

def __delitem__(self, dataset: str):
Expand Down
2 changes: 2 additions & 0 deletions compiler_gym/datasets/tar_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ def installed(self) -> bool:
return self._installed

def install(self) -> None:
super().install()

if self.installed:
return

Expand Down
5 changes: 4 additions & 1 deletion compiler_gym/envs/llvm/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,10 @@ def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset
yield CBenchDataset(
site_data_base=site_data_base,
name="benchmark://cBench-v1",
hidden=True,
deprecated=(
"Please use 'benchmark://cbench-v1' (note the lowercase name). "
"The dataset is the same, only the name has changed"
),
manifest_url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-cBench-v1-manifest.bz2",
manifest_sha256="635b94eeb2784dfedb3b53fd8f84517c3b4b95d851ddb662d4c1058c72dc81e0",
sort_order=100,
Expand Down
14 changes: 3 additions & 11 deletions compiler_gym/envs/llvm/datasets/cbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from typing import Callable, Dict, List, NamedTuple, Optional

import fasteners
from deprecated.sphinx import deprecated

from compiler_gym.datasets import Benchmark, TarDatasetWithManifest
from compiler_gym.third_party import llvm
Expand Down Expand Up @@ -506,7 +505,7 @@ def __init__(
name="benchmark://cbench-v1",
manifest_url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-cbench-v1-manifest.bz2",
manifest_sha256="eeffd7593aeb696a160fd22e6b0c382198a65d0918b8440253ea458cfe927741",
hidden=False,
deprecated=None,
):
platform = {"darwin": "macos"}.get(sys.platform, sys.platform)
url, sha256 = _CBENCH_TARS[platform]
Expand All @@ -527,7 +526,7 @@ def __init__(
site_data_base=site_data_base,
sort_order=sort_order,
benchmark_class=CBenchBenchmark,
hidden=hidden,
deprecated=deprecated,
validatable="Partially",
)

Expand Down Expand Up @@ -575,16 +574,9 @@ def __init__(self, site_data_base: Path):
strip_prefix="cBench-v0",
benchmark_file_suffix=".bc",
site_data_base=site_data_base,
hidden=True, # Hidden because it is deprecated.
deprecated="Please use 'benchmark://cbench-v1'",
)

@deprecated(
version="0.1.4",
reason=("Dataset 'cBench-v0' is deprecated, please use 'cbench-v1'"),
)
def install(self) -> None:
super().install()


# ===============================
# Definition of cBench validators
Expand Down
2 changes: 2 additions & 0 deletions compiler_gym/envs/llvm/datasets/csmith.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ def installed(self) -> bool:

def install(self) -> None:
"""Download and build the Csmith binary."""
super().install()

if self.installed:
return

Expand Down
11 changes: 1 addition & 10 deletions compiler_gym/envs/llvm/datasets/poj104.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
from pathlib import Path
from typing import Optional

from deprecated.sphinx import deprecated

from compiler_gym.datasets import Benchmark, BenchmarkInitError, TarDatasetWithManifest
from compiler_gym.datasets.benchmark import BenchmarkWithSource
from compiler_gym.envs.llvm.llvm_benchmark import ClangInvocation
Expand Down Expand Up @@ -191,12 +189,5 @@ def __init__(self, site_data_base: Path, sort_order: int = 0):
benchmark_file_suffix=".bc",
site_data_base=site_data_base,
sort_order=sort_order,
hidden=True,
deprecated="Please update to benchmark://poj104-v1.",
)

@deprecated(
version="0.1.8",
reason=("Dataset 'poj104-v0' is deprecated, please use 'poj104-v1'"),
)
def install(self) -> None:
super().install()
20 changes: 17 additions & 3 deletions tests/datasets/dataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_dataset_optional_properties():
)

assert dataset.references == {} # Default value.
assert not dataset.hidden
assert not dataset.deprecated
assert dataset.sort_order == 0
assert dataset.validatable == "No"

Expand All @@ -73,15 +73,15 @@ def test_dataset_optional_properties_explicit_values():
license="MIT",
site_data_base="test",
references={"GitHub": "https://github.com/facebookresearch/CompilerGym"},
hidden=True,
deprecated="Deprecation message",
sort_order=10,
validatable="Yes",
)

assert dataset.references == {
"GitHub": "https://github.com/facebookresearch/CompilerGym"
}
assert dataset.hidden
assert dataset.deprecated
assert dataset.sort_order == 10
assert dataset.validatable == "Yes"

Expand Down Expand Up @@ -134,6 +134,20 @@ def test_dataset_site_data_directory(tmpwd: Path):
assert not dataset.site_data_path.is_dir() # Dir is not created until needed.


def test_dataset_deprecation_message(tmpwd: Path):
"""Test that a deprecation warning is emitted on install()."""
dataset = Dataset(
name="benchmark://test-v0",
description="A test dataset",
license="MIT",
site_data_base="test",
deprecated="The cat sat on the mat",
)

with pytest.warns(DeprecationWarning, match="The cat sat on the mat"):
dataset.install()


class TestDataset(Dataset):
"""A dataset to use for testing."""

Expand Down
18 changes: 9 additions & 9 deletions tests/datasets/datasets_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class MockDataset:
def __init__(self, name):
self.name = name
self.installed = False
self.hidden = False
self.deprecated = False
self.benchmark_values = []
self.sort_order = 0

Expand Down Expand Up @@ -52,7 +52,7 @@ def __init__(self, uri):
self.uri = uri

def __repr__(self):
return str(self.name)
return str(self.uri)


def test_enumerate_datasets_empty():
Expand All @@ -78,20 +78,20 @@ def test_enumerate_datasets_with_custom_sort_order():
assert list(datasets) == [db, da]


def test_enumerate_hidden_datasets():
def test_enumerate_deprecated_datasets():
da = MockDataset("benchmark://a")
db = MockDataset("benchmark://b")
datasets = Datasets((da, db))

db.hidden = True
db.deprecated = True
assert list(datasets) == [da]
assert list(datasets.datasets(with_deprecated=True)) == [da, db]


def test_enumerate_datasets_hidden_at_construction_time():
def test_enumerate_datasets_deprecated_at_construction_time():
da = MockDataset("benchmark://a")
db = MockDataset("benchmark://b")
db.hidden = True
db.deprecated = True
datasets = Datasets((da, db))

assert list(datasets) == [da]
Expand All @@ -107,11 +107,11 @@ def test_datasets_add_dataset():
assert list(datasets) == [da]


def test_datasets_add_hidden_dataset():
def test_datasets_add_deprecated_dataset():
datasets = Datasets([])

da = MockDataset("benchmark://a")
da.hidden = True
da.deprecated = True
datasets["benchmark://foo-v0"] = da

assert list(datasets) == []
Expand Down Expand Up @@ -213,7 +213,7 @@ def test_benchmark_uris_order():
def test_benchmarks_iter_deprecated():
da = MockDataset("benchmark://foo-v0")
db = MockDataset("benchmark://bar-v0")
db.hidden = True
db.deprecated = True
ba = MockBenchmark(uri="benchmark://foo-v0/abc")
bb = MockBenchmark(uri="benchmark://foo-v0/123")
bc = MockBenchmark(uri="benchmark://bar-v0/abc")
Expand Down
17 changes: 11 additions & 6 deletions tests/llvm/datasets/cbench_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,21 @@ def test_validate_sha_output_invalid():

def test_cbench_v0_deprecation(env: LlvmEnv):
"""Test that cBench-v0 emits a deprecation warning when used."""
with pytest.deprecated_call(
match="Dataset 'cBench-v0' is deprecated, please use 'cbench-v1'"
):
with pytest.deprecated_call(match="Please use 'benchmark://cbench-v1'"):
env.datasets["cBench-v0"].install()

with pytest.deprecated_call(
match="Dataset 'cBench-v0' is deprecated, please use 'cbench-v1'"
):
with pytest.deprecated_call(match="Please use 'benchmark://cbench-v1'"):
env.datasets.benchmark("benchmark://cBench-v0/crc32")


def test_cbench_v1_deprecation(env: LlvmEnv):
"""Test that cBench-v1 emits a deprecation warning when used."""
with pytest.deprecated_call(match="Please use 'benchmark://cbench-v1'"):
env.datasets["cBench-v1"].install()

with pytest.deprecated_call(match="Please use 'benchmark://cbench-v1'"):
env.datasets.benchmark("benchmark://cBench-v1/crc32")


if __name__ == "__main__":
main()

0 comments on commit 8f836c4

Please sign in to comment.