diff --git a/compiler_gym/datasets/dataset.py b/compiler_gym/datasets/dataset.py index 295ab4a00f..b726dae80c 100644 --- a/compiler_gym/datasets/dataset.py +++ b/compiler_gym/datasets/dataset.py @@ -5,10 +5,11 @@ import logging import os import shutil +import warnings from pathlib import Path from typing import Dict, Iterable, Optional, Union -from deprecated.sphinx import deprecated +from deprecated.sphinx import deprecated as mark_deprecated from compiler_gym.datasets.benchmark import Benchmark from compiler_gym.datasets.uri import DATASET_NAME_RE @@ -39,7 +40,7 @@ def __init__( site_data_base: Path, benchmark_class=Benchmark, references: Optional[Dict[str, str]] = None, - hidden: bool = False, + deprecated: Optional[str] = None, sort_order: int = 0, logger: Optional[logging.Logger] = None, validatable: str = "No", @@ -63,9 +64,11 @@ def __init__( :param references: A dictionary containing URLs for this dataset, keyed by their name. E.g. :code:`references["Paper"] = "https://..."`. - :param hidden: Whether the dataset should be excluded from the - :meth:`datasets() ` iterator - of any :class:`Datasets ` container. + :param deprecated: Mark the dataset as deprecated and issue a warning + when :meth:`install() `, + including the given method. Deprecated datasets are excluded from + the :meth:`datasets() ` + iterator by default. :param sort_order: An optional numeric value that should be used to order this dataset relative to others. Lowest value sorts first. @@ -91,7 +94,7 @@ def __init__( self._protocol = components.group("dataset_protocol") self._version = int(components.group("dataset_version")) self._references = references or {} - self._hidden = hidden + self._deprecation_message = deprecated self._validatable = validatable self._logger = logger @@ -170,14 +173,14 @@ def references(self) -> Dict[str, str]: return self._references @property - def hidden(self) -> str: + def deprecated(self) -> bool: """Whether the dataset is included in the iterable sequence of datasets of a containing :class:`Datasets ` collection. :type: bool """ - return self._hidden + return self._deprecation_message is not None @property def validatable(self) -> str: @@ -257,12 +260,19 @@ def installed(self) -> bool: def install(self) -> None: """Install this dataset locally. - Implementing this method is optional. + Implementing this method is optional. If implementing this method, you + must call :code:`super().install()` first. This method should not perform redundant work - it should detect whether any work needs to be done so that repeated calls to install will complete quickly. """ + if self.deprecated: + warnings.warn( + f"Dataset '{self.name}' is marked as deprecated. {self._deprecation_message}", + category=DeprecationWarning, + stacklevel=2, + ) def uninstall(self) -> None: """Remove any local data for this benchmark. @@ -346,7 +356,7 @@ class DatasetInitError(OSError): """Base class for errors raised if a dataset fails to initialize.""" -@deprecated( +@mark_deprecated( version="0.1.4", reason=( "Datasets are now automatically activated. " @@ -367,7 +377,7 @@ def activate(env, dataset: Union[str, Dataset]) -> bool: return False -@deprecated( +@mark_deprecated( version="0.1.4", reason=( "Please use :meth:`del env.datasets[dataset] `. " @@ -390,7 +400,7 @@ def delete(env, dataset: Union[str, Dataset]) -> bool: return False -@deprecated( +@mark_deprecated( version="0.1.4", reason=( "Please use :meth:`env.datasets.deactivate() `. " @@ -413,7 +423,7 @@ def deactivate(env, dataset: Union[str, Dataset]) -> bool: return False -@deprecated( +@mark_deprecated( version="0.1.7", reason=( "Datasets are now installed automatically, there is no need to call :code:`require()`. " diff --git a/compiler_gym/datasets/datasets.py b/compiler_gym/datasets/datasets.py index db11fc736a..4a0e2c3c80 100644 --- a/compiler_gym/datasets/datasets.py +++ b/compiler_gym/datasets/datasets.py @@ -79,7 +79,7 @@ def __init__( ): self._datasets: Dict[str, Dataset] = {d.name: d for d in datasets} self._visible_datasets: Set[str] = set( - name for name, dataset in self._datasets.items() if not dataset.hidden + name for name, dataset in self._datasets.items() if not dataset.deprecated ) def datasets(self, with_deprecated: bool = False) -> Iterable[Dataset]: @@ -95,7 +95,7 @@ def datasets(self, with_deprecated: bool = False) -> Iterable[Dataset]: """ datasets = self._datasets.values() if not with_deprecated: - datasets = (d for d in datasets if not d.hidden) + datasets = (d for d in datasets if not d.deprecated) yield from sorted(datasets, key=lambda d: (d.sort_order, d.name)) def __iter__(self) -> Iterable[Dataset]: @@ -147,7 +147,7 @@ def __setitem__(self, key: str, dataset: Dataset): dataset_name = resolve_uri_protocol(key) self._datasets[dataset_name] = dataset - if not dataset.hidden: + if not dataset.deprecated: self._visible_datasets.add(dataset_name) def __delitem__(self, dataset: str): diff --git a/compiler_gym/datasets/tar_dataset.py b/compiler_gym/datasets/tar_dataset.py index 5eb349a9bc..2bbaa09407 100644 --- a/compiler_gym/datasets/tar_dataset.py +++ b/compiler_gym/datasets/tar_dataset.py @@ -73,6 +73,8 @@ def installed(self) -> bool: return self._installed def install(self) -> None: + super().install() + if self.installed: return diff --git a/compiler_gym/envs/llvm/datasets/__init__.py b/compiler_gym/envs/llvm/datasets/__init__.py index 9ef0086abb..f0cada1a19 100644 --- a/compiler_gym/envs/llvm/datasets/__init__.py +++ b/compiler_gym/envs/llvm/datasets/__init__.py @@ -213,7 +213,10 @@ def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset yield CBenchDataset( site_data_base=site_data_base, name="benchmark://cBench-v1", - hidden=True, + deprecated=( + "Please use 'benchmark://cbench-v1' (note the lowercase name). " + "The dataset is the same, only the name has changed" + ), manifest_url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-cBench-v1-manifest.bz2", manifest_sha256="635b94eeb2784dfedb3b53fd8f84517c3b4b95d851ddb662d4c1058c72dc81e0", sort_order=100, diff --git a/compiler_gym/envs/llvm/datasets/cbench.py b/compiler_gym/envs/llvm/datasets/cbench.py index f873f512d2..f8587ae2d9 100644 --- a/compiler_gym/envs/llvm/datasets/cbench.py +++ b/compiler_gym/envs/llvm/datasets/cbench.py @@ -18,7 +18,6 @@ from typing import Callable, Dict, List, NamedTuple, Optional import fasteners -from deprecated.sphinx import deprecated from compiler_gym.datasets import Benchmark, TarDatasetWithManifest from compiler_gym.third_party import llvm @@ -506,7 +505,7 @@ def __init__( name="benchmark://cbench-v1", manifest_url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-cbench-v1-manifest.bz2", manifest_sha256="eeffd7593aeb696a160fd22e6b0c382198a65d0918b8440253ea458cfe927741", - hidden=False, + deprecated=None, ): platform = {"darwin": "macos"}.get(sys.platform, sys.platform) url, sha256 = _CBENCH_TARS[platform] @@ -527,7 +526,7 @@ def __init__( site_data_base=site_data_base, sort_order=sort_order, benchmark_class=CBenchBenchmark, - hidden=hidden, + deprecated=deprecated, validatable="Partially", ) @@ -575,16 +574,9 @@ def __init__(self, site_data_base: Path): strip_prefix="cBench-v0", benchmark_file_suffix=".bc", site_data_base=site_data_base, - hidden=True, # Hidden because it is deprecated. + deprecated="Please use 'benchmark://cbench-v1'", ) - @deprecated( - version="0.1.4", - reason=("Dataset 'cBench-v0' is deprecated, please use 'cbench-v1'"), - ) - def install(self) -> None: - super().install() - # =============================== # Definition of cBench validators diff --git a/compiler_gym/envs/llvm/datasets/csmith.py b/compiler_gym/envs/llvm/datasets/csmith.py index 2f41712be3..b8f541126b 100644 --- a/compiler_gym/envs/llvm/datasets/csmith.py +++ b/compiler_gym/envs/llvm/datasets/csmith.py @@ -129,6 +129,8 @@ def installed(self) -> bool: def install(self) -> None: """Download and build the Csmith binary.""" + super().install() + if self.installed: return diff --git a/compiler_gym/envs/llvm/datasets/poj104.py b/compiler_gym/envs/llvm/datasets/poj104.py index 856a0e9bb6..39fc8e9c12 100644 --- a/compiler_gym/envs/llvm/datasets/poj104.py +++ b/compiler_gym/envs/llvm/datasets/poj104.py @@ -8,8 +8,6 @@ from pathlib import Path from typing import Optional -from deprecated.sphinx import deprecated - from compiler_gym.datasets import Benchmark, BenchmarkInitError, TarDatasetWithManifest from compiler_gym.datasets.benchmark import BenchmarkWithSource from compiler_gym.envs.llvm.llvm_benchmark import ClangInvocation @@ -191,12 +189,5 @@ def __init__(self, site_data_base: Path, sort_order: int = 0): benchmark_file_suffix=".bc", site_data_base=site_data_base, sort_order=sort_order, - hidden=True, + deprecated="Please update to benchmark://poj104-v1.", ) - - @deprecated( - version="0.1.8", - reason=("Dataset 'poj104-v0' is deprecated, please use 'poj104-v1'"), - ) - def install(self) -> None: - super().install() diff --git a/tests/datasets/dataset_test.py b/tests/datasets/dataset_test.py index f960ed0b4c..ec8f074e2f 100644 --- a/tests/datasets/dataset_test.py +++ b/tests/datasets/dataset_test.py @@ -60,7 +60,7 @@ def test_dataset_optional_properties(): ) assert dataset.references == {} # Default value. - assert not dataset.hidden + assert not dataset.deprecated assert dataset.sort_order == 0 assert dataset.validatable == "No" @@ -73,7 +73,7 @@ def test_dataset_optional_properties_explicit_values(): license="MIT", site_data_base="test", references={"GitHub": "https://github.com/facebookresearch/CompilerGym"}, - hidden=True, + deprecated="Deprecation message", sort_order=10, validatable="Yes", ) @@ -81,7 +81,7 @@ def test_dataset_optional_properties_explicit_values(): assert dataset.references == { "GitHub": "https://github.com/facebookresearch/CompilerGym" } - assert dataset.hidden + assert dataset.deprecated assert dataset.sort_order == 10 assert dataset.validatable == "Yes" @@ -134,6 +134,20 @@ def test_dataset_site_data_directory(tmpwd: Path): assert not dataset.site_data_path.is_dir() # Dir is not created until needed. +def test_dataset_deprecation_message(tmpwd: Path): + """Test that a deprecation warning is emitted on install().""" + dataset = Dataset( + name="benchmark://test-v0", + description="A test dataset", + license="MIT", + site_data_base="test", + deprecated="The cat sat on the mat", + ) + + with pytest.warns(DeprecationWarning, match="The cat sat on the mat"): + dataset.install() + + class TestDataset(Dataset): """A dataset to use for testing.""" diff --git a/tests/datasets/datasets_test.py b/tests/datasets/datasets_test.py index b2a0bcb298..3392513ad8 100644 --- a/tests/datasets/datasets_test.py +++ b/tests/datasets/datasets_test.py @@ -17,7 +17,7 @@ class MockDataset: def __init__(self, name): self.name = name self.installed = False - self.hidden = False + self.deprecated = False self.benchmark_values = [] self.sort_order = 0 @@ -52,7 +52,7 @@ def __init__(self, uri): self.uri = uri def __repr__(self): - return str(self.name) + return str(self.uri) def test_enumerate_datasets_empty(): @@ -78,20 +78,20 @@ def test_enumerate_datasets_with_custom_sort_order(): assert list(datasets) == [db, da] -def test_enumerate_hidden_datasets(): +def test_enumerate_deprecated_datasets(): da = MockDataset("benchmark://a") db = MockDataset("benchmark://b") datasets = Datasets((da, db)) - db.hidden = True + db.deprecated = True assert list(datasets) == [da] assert list(datasets.datasets(with_deprecated=True)) == [da, db] -def test_enumerate_datasets_hidden_at_construction_time(): +def test_enumerate_datasets_deprecated_at_construction_time(): da = MockDataset("benchmark://a") db = MockDataset("benchmark://b") - db.hidden = True + db.deprecated = True datasets = Datasets((da, db)) assert list(datasets) == [da] @@ -107,11 +107,11 @@ def test_datasets_add_dataset(): assert list(datasets) == [da] -def test_datasets_add_hidden_dataset(): +def test_datasets_add_deprecated_dataset(): datasets = Datasets([]) da = MockDataset("benchmark://a") - da.hidden = True + da.deprecated = True datasets["benchmark://foo-v0"] = da assert list(datasets) == [] @@ -213,7 +213,7 @@ def test_benchmark_uris_order(): def test_benchmarks_iter_deprecated(): da = MockDataset("benchmark://foo-v0") db = MockDataset("benchmark://bar-v0") - db.hidden = True + db.deprecated = True ba = MockBenchmark(uri="benchmark://foo-v0/abc") bb = MockBenchmark(uri="benchmark://foo-v0/123") bc = MockBenchmark(uri="benchmark://bar-v0/abc") diff --git a/tests/llvm/datasets/cbench_test.py b/tests/llvm/datasets/cbench_test.py index ad3d8287b9..13543ee0b3 100644 --- a/tests/llvm/datasets/cbench_test.py +++ b/tests/llvm/datasets/cbench_test.py @@ -68,16 +68,21 @@ def test_validate_sha_output_invalid(): def test_cbench_v0_deprecation(env: LlvmEnv): """Test that cBench-v0 emits a deprecation warning when used.""" - with pytest.deprecated_call( - match="Dataset 'cBench-v0' is deprecated, please use 'cbench-v1'" - ): + with pytest.deprecated_call(match="Please use 'benchmark://cbench-v1'"): env.datasets["cBench-v0"].install() - with pytest.deprecated_call( - match="Dataset 'cBench-v0' is deprecated, please use 'cbench-v1'" - ): + with pytest.deprecated_call(match="Please use 'benchmark://cbench-v1'"): env.datasets.benchmark("benchmark://cBench-v0/crc32") +def test_cbench_v1_deprecation(env: LlvmEnv): + """Test that cBench-v1 emits a deprecation warning when used.""" + with pytest.deprecated_call(match="Please use 'benchmark://cbench-v1'"): + env.datasets["cBench-v1"].install() + + with pytest.deprecated_call(match="Please use 'benchmark://cbench-v1'"): + env.datasets.benchmark("benchmark://cBench-v1/crc32") + + if __name__ == "__main__": main()