diff --git a/dvc/commands/experiments/clean.py b/dvc/commands/experiments/clean.py index 890eb34f3dd..0f44e33c78a 100644 --- a/dvc/commands/experiments/clean.py +++ b/dvc/commands/experiments/clean.py @@ -14,7 +14,7 @@ def run(self): def add_parser(experiments_subparsers, parent_parser): - EXPERIMENTS_CLEAN_HELP = "Cleanup dvc exp internal tempfiles." + EXPERIMENTS_CLEAN_HELP = "Cleanup experiments temporary internal files." experiments_clean_parser = experiments_subparsers.add_parser( "clean", parents=[parent_parser], diff --git a/dvc/commands/experiments/diff.py b/dvc/commands/experiments/diff.py index de3fc5e5074..674535f5c7e 100644 --- a/dvc/commands/experiments/diff.py +++ b/dvc/commands/experiments/diff.py @@ -52,7 +52,7 @@ def run(self): def add_parser(experiments_subparsers, parent_parser): - EXPERIMENTS_DIFF_HELP = "Show changes between experiments in the DVC repository." + EXPERIMENTS_DIFF_HELP = "Show changes between experiments." experiments_diff_parser = experiments_subparsers.add_parser( "diff", diff --git a/dvc/commands/experiments/gc.py b/dvc/commands/experiments/gc.py index d0cf8ec5c1b..b610670f851 100644 --- a/dvc/commands/experiments/gc.py +++ b/dvc/commands/experiments/gc.py @@ -78,7 +78,8 @@ def run(self): def add_parser(experiments_subparsers, parent_parser): EXPERIMENTS_GC_HELP = "Garbage collect unneeded experiments." EXPERIMENTS_GC_DESCRIPTION = ( - "Removes all experiments which are not derived from the specifiedGit revisions." + "Removes all experiments which are not derived" + " from the specified Git revisions." ) experiments_gc_parser = experiments_subparsers.add_parser( "gc", diff --git a/dvc/commands/experiments/save.py b/dvc/commands/experiments/save.py index d0c3863f870..9045dc5c2c6 100644 --- a/dvc/commands/experiments/save.py +++ b/dvc/commands/experiments/save.py @@ -35,7 +35,7 @@ def run(self): def add_parser(experiments_subparsers, parent_parser): - EXPERIMENTS_SAVE_HELP = "Save current workspace as a dvc experiment." + EXPERIMENTS_SAVE_HELP = "Save current workspace as an experiment." save_parser = experiments_subparsers.add_parser( "save", parents=[parent_parser], diff --git a/dvc/commands/plots.py b/dvc/commands/plots.py index 9ec7e0d7936..de8ec6535b1 100644 --- a/dvc/commands/plots.py +++ b/dvc/commands/plots.py @@ -30,7 +30,7 @@ def _show_json(renderers: List["RendererWithErrors"], split=False): from dvc.utils.serialize import encode_exception - ui.write_json(result, default=encode_exception) + ui.write_json(result, highlight=False, default=encode_exception) def _adjust_vega_renderers(renderers): diff --git a/dvc/config_schema.py b/dvc/config_schema.py index bdfeed003be..fdb7b3386f7 100644 --- a/dvc/config_schema.py +++ b/dvc/config_schema.py @@ -1,8 +1,21 @@ +import logging import os from urllib.parse import urlparse -from funcy import walk_values -from voluptuous import All, Any, Coerce, Invalid, Lower, Optional, Range, Schema +from funcy import once, walk_values +from voluptuous import ( + REMOVE_EXTRA, + All, + Any, + Coerce, + Invalid, + Lower, + Optional, + Range, + Schema, +) + +logger = logging.getLogger(__name__) Bool = All( Lower, @@ -65,6 +78,27 @@ class RelPath(str): pass +class FeatureSchema(Schema): + def __init__(self, schema, required=False): + super().__init__(schema, required=required, extra=REMOVE_EXTRA) + + @staticmethod + @once + def _log_deprecated(keys): + # only run this once per session + message = "%s config option%s unsupported" + paths = ", ".join(f"'feature.{key}'" for key in keys) + pluralize = " is" if len(keys) == 1 else "s are" + logger.warning(message, paths, pluralize) + + def __call__(self, data): + ret = super().__call__(data) + extra_keys = data.keys() - ret.keys() + if extra_keys: + self._log_deprecated(sorted(extra_keys)) + return ret + + REMOTE_COMMON = { "url": str, "checksum_jobs": All(Coerce(int), Range(1)), @@ -239,12 +273,12 @@ class RelPath(str): ) }, "state": { - "dir": str, + "dir": str, # obsoleted "row_limit": All(Coerce(int), Range(1)), # obsoleted "row_cleanup_quota": All(Coerce(int), Range(0, 100)), # obsoleted }, "index": { - "dir": str, + "dir": str, # obsoleted }, "machine": { str: { @@ -262,12 +296,12 @@ class RelPath(str): }, }, # section for experimental features - "feature": { - Optional("machine", default=False): Bool, - # enabled by default. It's of no use, kept for backward compatibility. - Optional("data_index_cache", default=False): Bool, - Optional("parametrization", default=True): Bool, - }, + # only specified keys are validated, others get logged and then ignored/removed + "feature": FeatureSchema( + { + Optional("machine", default=False): Bool, + }, + ), "plots": { "html_template": str, Optional("auto_open", default=False): Bool, diff --git a/dvc/data_cloud.py b/dvc/data_cloud.py index b5094ca6e27..c9401fa8db9 100644 --- a/dvc/data_cloud.py +++ b/dvc/data_cloud.py @@ -75,7 +75,7 @@ def get_remote( config["version_aware"] = True fs = cls(**config) - config["tmp_dir"] = self.repo.index_db_dir + config["tmp_dir"] = self.repo.site_cache_dir if self.repo.data_index is not None: index = self.repo.data_index.view(("remote", name)) else: diff --git a/dvc/fs/__init__.py b/dvc/fs/__init__.py index ae2544de101..59ad0e5a1f7 100644 --- a/dvc/fs/__init__.py +++ b/dvc/fs/__init__.py @@ -121,7 +121,7 @@ def _get_cloud_fs(repo_config, **kwargs): remote_conf = get_fs_config(repo_config, **kwargs) try: - remote_conf = SCHEMA["remote"][str](remote_conf) + remote_conf = SCHEMA["remote"][str](remote_conf) # type: ignore[index] except Invalid as exc: raise RepoConfigError(str(exc)) from None diff --git a/dvc/info.py b/dvc/info.py index b204dc1d7be..733a6e22eaa 100644 --- a/dvc/info.py +++ b/dvc/info.py @@ -54,6 +54,7 @@ def get_dvc_info(): fs_root = get_fs_type(os.path.abspath(root_directory)) info.append(f"Workspace directory: {fs_root}") info.append(f"Repo: {_get_dvc_repo_info(repo)}") + info.append(f"Repo.site_cache_dir: {repo.site_cache_dir}") except NotDvcRepoError: pass except SCMError: diff --git a/dvc/output.py b/dvc/output.py index 9d53927b516..1316fb53a28 100644 --- a/dvc/output.py +++ b/dvc/output.py @@ -2,6 +2,7 @@ import os import posixpath from collections import defaultdict +from contextlib import suppress from operator import itemgetter from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Type from urllib.parse import urlparse @@ -1017,7 +1018,8 @@ def get_dir_cache(self, **kwargs): except FileNotFoundError: if self.remote: kwargs["remote"] = self.remote - self.repo.cloud.pull([obj.hash_info], **kwargs) + with suppress(Exception): + self.repo.cloud.pull([obj.hash_info], **kwargs) if self.obj: return self.obj diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index a0aaadd679b..b7b2587a517 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -134,38 +134,6 @@ def _get_repo_dirs( assert root_dir return root_dir, dvc_dir - def _get_database_dir(self, db_name: str) -> Optional[str]: - from dvc.fs import localfs - - # NOTE: by default, store SQLite-based remote indexes and state's - # `links` and `md5s` caches in the repository itself to avoid any - # possible state corruption in 'shared cache dir' scenario, but allow - # user to override this through config when, say, the repository is - # located on a mounted volume — see - # https://github.com/iterative/dvc/issues/4420 - base_db_dir = self.config.get(db_name, {}).get("dir", None) - if not base_db_dir: - return self.tmp_dir - - import hashlib - - if self.local_dvc_dir: - fs: "FileSystem" = localfs - local_root = fs.path.parent(self.local_dvc_dir) - else: - fs = self.fs - local_root = self.root_dir - root_dir_hash = hashlib.sha224(local_root.encode("utf-8")).hexdigest() - - db_dir = fs.path.join( - base_db_dir, - self.DVC_DIR, - f"{fs.path.name(local_root)}-{root_dir_hash[0:7]}", - ) - - fs.makedirs(db_dir, exist_ok=True) - return db_dir - def __init__( # noqa: PLR0915 self, root_dir: Optional[str] = None, @@ -241,8 +209,8 @@ def __init__( # noqa: PLR0915 hardlink_lock=self.config["core"].get("hardlink_lock", False), friendly=True, ) - state_db_dir = self._get_database_dir("state") - self.state = State(self.root_dir, state_db_dir, self.dvcignore) + os.makedirs(self.site_cache_dir, exist_ok=True) + self.state = State(self.root_dir, self.site_cache_dir, self.dvcignore) else: self.lock = LockNoop() self.state = StateNoop() @@ -397,16 +365,12 @@ def fs(self, fs: "FileSystem"): self._reset() @property - def data_index(self) -> Optional["DataIndex"]: + def data_index(self) -> "DataIndex": from dvc_data.index import DataIndex - if not self.index_db_dir: - return None - if self._data_index is None: - index_dir = os.path.join(self.index_db_dir, "index", "data") + index_dir = os.path.join(self.site_cache_dir, "index", "data") os.makedirs(index_dir, exist_ok=True) - self._data_index = DataIndex.open(os.path.join(index_dir, "db.db")) return self._data_index @@ -590,8 +554,27 @@ def dvcfs(self) -> "DVCFileSystem": return DVCFileSystem(repo=self, subrepos=self.subrepos, **self._fs_conf) @cached_property - def index_db_dir(self): - return self._get_database_dir("index") + def site_cache_dir(self) -> str: + import hashlib + + import platformdirs + + from dvc.fs import GitFileSystem + + cache_dir = platformdirs.site_cache_dir("dvc", "iterative", opinion=True) + + if isinstance(self.fs, GitFileSystem): + relparts = () + if self.root_dir != "/": + # subrepo + relparts = self.fs.path.relparts(self.root_dir, "/") + root_dir = os.path.join(self.scm.root_dir, *relparts) + else: + root_dir = self.root_dir + + repo_token = hashlib.md5(os.fsencode(root_dir)).hexdigest() # noqa: S324 + + return os.path.join(cache_dir, "repo", repo_token) @contextmanager def open_by_relpath(self, path, remote=None, mode="r", encoding=None): diff --git a/dvc/repo/index.py b/dvc/repo/index.py index 361b5796d06..68b8fd12dce 100644 --- a/dvc/repo/index.py +++ b/dvc/repo/index.py @@ -354,42 +354,34 @@ def data_tree(self): @cached_property def data(self) -> "Dict[str, DataIndex]": - from dvc_data.index import DataIndex - prefix: "DataIndexKey" loaded = False index = self.repo.data_index - if index is None: - index = DataIndex() - prefix = ("tree", self.data_tree.hash_info.value) if index.has_node(prefix): loaded = True - try: - if not loaded: - _load_data_from_outs(index, prefix, self.outs) - index.commit() + if not loaded: + _load_data_from_outs(index, prefix, self.outs) + index.commit() - by_workspace = {} - by_workspace["repo"] = index.view((*prefix, "repo")) - by_workspace["local"] = index.view((*prefix, "local")) + by_workspace = {} + by_workspace["repo"] = index.view((*prefix, "repo")) + by_workspace["local"] = index.view((*prefix, "local")) - for out in self.outs: - if not out.use_cache: - continue + for out in self.outs: + if not out.use_cache: + continue - ws, key = out.index_key - if ws not in by_workspace: - by_workspace[ws] = index.view((*prefix, ws)) + ws, key = out.index_key + if ws not in by_workspace: + by_workspace[ws] = index.view((*prefix, ws)) - data_index = by_workspace[ws] - _load_storage_from_out(data_index.storage_map, key, out) + data_index = by_workspace[ws] + _load_storage_from_out(data_index.storage_map, key, out) - return by_workspace - finally: - index.close() + return by_workspace @staticmethod def _hash_targets( diff --git a/dvc/repo/init.py b/dvc/repo/init.py index 8d6c7c78e47..5a9ab093da8 100644 --- a/dvc/repo/init.py +++ b/dvc/repo/init.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) -def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False): +def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False): # noqa: C901 """ Creates an empty repo on the given directory -- basically a `.dvc` directory with subdirectories for configuration and cache. @@ -75,6 +75,11 @@ def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False): proj = Repo(root_dir) + if os.path.isdir(proj.site_cache_dir): + proj.close() + remove(proj.site_cache_dir) + proj = Repo(root_dir) + with proj.scm_context(autostage=True) as context: files = [ config.files["repo"], diff --git a/dvc/ui/__init__.py b/dvc/ui/__init__.py index 88c4a9430da..e11abeb8a44 100644 --- a/dvc/ui/__init__.py +++ b/dvc/ui/__init__.py @@ -17,7 +17,9 @@ if TYPE_CHECKING: from rich.console import Console as RichConsole + from rich.console import JustifyMethod, OverflowMethod from rich.status import Status + from rich.style import Style from rich.text import Text as RichText from dvc.progress import Tqdm @@ -105,8 +107,9 @@ def write_json( # noqa: PLR0913 if indent is None and self.isatty(): indent = 2 - console = self.error_console if stderr else self.rich_console - return console.print_json( + from rich.json import JSON + + json = JSON.from_data( data=data, indent=indent, highlight=bool(highlight), @@ -117,6 +120,50 @@ def write_json( # noqa: PLR0913 default=default, sort_keys=sort_keys, ) + if not highlight: + return self.write(json.text, stderr=stderr) + return self.rich_print(json, stderr=stderr, soft_wrap=True) + + def rich_print( + self, + *objects: Any, + sep: str = " ", + end: str = "\n", + stderr: bool = False, + style: Optional[Union[str, "Style"]] = None, + justify: Optional["JustifyMethod"] = None, + overflow: Optional["OverflowMethod"] = None, + no_wrap: Optional[bool] = None, + emoji: Optional[bool] = None, + markup: Optional[bool] = None, + highlight: Optional[bool] = None, + width: Optional[int] = None, + height: Optional[int] = None, + crop: bool = True, + soft_wrap: Optional[bool] = None, + new_line_start: bool = False, + ) -> None: + if stderr: + console = self.error_console + else: + console = self.rich_console + return console.print( + *objects, + sep=sep, + end=end, + style=style, + justify=justify, + overflow=overflow, + no_wrap=no_wrap, + emoji=emoji, + markup=markup, + highlight=highlight, + width=width, + height=height, + crop=crop, + soft_wrap=soft_wrap, + new_line_start=new_line_start, + ) def write( self, @@ -142,10 +189,21 @@ def write( with Tqdm.external_write_mode(file=file): # if we are inside pager context, send the output to rich's buffer if styled or self._paginate.get(): - console = self.error_console if stderr else self.rich_console if styled: - return console.print(*objects, sep=sep, end=end) - return console.out(*objects, sep=sep, end=end, highlight=False) + return self.rich_print(*objects, sep=sep, end=end, stderr=stderr) + return self.rich_print( + sep.join(str(_object) for _object in objects), + style=None, + highlight=False, + emoji=False, + markup=False, + no_wrap=True, + overflow="ignore", + crop=False, + sep=sep, + end=end, + stderr=stderr, + ) values = (self.formatter.format(obj, style) for obj in objects) return print(*values, sep=sep, end=end, file=file) diff --git a/pyproject.toml b/pyproject.toml index 8612f1d9cc9..8c1a98867c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ "configobj>=5.0.6", "distro>=1.3", "dpath<3,>=2.1.0", - "dvc-data>=0.42.1,<0.43", + "dvc-data>=0.42.3,<0.43", "dvc-http", "dvc-render>=0.1.2", "dvc-studio-client>=0.5.0,<1", @@ -48,7 +48,7 @@ dependencies = [ "networkx>=2.5", "packaging>=19", "pathspec>=0.10.3", - "platformdirs<4,>=3", + "platformdirs<4,>=3.1.1", "psutil>=5.8", "pydot>=1.2.4", "pygtrie>=2.3.2", @@ -74,7 +74,7 @@ gs = ["dvc-gs==2.22.0"] hdfs = ["dvc-hdfs==2.19"] lint = [ "mypy==1.1.1", - "pylint==2.16.4", + "pylint==2.17.0", "types-colorama", "types-psutil", "types-requests", diff --git a/tests/conftest.py b/tests/conftest.py index 10d93bc67c1..dbaabba916e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,3 @@ -import gc import json import os import sys @@ -244,17 +243,3 @@ def run( return stage return run - - -@pytest.fixture(autouse=True) -def gc_collect_on_dvc_close_on_win(mocker): - from dvc.repo import Repo - - close = Repo.close - - def wrapped(repo): - close(repo) - gc.collect() - - if os.name == "nt": - mocker.patch("dvc.repo.Repo.close", wrapped) diff --git a/tests/func/test_state.py b/tests/func/test_state.py index b113597a794..65c5ee2628d 100644 --- a/tests/func/test_state.py +++ b/tests/func/test_state.py @@ -1,7 +1,5 @@ import os -import re -from dvc.repo import Repo from dvc_data.hashfile.hash import file_md5 from dvc_data.hashfile.hash_info import HashInfo from dvc_data.hashfile.state import State @@ -71,15 +69,3 @@ def test_get_unused_links(tmp_dir, dvc): dvc.fs, ) ) == {"bar"} - - -def test_state_dir_config(make_tmp_dir, dvc): - assert dvc.state.tmp_dir == dvc.tmp_dir - - index_dir = str(make_tmp_dir("tmp_index")) - repo = Repo(config={"state": {"dir": index_dir}}) - assert os.path.dirname(repo.state.tmp_dir) == os.path.join(index_dir, ".dvc") - assert re.match( - r"^test_state_dir_config0-([0-9a-f]+)$", - os.path.basename(repo.state.tmp_dir), - ) diff --git a/tests/unit/output/test_output.py b/tests/unit/output/test_output.py index ce543f9d23c..d2c07ed8845 100644 --- a/tests/unit/output/test_output.py +++ b/tests/unit/output/test_output.py @@ -116,7 +116,7 @@ def test_remote_missing_dependency_on_dir_pull(tmp_dir, scm, dvc, mocker): remove(dvc.cache.local.path) mocker.patch( - "dvc.data_cloud.DataCloud.get_remote_odb", + "dvc.data_cloud.DataCloud.get_remote", side_effect=RemoteMissingDepsError(dvc.fs, "azure", "azure://", []), ) with pytest.raises(RemoteMissingDepsError): diff --git a/tests/unit/remote/test_remote.py b/tests/unit/remote/test_remote.py index 0847a779c73..89a2b6c3ee3 100644 --- a/tests/unit/remote/test_remote.py +++ b/tests/unit/remote/test_remote.py @@ -1,11 +1,8 @@ -import os - import pytest from dvc_gs import GSFileSystem from dvc_s3 import S3FileSystem from dvc.fs import get_cloud_fs -from dvc_data.hashfile.db import get_index def test_remote_with_hash_jobs(dvc): @@ -57,18 +54,3 @@ def test_makedirs_not_create_for_top_level_path(fs_cls, dvc, mocker): fs.makedirs(url) assert not mocked_client.called - - -def test_remote_index_dir_config(make_tmp_dir, dvc): - index_dir = str(make_tmp_dir("tmp_index")) - with dvc.config.edit() as conf: - conf["index"]["dir"] = index_dir - conf["remote"]["s3"] = {"url": "s3://bucket/name"} - - dvc.root_dir = "/usr/local/test_repo" - dvc.dvc_dir = "/usr/local/test_repo/.dvc" - dvc.__dict__.pop("local_dvc_dir") - - assert os.path.dirname( - get_index(dvc.cloud.get_remote_odb(name="s3")).index_dir - ) == os.path.join(index_dir, ".dvc", "test_repo-a473718", "index") diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index d0badc22f0f..c81efa7fc84 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -1,3 +1,4 @@ +import logging import os import textwrap @@ -94,3 +95,22 @@ def test_load_configob_error(tmp_dir, dvc, mocker): with pytest.raises(ConfigError): with config.edit(): pass + + +def test_feature_section_supports_arbitrary_values(caplog): + with caplog.at_level(logging.WARNING, logger="dvc.config_schema"): + data = Config.validate( + { + "feature": { + "random_key_1": "random_value_1", + "random_key_2": 42, + } + } + ) + + assert "random_key_1" not in data + assert "random_key_2" not in data + assert ( + "'feature.random_key_1', 'feature.random_key_2' " + "config options are unsupported" + ) in caplog.text