Skip to content

Commit

Permalink
Merge branch 'main' into plots-errors-json
Browse files Browse the repository at this point in the history
  • Loading branch information
skshetry authored Mar 13, 2023
2 parents 189b6ce + 16ecd53 commit cf99a31
Show file tree
Hide file tree
Showing 20 changed files with 189 additions and 140 deletions.
2 changes: 1 addition & 1 deletion dvc/commands/experiments/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def run(self):


def add_parser(experiments_subparsers, parent_parser):
EXPERIMENTS_CLEAN_HELP = "Cleanup dvc exp internal tempfiles."
EXPERIMENTS_CLEAN_HELP = "Cleanup experiments temporary internal files."
experiments_clean_parser = experiments_subparsers.add_parser(
"clean",
parents=[parent_parser],
Expand Down
2 changes: 1 addition & 1 deletion dvc/commands/experiments/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def run(self):


def add_parser(experiments_subparsers, parent_parser):
EXPERIMENTS_DIFF_HELP = "Show changes between experiments in the DVC repository."
EXPERIMENTS_DIFF_HELP = "Show changes between experiments."

experiments_diff_parser = experiments_subparsers.add_parser(
"diff",
Expand Down
3 changes: 2 additions & 1 deletion dvc/commands/experiments/gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ def run(self):
def add_parser(experiments_subparsers, parent_parser):
EXPERIMENTS_GC_HELP = "Garbage collect unneeded experiments."
EXPERIMENTS_GC_DESCRIPTION = (
"Removes all experiments which are not derived from the specifiedGit revisions."
"Removes all experiments which are not derived"
" from the specified Git revisions."
)
experiments_gc_parser = experiments_subparsers.add_parser(
"gc",
Expand Down
2 changes: 1 addition & 1 deletion dvc/commands/experiments/save.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def run(self):


def add_parser(experiments_subparsers, parent_parser):
EXPERIMENTS_SAVE_HELP = "Save current workspace as a dvc experiment."
EXPERIMENTS_SAVE_HELP = "Save current workspace as an experiment."
save_parser = experiments_subparsers.add_parser(
"save",
parents=[parent_parser],
Expand Down
2 changes: 1 addition & 1 deletion dvc/commands/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def _show_json(renderers: List["RendererWithErrors"], split=False):

from dvc.utils.serialize import encode_exception

ui.write_json(result, default=encode_exception)
ui.write_json(result, highlight=False, default=encode_exception)


def _adjust_vega_renderers(renderers):
Expand Down
54 changes: 44 additions & 10 deletions dvc/config_schema.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
import logging
import os
from urllib.parse import urlparse

from funcy import walk_values
from voluptuous import All, Any, Coerce, Invalid, Lower, Optional, Range, Schema
from funcy import once, walk_values
from voluptuous import (
REMOVE_EXTRA,
All,
Any,
Coerce,
Invalid,
Lower,
Optional,
Range,
Schema,
)

logger = logging.getLogger(__name__)

Bool = All(
Lower,
Expand Down Expand Up @@ -65,6 +78,27 @@ class RelPath(str):
pass


class FeatureSchema(Schema):
    """Schema for the ``feature`` config section that tolerates unknown keys.

    Only the keys declared in the wrapped schema are validated. Any other
    key present in the input is removed from the result
    (``extra=REMOVE_EXTRA``) and reported through a warning.
    """

    def __init__(self, schema, required=False):
        super().__init__(schema, required=required, extra=REMOVE_EXTRA)

    @staticmethod
    @once
    def _log_deprecated(keys):
        # Wrapped in ``once`` so the warning is emitted only once per session.
        quoted = [f"'feature.{key}'" for key in keys]
        verb = " is" if len(keys) == 1 else "s are"
        logger.warning("%s config option%s unsupported", ", ".join(quoted), verb)

    def __call__(self, data):
        validated = super().__call__(data)
        # Keys present in the input but absent from the validated output are
        # the ones that were stripped as "extra".
        dropped = data.keys() - validated.keys()
        if dropped:
            self._log_deprecated(sorted(dropped))
        return validated


REMOTE_COMMON = {
"url": str,
"checksum_jobs": All(Coerce(int), Range(1)),
Expand Down Expand Up @@ -239,12 +273,12 @@ class RelPath(str):
)
},
"state": {
"dir": str,
"dir": str, # obsoleted
"row_limit": All(Coerce(int), Range(1)), # obsoleted
"row_cleanup_quota": All(Coerce(int), Range(0, 100)), # obsoleted
},
"index": {
"dir": str,
"dir": str, # obsoleted
},
"machine": {
str: {
Expand All @@ -262,12 +296,12 @@ class RelPath(str):
},
},
# section for experimental features
"feature": {
Optional("machine", default=False): Bool,
# enabled by default. It's of no use, kept for backward compatibility.
Optional("data_index_cache", default=False): Bool,
Optional("parametrization", default=True): Bool,
},
# only specified keys are validated, others get logged and then ignored/removed
"feature": FeatureSchema(
{
Optional("machine", default=False): Bool,
},
),
"plots": {
"html_template": str,
Optional("auto_open", default=False): Bool,
Expand Down
2 changes: 1 addition & 1 deletion dvc/data_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def get_remote(
config["version_aware"] = True

fs = cls(**config)
config["tmp_dir"] = self.repo.index_db_dir
config["tmp_dir"] = self.repo.site_cache_dir
if self.repo.data_index is not None:
index = self.repo.data_index.view(("remote", name))
else:
Expand Down
2 changes: 1 addition & 1 deletion dvc/fs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def _get_cloud_fs(repo_config, **kwargs):

remote_conf = get_fs_config(repo_config, **kwargs)
try:
remote_conf = SCHEMA["remote"][str](remote_conf)
remote_conf = SCHEMA["remote"][str](remote_conf) # type: ignore[index]
except Invalid as exc:
raise RepoConfigError(str(exc)) from None

Expand Down
1 change: 1 addition & 0 deletions dvc/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def get_dvc_info():
fs_root = get_fs_type(os.path.abspath(root_directory))
info.append(f"Workspace directory: {fs_root}")
info.append(f"Repo: {_get_dvc_repo_info(repo)}")
info.append(f"Repo.site_cache_dir: {repo.site_cache_dir}")
except NotDvcRepoError:
pass
except SCMError:
Expand Down
4 changes: 3 additions & 1 deletion dvc/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import posixpath
from collections import defaultdict
from contextlib import suppress
from operator import itemgetter
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Type
from urllib.parse import urlparse
Expand Down Expand Up @@ -1017,7 +1018,8 @@ def get_dir_cache(self, **kwargs):
except FileNotFoundError:
if self.remote:
kwargs["remote"] = self.remote
self.repo.cloud.pull([obj.hash_info], **kwargs)
with suppress(Exception):
self.repo.cloud.pull([obj.hash_info], **kwargs)

if self.obj:
return self.obj
Expand Down
67 changes: 25 additions & 42 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,38 +134,6 @@ def _get_repo_dirs(
assert root_dir
return root_dir, dvc_dir

def _get_database_dir(self, db_name: str) -> Optional[str]:
    """Return the directory that holds the ``db_name`` database files.

    When the ``db_name`` config section has no ``dir`` entry, ``self.tmp_dir``
    is returned. Otherwise a per-repository subdirectory is created under
    the configured directory and its path is returned.
    """
    import hashlib

    from dvc.fs import localfs

    # NOTE: by default, store SQLite-based remote indexes and state's
    # `links` and `md5s` caches in the repository itself to avoid any
    # possible state corruption in 'shared cache dir' scenario, but allow
    # user to override this through config when, say, the repository is
    # located on a mounted volume — see
    # https://github.com/iterative/dvc/issues/4420
    configured_dir = self.config.get(db_name, {}).get("dir", None)
    if not configured_dir:
        return self.tmp_dir

    fs: "FileSystem"
    if self.local_dvc_dir:
        fs = localfs
        repo_root = fs.path.parent(self.local_dvc_dir)
    else:
        fs, repo_root = self.fs, self.root_dir

    # Subdirectory name is "<repo dir name>-<first 7 hex chars of sha224(root)>".
    short_hash = hashlib.sha224(repo_root.encode("utf-8")).hexdigest()[0:7]
    subdir_name = f"{fs.path.name(repo_root)}-{short_hash}"

    db_dir = fs.path.join(configured_dir, self.DVC_DIR, subdir_name)
    fs.makedirs(db_dir, exist_ok=True)
    return db_dir

def __init__( # noqa: PLR0915
self,
root_dir: Optional[str] = None,
Expand Down Expand Up @@ -241,8 +209,8 @@ def __init__( # noqa: PLR0915
hardlink_lock=self.config["core"].get("hardlink_lock", False),
friendly=True,
)
state_db_dir = self._get_database_dir("state")
self.state = State(self.root_dir, state_db_dir, self.dvcignore)
os.makedirs(self.site_cache_dir, exist_ok=True)
self.state = State(self.root_dir, self.site_cache_dir, self.dvcignore)
else:
self.lock = LockNoop()
self.state = StateNoop()
Expand Down Expand Up @@ -397,16 +365,12 @@ def fs(self, fs: "FileSystem"):
self._reset()

@property
def data_index(self) -> Optional["DataIndex"]:
def data_index(self) -> "DataIndex":
from dvc_data.index import DataIndex

if not self.index_db_dir:
return None

if self._data_index is None:
index_dir = os.path.join(self.index_db_dir, "index", "data")
index_dir = os.path.join(self.site_cache_dir, "index", "data")
os.makedirs(index_dir, exist_ok=True)

self._data_index = DataIndex.open(os.path.join(index_dir, "db.db"))

return self._data_index
Expand Down Expand Up @@ -590,8 +554,27 @@ def dvcfs(self) -> "DVCFileSystem":
return DVCFileSystem(repo=self, subrepos=self.subrepos, **self._fs_conf)

@cached_property
def index_db_dir(self):
return self._get_database_dir("index")
def site_cache_dir(self) -> str:
    """Return this repository's directory inside the site-wide DVC cache.

    The result is ``<platformdirs site cache dir>/repo/<token>``, where
    ``<token>`` is the MD5 hex digest of the repository root path.
    """
    import hashlib

    import platformdirs

    from dvc.fs import GitFileSystem

    base_dir = platformdirs.site_cache_dir("dvc", "iterative", opinion=True)

    if not isinstance(self.fs, GitFileSystem):
        repo_root = self.root_dir
    else:
        # On a Git filesystem, a root other than "/" denotes a subrepo; its
        # path parts are appended to the SCM root directory.
        subrepo_parts = (
            self.fs.path.relparts(self.root_dir, "/") if self.root_dir != "/" else ()
        )
        repo_root = os.path.join(self.scm.root_dir, *subrepo_parts)

    digest = hashlib.md5(os.fsencode(repo_root)).hexdigest()  # noqa: S324
    return os.path.join(base_dir, "repo", digest)

@contextmanager
def open_by_relpath(self, path, remote=None, mode="r", encoding=None):
Expand Down
38 changes: 15 additions & 23 deletions dvc/repo/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,42 +354,34 @@ def data_tree(self):

@cached_property
def data(self) -> "Dict[str, DataIndex]":
from dvc_data.index import DataIndex

prefix: "DataIndexKey"
loaded = False

index = self.repo.data_index
if index is None:
index = DataIndex()

prefix = ("tree", self.data_tree.hash_info.value)
if index.has_node(prefix):
loaded = True

try:
if not loaded:
_load_data_from_outs(index, prefix, self.outs)
index.commit()
if not loaded:
_load_data_from_outs(index, prefix, self.outs)
index.commit()

by_workspace = {}
by_workspace["repo"] = index.view((*prefix, "repo"))
by_workspace["local"] = index.view((*prefix, "local"))
by_workspace = {}
by_workspace["repo"] = index.view((*prefix, "repo"))
by_workspace["local"] = index.view((*prefix, "local"))

for out in self.outs:
if not out.use_cache:
continue
for out in self.outs:
if not out.use_cache:
continue

ws, key = out.index_key
if ws not in by_workspace:
by_workspace[ws] = index.view((*prefix, ws))
ws, key = out.index_key
if ws not in by_workspace:
by_workspace[ws] = index.view((*prefix, ws))

data_index = by_workspace[ws]
_load_storage_from_out(data_index.storage_map, key, out)
data_index = by_workspace[ws]
_load_storage_from_out(data_index.storage_map, key, out)

return by_workspace
finally:
index.close()
return by_workspace

@staticmethod
def _hash_targets(
Expand Down
7 changes: 6 additions & 1 deletion dvc/repo/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
logger = logging.getLogger(__name__)


def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False):
def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False): # noqa: C901
"""
Creates an empty repo on the given directory -- basically a
`.dvc` directory with subdirectories for configuration and cache.
Expand Down Expand Up @@ -75,6 +75,11 @@ def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False):

proj = Repo(root_dir)

if os.path.isdir(proj.site_cache_dir):
proj.close()
remove(proj.site_cache_dir)
proj = Repo(root_dir)

with proj.scm_context(autostage=True) as context:
files = [
config.files["repo"],
Expand Down
Loading

0 comments on commit cf99a31

Please sign in to comment.