Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove data ls and support for --meta/--desc/--label/--type cli flags #9481

Merged
merged 1 commit into from
May 19, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions dvc/annotations.py
Original file line number Diff line number Diff line change
@@ -17,12 +17,6 @@ class Annotation:
labels: List[str] = field(default_factory=list)
meta: Dict[str, Any] = field(default_factory=dict)

def update(self, **kwargs) -> "Annotation":
for attr, value in kwargs.items():
if value and hasattr(self, attr):
setattr(self, attr, value)
return self

def to_dict(self) -> Dict[str, str]:
return compact(asdict(self))

20 changes: 0 additions & 20 deletions dvc/cli/actions.py

This file was deleted.

37 changes: 0 additions & 37 deletions dvc/commands/add.py
Original file line number Diff line number Diff line change
@@ -2,43 +2,12 @@
import logging

from dvc.cli import completion
from dvc.cli.actions import KeyValueArgs
from dvc.cli.command import CmdBase
from dvc.cli.utils import append_doc_link

logger = logging.getLogger(__name__)


def _add_annotating_args(parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"--desc",
type=str,
metavar="<text>",
help="User description of the data.",
)
parser.add_argument(
"--meta",
metavar="key=value",
nargs=1,
action=KeyValueArgs,
help="Custom metadata to add to the data.",
)
parser.add_argument(
"--label",
dest="labels",
type=str,
action="append",
metavar="<str>",
help="Label for the data.",
)
parser.add_argument(
"--type",
type=str,
metavar="<str>",
help="Type of the data.",
)


class CmdAdd(CmdBase):
def run(self):
from dvc.exceptions import DvcException, RecursiveAddingWhileUsingFilename
@@ -54,11 +23,7 @@ def run(self):
fname=self.args.file,
external=self.args.external,
glob=self.args.glob,
desc=self.args.desc,
Copy link
Member Author

@skshetry skshetry May 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dberenbaum, do we also want to get rid of --desc?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's drop it from dvc add. We could add support for all these flags and make them write artifact info to dvc.yaml in that case, but that's probably a separate issue.

I assume .dvc files with descriptions won't break for regular data operations, right? In other words, we still allow for that field even if we don't use it at all?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All of the changes are for CLI. This PR affects add/import/import-url. It does not break .dvc or dvc.yaml files.

out=self.args.out,
type=self.args.type,
labels=self.args.labels,
meta=self.args.meta,
remote=self.args.remote,
to_remote=self.args.to_remote,
jobs=self.args.jobs,
@@ -150,8 +115,6 @@ def add_parser(subparsers, parent_parser):
default=False,
help="Override local file or folder if exists.",
)

_add_annotating_args(parser)
parser.add_argument(
"targets", nargs="+", help="Input files/directories to add."
).complete = completion.FILE
111 changes: 2 additions & 109 deletions dvc/commands/data.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
import argparse
import logging
from operator import itemgetter
from typing import TYPE_CHECKING, Any, Dict, Iterable, Set
from typing import TYPE_CHECKING

from funcy import chunks, compact, log_durations

from dvc.cli import completion
from dvc.cli.actions import CommaSeparatedArgs
from dvc.cli.command import CmdBase
from dvc.cli.utils import append_doc_link, fix_subparsers, hide_subparsers_from_help
from dvc.cli.utils import append_doc_link, fix_subparsers
from dvc.ui import ui
from dvc.utils import colorize

@@ -132,63 +129,6 @@ def run(self) -> int:
return self._show_status(status)


class CmdDataLs(CmdBase):
@staticmethod
def _show_table(
d: Iterable[Dict[str, Any]],
filter_types: Set[str],
filter_labels: Set[str],
markdown: bool = False,
) -> None:
from rich.style import Style

from dvc.compare import TabularData

td = TabularData(
columns=["Path", "Type", "Labels", "Description"], fill_value="-"
)
for entry in sorted(d, key=itemgetter("path")):
typ = entry.get("type", "")
desc = entry.get("desc", "-")
labels = entry.get("labels", [])

if filter_types and typ not in filter_types:
continue
if filter_labels and filter_labels.isdisjoint(labels):
continue

rich_label = ui.rich_text()
for index, label in enumerate(labels):
if index:
rich_label.append(",")
style = Style(bold=label in filter_labels, color="green")
rich_label.append(label, style=style)

if markdown and desc:
desc = desc.partition("\n")[0]

path = ui.rich_text(entry["path"], style="cyan")
type_style = Style(bold=typ in filter_types, color="yellow")
typ = ui.rich_text(entry.get("type", ""), style=type_style)
td.append([path, typ or "-", rich_label or "-", desc])

td.render(markdown=markdown, rich_table=True)

def run(self):
from dvc.repo.data import ls

filter_labels = set(self.args.labels)
filter_types = set(self.args.type)
d = ls(self.repo, targets=self.args.targets, recursive=self.args.recursive)
self._show_table(
d,
filter_labels=filter_labels,
filter_types=filter_types,
markdown=self.args.markdown,
)
return 0


def add_parser(subparsers, parent_parser):
data_parser = subparsers.add_parser(
"data",
@@ -250,50 +190,3 @@ def add_parser(subparsers, parent_parser):
help="Use cached remote index (don't check remote).",
)
data_status_parser.set_defaults(func=CmdDataStatus)

DATA_LS_HELP = "List data tracked by DVC with its metadata."
data_ls_parser = data_subparsers.add_parser(
"ls",
aliases=["list"],
parents=[parent_parser],
description=append_doc_link(DATA_LS_HELP, "data/ls"),
formatter_class=argparse.RawDescriptionHelpFormatter,
add_help=False,
)
data_ls_parser.add_argument(
"--md",
dest="markdown",
action="store_true",
default=False,
help="Show tabulated output in the Markdown format (GFM).",
)
data_ls_parser.add_argument(
"--type",
action=CommaSeparatedArgs,
default=[],
help="Comma-separated list of type to filter.",
)
data_ls_parser.add_argument(
"--labels",
action=CommaSeparatedArgs,
default=[],
help="Comma-separated list of labels to filter.",
)
data_ls_parser.add_argument(
"-R",
"--recursive",
action="store_true",
default=False,
help="Recursively list from the specified directories.",
)
data_ls_parser.add_argument(
"targets",
default=None,
nargs="*",
help=(
"Limit command scope to these tracked files/directories, "
".dvc files, or stage names."
),
).complete = completion.DVCFILES_AND_STAGE
data_ls_parser.set_defaults(func=CmdDataLs)
hide_subparsers_from_help(data_subparsers)
8 changes: 0 additions & 8 deletions dvc/commands/imp.py
Original file line number Diff line number Diff line change
@@ -22,10 +22,6 @@ def run(self):
rev=self.args.rev,
no_exec=self.args.no_exec,
no_download=self.args.no_download,
desc=self.args.desc,
type=self.args.type,
labels=self.args.labels,
meta=self.args.meta,
jobs=self.args.jobs,
)
except CloneError:
@@ -42,8 +38,6 @@ def run(self):


def add_parser(subparsers, parent_parser):
from .add import _add_annotating_args

IMPORT_HELP = (
"Download file or directory tracked by DVC or by Git "
"into the workspace, and track it."
@@ -106,6 +100,4 @@ def add_parser(subparsers, parent_parser):
),
metavar="<number>",
)

_add_annotating_args(import_parser)
import_parser.set_defaults(func=CmdImport)
7 changes: 0 additions & 7 deletions dvc/commands/imp_url.py
Original file line number Diff line number Diff line change
@@ -20,10 +20,6 @@ def run(self):
no_download=self.args.no_download,
remote=self.args.remote,
to_remote=self.args.to_remote,
desc=self.args.desc,
type=self.args.type,
labels=self.args.labels,
meta=self.args.meta,
jobs=self.args.jobs,
force=self.args.force,
version_aware=self.args.version_aware,
@@ -41,8 +37,6 @@ def run(self):


def add_parser(subparsers, parent_parser):
from .add import _add_annotating_args

IMPORT_HELP = "Download or copy file from URL and take it under DVC control."

import_parser = subparsers.add_parser(
@@ -126,5 +120,4 @@ def add_parser(subparsers, parent_parser):
default=False,
help="Import using cloud versioning. Implied if the URL contains a version ID.",
)
_add_annotating_args(import_parser)
import_parser.set_defaults(func=CmdImportUrl)
3 changes: 0 additions & 3 deletions dvc/repo/add.py
Original file line number Diff line number Diff line change
@@ -297,7 +297,4 @@ def create_stages(
force=force,
)
output_exists = False

out_obj = stage.outs[0]
out_obj.annot.update(**kwargs)
yield StageInfo(stage, output_exists)
27 changes: 1 addition & 26 deletions dvc/repo/data.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,6 @@
import os
import posixpath
from itertools import chain
from typing import (
TYPE_CHECKING,
Any,
Dict,
Iterable,
Iterator,
List,
Optional,
TypedDict,
Union,
)
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, TypedDict, Union

from dvc.ui import ui

@@ -251,17 +240,3 @@ def status(
unchanged=list(unchanged),
git=git_info,
)


def ls(
repo: "Repo",
targets: Optional[List[Optional[str]]] = None,
recursive: bool = False,
) -> Iterator[Dict[str, Any]]:
targets = targets or [None]
pairs = chain.from_iterable(
repo.stage.collect_granular(target, recursive=recursive) for target in targets
)
for stage, filter_info in pairs:
for out in stage.filter_outs(filter_info):
yield {"path": str(out), **out.annot.to_dict()}
7 changes: 0 additions & 7 deletions dvc/repo/imp_url.py
Original file line number Diff line number Diff line change
@@ -25,10 +25,6 @@ def imp_url( # noqa: C901, PLR0913
no_exec=False,
remote=None,
to_remote=False,
desc=None,
type=None, # noqa: A002, pylint: disable=redefined-builtin
labels=None,
meta=None,
jobs=None,
force=False,
fs_config=None,
@@ -70,9 +66,6 @@ def imp_url( # noqa: C901, PLR0913
fs_config=fs_config,
)

out_obj = stage.outs[0]
out_obj.annot.update(desc=desc, type=type, labels=labels, meta=meta)

try:
self.check_graph(stages={stage})
except OutputDuplicationError as exc:
21 changes: 0 additions & 21 deletions tests/func/test_add.py
Original file line number Diff line number Diff line change
@@ -11,7 +11,6 @@

import dvc as dvc_module
import dvc_data
from dvc.annotations import Annotation
from dvc.cachemgr import CacheManager
from dvc.cli import main
from dvc.config import ConfigError
@@ -1124,26 +1123,6 @@ def test_add_ignore_duplicated_targets(tmp_dir, dvc, capsys):
assert "ignoring duplicated targets: foo, bar" in err


def test_add_with_annotations(M, tmp_dir, dvc):
tmp_dir.gen("foo", "foo")

annot = {
"desc": "foo desc",
"labels": ["l1", "l2"],
"type": "t1",
"meta": {"key": "value"},
}
(stage,) = dvc.add("foo", **annot)
assert stage.outs[0].annot == Annotation(**annot)
assert (tmp_dir / "foo.dvc").parse() == M.dict(outs=[M.dict(**annot)])

# try to selectively update/overwrite some annotations
annot = {**annot, "type": "t2"}
(stage,) = dvc.add("foo", type="t2")
assert stage.outs[0].annot == Annotation(**annot)
assert (tmp_dir / "foo.dvc").parse() == M.dict(outs=[M.dict(**annot)])


def test_add_updates_to_cloud_versioning_dir(tmp_dir, dvc):
data_dvc = tmp_dir / "data.dvc"
data_dvc.dump(
Loading