Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pull/fetch/push: add stats #3735

Merged
merged 5 commits into from
May 5, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 7 additions & 25 deletions dvc/command/checkout.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,17 @@
import argparse
import logging
import operator

import colorama

from dvc.command.base import append_doc_link
from dvc.command.base import CmdBase
from dvc.exceptions import CheckoutError
from dvc.utils.humanize import get_summary

logger = logging.getLogger(__name__)


def _human_join(words):
words = list(words)
if not words:
return ""

return (
"{before} and {after}".format(
before=", ".join(words[:-1]), after=words[-1],
)
if len(words) > 1
else words[0]
)


def log_summary(stats):
states = ("added", "deleted", "modified")
summary = (
"{} {}".format(len(stats[state]), state)
for state in states
if stats.get(state)
)
logger.info(_human_join(summary) or "No changes.")


def log_changes(stats):
colors = [
("modified", colorama.Fore.YELLOW,),
Expand Down Expand Up @@ -75,7 +53,11 @@ def run(self):
stats = exc.stats

if self.args.summary:
log_summary(stats)
default_message = "No changes."
msg = get_summary(
sorted(stats.items(), key=operator.itemgetter(0))
)
logger.info(msg or default_message)
else:
log_changes(stats)

Expand Down
23 changes: 13 additions & 10 deletions dvc/command/data_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,25 @@

from dvc.command.base import append_doc_link
from dvc.command.base import CmdBase
from dvc.command.checkout import log_summary
from dvc.command.checkout import log_changes
from dvc.utils.humanize import get_summary
from dvc.exceptions import DvcException, CheckoutError


logger = logging.getLogger(__name__)


class CmdDataBase(CmdBase):
@classmethod
def check_up_to_date(cls, processed_files_count):
if processed_files_count == 0:
logger.info("Everything is up to date.")
def log_summary(self, stats):
default_msg = "Everything is up to date."
logger.info(get_summary(stats.items()) or default_msg)


class CmdDataPull(CmdDataBase):
def log_summary(self, stats):
log_changes(stats)
super().log_summary(stats)

def run(self):
try:
stats = self.repo.pull(
Expand All @@ -31,13 +35,12 @@ def run(self):
force=self.args.force,
recursive=self.args.recursive,
)
log_summary(stats)
self.log_summary(stats)
except (CheckoutError, DvcException) as exc:
log_summary(getattr(exc, "stats", {}))
self.log_summary(getattr(exc, "stats", {}))
logger.exception("failed to pull data from the cloud")
return 1

self.check_up_to_date(stats["downloaded"])
return 0


Expand All @@ -54,10 +57,10 @@ def run(self):
with_deps=self.args.with_deps,
recursive=self.args.recursive,
)
self.log_summary({"pushed": processed_files_count})
except DvcException:
logger.exception("failed to push data to the cloud")
return 1
self.check_up_to_date(processed_files_count)
return 0


Expand All @@ -74,10 +77,10 @@ def run(self):
with_deps=self.args.with_deps,
recursive=self.args.recursive,
)
self.log_summary({"fetched": processed_files_count})
except DvcException:
logger.exception("failed to fetch data from the cloud")
return 1
self.check_up_to_date(processed_files_count)
return 0


Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ def pull(
targets=targets, with_deps=with_deps, force=force, recursive=recursive
)

stats["downloaded"] = processed_files_count
stats["fetched"] = processed_files_count
return stats
27 changes: 27 additions & 0 deletions dvc/utils/humanize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from funcy import is_seq


def join(words):
words = list(words)
if not words:
return ""

return (
"{before} and {after}".format(
before=", ".join(words[:-1]), after=words[-1],
)
if len(words) > 1
else words[0]
)


def get_summary(stats):
status = (
(state, len(data) if is_seq(data) else data)
for state, data in stats
if data
)
return join(
"{} file{} {}".format(num, "s" if num > 1 else "", state)
for state, num in status
)
2 changes: 1 addition & 1 deletion tests/func/test_checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,7 +686,7 @@ def test_stats_does_not_show_changes_by_default(tmp_dir, dvc, scm, caplog):
with caplog.at_level(logging.INFO, logger="dvc"):
caplog.clear()
assert main(["checkout", "--summary"]) == 0
assert "2 deleted" in caplog.text
assert "2 files deleted" in caplog.text
assert "dir" not in caplog.text
assert "other" not in caplog.text

Expand Down
62 changes: 57 additions & 5 deletions tests/func/test_data_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,14 +724,14 @@ def test_pull_git_imports(request, tmp_dir, dvc, scm, erepo):
dvc.imp(fspath(erepo), "foo")
dvc.imp(fspath(erepo), "dir", out="new_dir", rev="HEAD~")

assert dvc.pull()["downloaded"] == 0
assert dvc.pull()["fetched"] == 0

for item in ["foo", "new_dir", dvc.cache.local.cache_dir]:
remove(item)
os.makedirs(dvc.cache.local.cache_dir, exist_ok=True)
clean_repos()

assert dvc.pull(force=True)["downloaded"] == 2
assert dvc.pull(force=True)["fetched"] == 2

assert (tmp_dir / "foo").exists()
assert (tmp_dir / "foo").read_text() == "foo"
Expand All @@ -751,11 +751,11 @@ def test_pull_external_dvc_imports(tmp_dir, dvc, scm, erepo_dir):
dvc.imp(fspath(erepo_dir), "foo")
dvc.imp(fspath(erepo_dir), "dir", out="new_dir", rev="HEAD~")

assert dvc.pull()["downloaded"] == 0
assert dvc.pull()["fetched"] == 0

clean(["foo", "new_dir"], dvc)

assert dvc.pull(force=True)["downloaded"] == 2
assert dvc.pull(force=True)["fetched"] == 2

assert (tmp_dir / "foo").exists()
assert (tmp_dir / "foo").read_text() == "foo"
Expand Down Expand Up @@ -796,7 +796,7 @@ def test_dvc_pull_pipeline_stages(tmp_dir, dvc, local_remote, run_copy):
for target in [stage.addressing, out]:
clean(outs, dvc)
stats = dvc.pull([target])
assert stats["downloaded"] == 1
assert stats["fetched"] == 1
assert stats["added"] == [out]
assert os.path.exists(out)
assert not any(os.path.exists(out) for out in set(outs) - {out})
Expand Down Expand Up @@ -847,3 +847,55 @@ def test_pipeline_file_target_ops(tmp_dir, dvc, local_remote, run_copy):

with pytest.raises(StageNotFound):
dvc.pull(["dvc.yaml:StageThatDoesNotExist"])


@pytest.mark.parametrize(
"fs, msg",
[
({"foo": "foo", "bar": "bar"}, "2 files pushed"),
({"foo": "foo"}, "1 file pushed"),
({}, "Everything is up to date"),
],
)
def test_push_stats(tmp_dir, dvc, fs, msg, local_remote, caplog):
tmp_dir.dvc_gen(fs)
caplog.clear()
with caplog.at_level(level=logging.INFO, logger="dvc"):
main(["push"])
assert msg in caplog.text


@pytest.mark.parametrize(
"fs, msg",
[
({"foo": "foo", "bar": "bar"}, "2 files fetched"),
({"foo": "foo"}, "1 file fetched"),
({}, "Everything is up to date."),
],
)
def test_fetch_stats(tmp_dir, dvc, fs, msg, local_remote, caplog):
tmp_dir.dvc_gen(fs)
dvc.push()
clean(list(fs.keys()), dvc)
caplog.clear()
with caplog.at_level(level=logging.INFO, logger="dvc"):
main(["fetch"])
assert msg in caplog.text


def test_pull_stats(tmp_dir, dvc, local_remote, caplog):
tmp_dir.dvc_gen({"foo": "foo", "bar": "bar"})
dvc.push()
clean(["foo", "bar"], dvc)
(tmp_dir / "bar").write_text("foobar")
caplog.clear()
with caplog.at_level(level=logging.INFO, logger="dvc"):
main(["pull", "--force"])
assert "M\tbar" in caplog.text
assert "A\tfoo" in caplog.text
assert "2 files fetched" in caplog.text
assert "1 file added" in caplog.text
assert "1 file modified" in caplog.text
with caplog.at_level(level=logging.INFO, logger="dvc"):
main(["pull"])
assert "Everything is up to date." in caplog.text
29 changes: 1 addition & 28 deletions tests/unit/command/test_checkout.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging

from dvc.cli import parse_args
from dvc.command.checkout import CmdCheckout, log_summary, log_changes
from dvc.command.checkout import CmdCheckout, log_changes


def test_checkout(tmp_dir, dvc, mocker):
Expand All @@ -23,33 +23,6 @@ def test_checkout(tmp_dir, dvc, mocker):
)


def test_log_summary(caplog):
stats = {
"added": ["file1", "file2", "file3"],
"deleted": ["file4", "file5"],
"modified": ["file6", "file7"],
}

def _assert_output(stats, expected_text):
with caplog.at_level(logging.INFO, logger="dvc"):
caplog.clear()
log_summary(stats)
assert expected_text in caplog.text

_assert_output(stats, "3 added, 2 deleted and 2 modified")

del stats["deleted"][1]
_assert_output(stats, "3 added, 1 deleted and 2 modified")

del stats["deleted"][0]
_assert_output(stats, "3 added and 2 modified")

del stats["modified"]
_assert_output(stats, "3 added")

_assert_output({}, "No changes.")


def test_log_changes(caplog):
stats = {
"added": ["file1", "dir1/"],
Expand Down
40 changes: 40 additions & 0 deletions tests/unit/utils/test_humanize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from collections import OrderedDict

from dvc.utils.humanize import get_summary


def test_get_summary():
# dict, so that we could delete from it easily
stats = OrderedDict(
[
("fetched", 3),
("added", ["file1", "file2", "file3"]),
("deleted", ["file4", "file5"]),
("modified", ["file6", "file7"]),
]
)

assert get_summary(stats.items()) == (
"3 files fetched, "
"3 files added, "
"2 files deleted "
"and "
"2 files modified"
)

del stats["fetched"]
del stats["deleted"][1]
assert (
get_summary(stats.items())
== "3 files added, 1 file deleted and 2 files modified"
)

del stats["deleted"][0]
assert get_summary(stats.items()) == "3 files added and 2 files modified"

del stats["modified"]
assert get_summary(stats.items()) == "3 files added"

assert get_summary([]) == ""
assert get_summary([("x", 0), ("y", [])]) == ""
assert get_summary([("x", 1), ("y", [])]) == "1 file x"