From a58be4ef4999eeb71a3a47435bd19aaf89b22dd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Mon, 2 Nov 2020 23:22:23 +0100 Subject: [PATCH 1/4] plots/metrics: unified parsing errors communication --- dvc/repo/plots/__init__.py | 35 +++++++++++------ dvc/repo/plots/data.py | 63 ++++++++++++++++++++----------- dvc/utils/serialize/_json.py | 4 +- tests/func/metrics/test_common.py | 16 ++++++++ 4 files changed, 81 insertions(+), 37 deletions(-) diff --git a/dvc/repo/plots/__init__.py b/dvc/repo/plots/__init__.py index 5ae87abd69..233e60672d 100644 --- a/dvc/repo/plots/__init__.py +++ b/dvc/repo/plots/__init__.py @@ -4,6 +4,7 @@ from dvc.exceptions import DvcException, NoPlotsError from dvc.repo.collect import collect +from dvc.repo.plots.data import PlotParsingError from dvc.schema import PLOT_PROPS from dvc.tree.repo import RepoTree from dvc.utils import relpath @@ -64,17 +65,24 @@ def collect(self, targets=None, revs=None): return data @staticmethod - def render(data, revs=None, props=None, templates=None): - """Renders plots""" - props = props or {} - - # Merge data by plot file and apply overriding props - plots = _prepare_plots(data, revs, props) - - return { - datafile: _render(datafile, desc["data"], desc["props"], templates) - for datafile, desc in plots.items() - } + def render(plots, templates=None): + result = {} + for datafile, desc in plots.items(): + try: + result[datafile] = _render( + datafile, desc["data"], desc["props"], templates + ) + except PlotParsingError as e: + logger.debug( + "failed to read '%s' on '%s'", + e.path, + e.revision, + exc_info=True, + ) + + if not any(result.values()): + raise DvcException("Failed to parse any plot.") + return result def show(self, targets=None, revs=None, props=None, templates=None): from .data import NoMetricInHistoryError @@ -94,7 +102,10 @@ def show(self, targets=None, revs=None, props=None, templates=None): if templates is None: templates = self.templates - return self.render(data, revs, props, templates) + + plots = _prepare_plots(data, revs, props or {}) + + return self.render(plots, templates) def diff(self, *args, **kwargs): from .diff import diff diff --git a/dvc/repo/plots/data.py b/dvc/repo/plots/data.py index a8cbc4ef5c..2eea1d95b2 100644 --- a/dvc/repo/plots/data.py +++ b/dvc/repo/plots/data.py @@ -1,14 +1,13 @@ import csv import io -import json import os from collections import OrderedDict from copy import copy -from funcy import first +from funcy import first, reraise from dvc.exceptions import DvcException -from dvc.utils.serialize import loads_yaml +from dvc.utils.serialize import ParseError, parse_json, parse_yaml class PlotMetricTypeError(DvcException): @@ -32,6 +31,14 @@ def __init__(self, path): super().__init__(f"Could not find '{path}'.") +class PlotParsingError(ParseError): + def __init__(self, path, revision): + self.path = path + self.revision = revision + + super().__init__(path, f"revision: {revision}") + + def plot_data(filename, revision, content): _, extension = os.path.splitext(filename.lower()) if extension == ".json": @@ -159,7 +166,10 @@ def _processors(self): return [_filter_fields, _append_index, _append_revision] def to_datapoints(self, **kwargs): - data = self.raw(**kwargs) + with reraise( + ParseError, PlotParsingError(self.filename, self.revision) + ): + data = self.raw(**kwargs) for data_proc in self._processors(): data = data_proc( @@ -170,7 +180,9 @@ def to_datapoints(self, **kwargs): class JSONPlotData(PlotData): def raw(self, **kwargs): - return json.loads(self.content, object_pairs_hook=OrderedDict) + return parse_json( + self.content, self.filename, object_pairs_hook=OrderedDict + ) def _processors(self): parent_processors = super()._processors() @@ -183,31 +195,36 @@ def __init__(self, filename, revision, content, delimiter=","): self.delimiter = delimiter def raw(self, header=True, **kwargs): # pylint: disable=arguments-differ - first_row = first(csv.reader(io.StringIO(self.content))) + with reraise( + csv.Error, ParseError(self.filename, f"revision: {self.revision}") + ): + first_row = first(csv.reader(io.StringIO(self.content))) - if header: - reader = csv.DictReader( - io.StringIO(self.content), delimiter=self.delimiter, - ) - else: - reader = csv.DictReader( - io.StringIO(self.content), - delimiter=self.delimiter, - fieldnames=[str(i) for i in range(len(first_row))], - ) + if header: + reader = csv.DictReader( + io.StringIO(self.content), delimiter=self.delimiter, + ) + else: + reader = csv.DictReader( + io.StringIO(self.content), + delimiter=self.delimiter, + fieldnames=[str(i) for i in range(len(first_row))], + ) - fieldnames = reader.fieldnames - data = list(reader) + fieldnames = reader.fieldnames + data = list(reader) - return [ - OrderedDict([(field, data_point[field]) for field in fieldnames]) - for data_point in data - ] + return [ + OrderedDict( + [(field, data_point[field]) for field in fieldnames] + ) + for data_point in data + ] class YAMLPlotData(PlotData): def raw(self, **kwargs): - return loads_yaml(self.content, typ="rt") + return parse_yaml(self.content, self.filename, typ="rt") def _processors(self): parent_processors = super()._processors() diff --git a/dvc/utils/serialize/_json.py b/dvc/utils/serialize/_json.py index 3c247e85af..ad045016c5 100644 --- a/dvc/utils/serialize/_json.py +++ b/dvc/utils/serialize/_json.py @@ -14,9 +14,9 @@ def load_json(path, tree=None): return _load_data(path, parser=parse_json, tree=tree) -def parse_json(text, path): +def parse_json(text, path, **kwargs): with reraise(json.JSONDecodeError, JSONFileCorruptedError(path)): - return json.loads(text) or {} + return json.loads(text, **kwargs) or {} def dump_json(path, data, tree=None): diff --git a/tests/func/metrics/test_common.py b/tests/func/metrics/test_common.py index 31eff0e93a..9071fdd103 100644 --- a/tests/func/metrics/test_common.py +++ b/tests/func/metrics/test_common.py @@ -27,3 +27,19 @@ def test_diff_no_file_on_target_rev( diff_fun(dvc, "metric.json", "master") assert "'metric.json' was not found at: 'master'." in caplog.text + + +@pytest.mark.parametrize( + "fun, malformed_metric", + ( + (lambda repo: repo.metrics, '{"m": 1'), + (lambda repo: repo.plots, '[{"m": 1}, {"m": 2}'), + ), +) +def test_show_malformed_metric(tmp_dir, scm, dvc, fun, malformed_metric): + tmp_dir.gen("metric.json", malformed_metric) + _write_json( + tmp_dir, {"val": 1, "val2": [{"p": 1}, {"p": 2}]}, "metric_proper.json" + ) + + fun(dvc).show(targets=["metric.json", "metric_proper.json"]) From 1e99e9fbe71dd86b3223df8ae0604d18eaf18659 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Tue, 3 Nov 2020 14:46:11 +0100 Subject: [PATCH 2/4] plots/metrics: unify no metrics/plots messaging --- dvc/command/plots.py | 2 +- dvc/exceptions.py | 17 ++++++-- dvc/repo/metrics/show.py | 13 ++---- dvc/repo/plots/__init__.py | 24 +++++++---- dvc/repo/plots/data.py | 42 +++++++++----------- tests/func/metrics/plots/test_show.py | 7 ---- tests/func/metrics/test_common.py | 57 ++++++++++++++++----------- 7 files changed, 87 insertions(+), 75 deletions(-) diff --git a/dvc/command/plots.py b/dvc/command/plots.py index 04e5b2918d..775214c036 100644 --- a/dvc/command/plots.py +++ b/dvc/command/plots.py @@ -73,7 +73,7 @@ def run(self): ) except DvcException: - logger.exception("") + logger.exception("failed to show plots") return 1 return 0 diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 17b4a87eda..1a4a44f99d 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -174,11 +174,20 @@ class NoMetricsError(DvcException): pass -class NoPlotsError(DvcException): - def __init__(self): +class NoMetricsParsedError(NoMetricsError): + def __init__(self, command): + super().__init__( + f"Could not parse {command} files. Use `-v` option to see more " + "details." + ) + + +class NoMetricsFoundError(NoMetricsError): + def __init__(self, command, run_options): super().__init__( - "no plots in this repository. Use `--plots/--plots-no-cache` " - "options for `dvc run` to mark stage outputs as plots." + f"No {command} files in this repository. " + f"Use `{run_options}` options for " + f"`dvc run` to mark stage outputs as {command}." ) diff --git a/dvc/repo/metrics/show.py b/dvc/repo/metrics/show.py index 28e1816c07..6a08224c45 100644 --- a/dvc/repo/metrics/show.py +++ b/dvc/repo/metrics/show.py @@ -1,6 +1,6 @@ import logging -from dvc.exceptions import NoMetricsError +from dvc.exceptions import NoMetricsFoundError, NoMetricsParsedError from dvc.repo import locked from dvc.repo.collect import collect from dvc.scm.base import SCMError @@ -104,16 +104,9 @@ def show( if not res: if metrics_found: - msg = ( - "Could not parse metrics files. Use `-v` option to see more " - "details." - ) + raise NoMetricsParsedError("metrics") else: - msg = ( - "no metrics files in this repository. Use `-m/-M` options for " - "`dvc run` to mark stage outputs as metrics." - ) - raise NoMetricsError(msg) + raise NoMetricsFoundError("metrics", "-m/-M") # Hide workspace metrics if they are the same as in the active branch try: diff --git a/dvc/repo/plots/__init__.py b/dvc/repo/plots/__init__.py index 233e60672d..e4ad6fa390 100644 --- a/dvc/repo/plots/__init__.py +++ b/dvc/repo/plots/__init__.py @@ -2,7 +2,11 @@ from funcy import cached_property, first, project -from dvc.exceptions import DvcException, NoPlotsError +from dvc.exceptions import ( + DvcException, + NoMetricsFoundError, + NoMetricsParsedError, +) from dvc.repo.collect import collect from dvc.repo.plots.data import PlotParsingError from dvc.schema import PLOT_PROPS @@ -65,7 +69,13 @@ def collect(self, targets=None, revs=None): return data @staticmethod - def render(plots, templates=None): + def render(data, revs=None, props=None, templates=None): + """Renders plots""" + props = props or {} + + # Merge data by plot file and apply overriding props + plots = _prepare_plots(data, revs, props) + result = {} for datafile, desc in plots.items(): try: @@ -81,7 +91,8 @@ def render(plots, templates=None): ) if not any(result.values()): - raise DvcException("Failed to parse any plot.") + raise NoMetricsParsedError("plots") + return result def show(self, targets=None, revs=None, props=None, templates=None): @@ -98,14 +109,11 @@ def show(self, targets=None, revs=None, props=None, templates=None): # No data at all is a special error with a special message if not data: - raise NoPlotsError() + raise NoMetricsFoundError("plots", "--plots/--plots-no-cache") if templates is None: templates = self.templates - - plots = _prepare_plots(data, revs, props or {}) - - return self.render(plots, templates) + return self.render(data, revs, props, templates) def diff(self, *args, **kwargs): from .diff import diff diff --git a/dvc/repo/plots/data.py b/dvc/repo/plots/data.py index 2eea1d95b2..bff7aeb3d4 100644 --- a/dvc/repo/plots/data.py +++ b/dvc/repo/plots/data.py @@ -167,7 +167,8 @@ def _processors(self): def to_datapoints(self, **kwargs): with reraise( - ParseError, PlotParsingError(self.filename, self.revision) + [ParseError, csv.Error], + PlotParsingError(self.filename, self.revision), ): data = self.raw(**kwargs) @@ -195,31 +196,26 @@ def __init__(self, filename, revision, content, delimiter=","): self.delimiter = delimiter def raw(self, header=True, **kwargs): # pylint: disable=arguments-differ - with reraise( - csv.Error, ParseError(self.filename, f"revision: {self.revision}") - ): - first_row = first(csv.reader(io.StringIO(self.content))) + first_row = first(csv.reader(io.StringIO(self.content))) - if header: - reader = csv.DictReader( - io.StringIO(self.content), delimiter=self.delimiter, - ) - else: - reader = csv.DictReader( - io.StringIO(self.content), - delimiter=self.delimiter, - fieldnames=[str(i) for i in range(len(first_row))], - ) + if header: + reader = csv.DictReader( + io.StringIO(self.content), delimiter=self.delimiter, + ) + else: + reader = csv.DictReader( + io.StringIO(self.content), + delimiter=self.delimiter, + fieldnames=[str(i) for i in range(len(first_row))], + ) - fieldnames = reader.fieldnames - data = list(reader) + fieldnames = reader.fieldnames + data = list(reader) - return [ - OrderedDict( - [(field, data_point[field]) for field in fieldnames] - ) - for data_point in data - ] + return [ + OrderedDict([(field, data_point[field]) for field in fieldnames]) + for data_point in data + ] class YAMLPlotData(PlotData): diff --git a/tests/func/metrics/plots/test_show.py b/tests/func/metrics/plots/test_show.py index 0e4f05e532..f4a29986c2 100644 --- a/tests/func/metrics/plots/test_show.py +++ b/tests/func/metrics/plots/test_show.py @@ -398,13 +398,6 @@ def _replace(path, src, dst): path.write_text(path.read_text().replace(src, dst)) -def test_no_plots(tmp_dir, dvc): - from dvc.exceptions import NoPlotsError - - with pytest.raises(NoPlotsError): - dvc.plots.show() - - def test_should_raise_on_no_template(tmp_dir, dvc, run_copy_metrics): metric = [{"val": 2}, {"val": 3}] _write_json(tmp_dir, metric, "metric_t.json") diff --git a/tests/func/metrics/test_common.py b/tests/func/metrics/test_common.py index 9071fdd103..d0ebc37156 100644 --- a/tests/func/metrics/test_common.py +++ b/tests/func/metrics/test_common.py @@ -2,44 +2,57 @@ import pytest +from dvc.main import main from tests.func.metrics.utils import _write_json -def metrics_diff(dvc, filename, revision): - dvc.metrics.diff(targets=[filename], a_rev=revision) - - -def plots_diff(dvc, filename, revision): - dvc.plots.diff(targets=[filename], revs=[revision]) - - @pytest.mark.parametrize( - "diff_fun, metric_value", - ((metrics_diff, {"m": 1}), (plots_diff, [{"m": 1}, {"m": 2}])), + "command, metric_value", + (("metrics", {"m": 1}), ("plots", [{"m": 1}, {"m": 2}])), ) def test_diff_no_file_on_target_rev( - tmp_dir, scm, dvc, caplog, diff_fun, metric_value + tmp_dir, scm, dvc, caplog, command, metric_value ): with tmp_dir.branch("new_branch", new=True): _write_json(tmp_dir, metric_value, "metric.json") with caplog.at_level(logging.WARNING, "dvc"): - diff_fun(dvc, "metric.json", "master") + assert ( + main([command, "diff", "master", "--targets", "metric.json"]) + == 0 + ) assert "'metric.json' was not found at: 'master'." in caplog.text @pytest.mark.parametrize( - "fun, malformed_metric", - ( - (lambda repo: repo.metrics, '{"m": 1'), - (lambda repo: repo.plots, '[{"m": 1}, {"m": 2}'), - ), + "command, malformed_metric", + (("metrics", '{"m": 1'), ("plots", '[{"m": 1}, {"m": 2}'),), ) -def test_show_malformed_metric(tmp_dir, scm, dvc, fun, malformed_metric): +def test_show_malformed_metric( + tmp_dir, scm, dvc, caplog, command, malformed_metric +): tmp_dir.gen("metric.json", malformed_metric) - _write_json( - tmp_dir, {"val": 1, "val2": [{"p": 1}, {"p": 2}]}, "metric_proper.json" - ) - fun(dvc).show(targets=["metric.json", "metric_proper.json"]) + with caplog.at_level(logging.ERROR, "dvc"): + assert main([command, "show", "metric.json"]) == 1 + + assert ( + f"Could not parse {command} files. " + "Use `-v` option to see more details." + ) in caplog.text + + +@pytest.mark.parametrize( + "command, run_options", + (("metrics", "-m/-M"), ("plots", "--plots/--plots-no-cache"),), +) +def test_show_no_metrics_files(tmp_dir, dvc, caplog, command, run_options): + with caplog.at_level(logging.ERROR, "dvc"): + assert main([command, "show"]) == 1 + + assert ( + f"No {command} files in this repository. " + f"Use `{run_options}` options for " + f"`dvc run` to mark stage outputs as {command}." + ) in caplog.text From 896a49cf3b867afdc4c6aa9d65d9ceb82b1a30c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Tue, 3 Nov 2020 17:18:37 +0100 Subject: [PATCH 3/4] fixup --- tests/func/metrics/metrics/test_show.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/func/metrics/metrics/test_show.py b/tests/func/metrics/metrics/test_show.py index f346b35caf..97fea223e6 100644 --- a/tests/func/metrics/metrics/test_show.py +++ b/tests/func/metrics/metrics/test_show.py @@ -2,16 +2,11 @@ import pytest +from dvc.exceptions import NoMetricsError from dvc.repo import Repo -from dvc.repo.metrics.show import NoMetricsError from dvc.utils.fs import remove -def test_show_empty(dvc): - with pytest.raises(NoMetricsError): - dvc.metrics.show() - - def test_show_simple(tmp_dir, dvc, run_copy_metrics): tmp_dir.gen("metrics_t.yaml", "1.1") run_copy_metrics( From f7a9795c9ac4fce0fb045168817651b843910c29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Tue, 3 Nov 2020 17:22:32 +0100 Subject: [PATCH 4/4] fixup --- tests/func/metrics/metrics/test_show.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/func/metrics/metrics/test_show.py b/tests/func/metrics/metrics/test_show.py index 97fea223e6..7f78f0a40d 100644 --- a/tests/func/metrics/metrics/test_show.py +++ b/tests/func/metrics/metrics/test_show.py @@ -2,7 +2,6 @@ import pytest -from dvc.exceptions import NoMetricsError from dvc.repo import Repo from dvc.utils.fs import remove @@ -31,15 +30,6 @@ def test_show_multiple(tmp_dir, dvc, run_copy_metrics): assert dvc.metrics.show() == {"": {"foo": {"foo": 1}, "baz": {"baz": 2}}} -def test_show_invalid_metric(tmp_dir, dvc, run_copy_metrics): - tmp_dir.gen("metrics_temp.yaml", "foo:\n- bar\n- baz\nxyz: string") - run_copy_metrics( - "metrics_temp.yaml", "metrics.yaml", metrics=["metrics.yaml"] - ) - with pytest.raises(NoMetricsError): - dvc.metrics.show() - - def test_show_branch(tmp_dir, scm, dvc, run_copy_metrics): tmp_dir.gen("metrics_temp.yaml", "foo: 1") run_copy_metrics(