diff --git a/docs/changes.rst b/docs/changes.rst index b411280e..c67be823 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -15,7 +15,9 @@ all releases are available on `Anaconda.org - :gh:`34` skips ``pytask_collect_task_teardown`` if task is None. - :gh:`35` adds the ability to capture stdout and stderr with the CaptureManager. - :gh:`36` reworks the debugger to make it work with the CaptureManager. -- :gh:`37` removes reports argument from hooks related to task collection. +- :gh:`37` removes ``reports`` argument from hooks related to task collection. +- :gh:`38` allows to pass dictionaries as dependencies and products and inside the + function ``depends_on`` and ``produces`` become dictionaries. 0.0.8 - 2020-10-04 diff --git a/docs/tutorials/how_to_define_dependencies_products.rst b/docs/tutorials/how_to_define_dependencies_products.rst index 76f8a9d2..cdbac675 100644 --- a/docs/tutorials/how_to_define_dependencies_products.rst +++ b/docs/tutorials/how_to_define_dependencies_products.rst @@ -40,14 +40,6 @@ The ``@pytask.mark.produces`` decorator attaches a product to a task. The string task is defined. -Optional usage in signature ---------------------------- - -As seen before, if you have a task with products (or dependencies), you can use -``produces`` (``depends_on``) as a function argument and receive the path or list of -paths inside the functions. It helps to avoid repetition. - - Dependencies ------------ @@ -64,27 +56,77 @@ Most tasks have dependencies. Similar to products, you can use the produces.write_text(bold_text) +Optional usage in signature +--------------------------- + +As seen before, if you have a task with products (or dependencies), you can use +``produces`` (``depends_on``) as a function argument and receive the path or a +dictionary of paths inside the functions. It helps to avoid repetition. + + Multiple dependencies and products ---------------------------------- -If you have multiple dependencies or products, pass a list to the decorator. Inside the -function you receive a list of :class:`pathlib.Path` as well. +Most tasks have multiple dependencies or products. The easiest way to attach multiple +dependencies or products to a task is to pass a :class:`list`, :class:`tuple` or other +iterator to the decorator which contains :class:`str` or :class:`pathlib.Path`. .. code-block:: python - @pytask.mark.depends_on(["text_a.txt", "text_b.txt"]) - @pytask.mark.produces(["bold_text_a.txt", "bold_text_b.txt"]) - def task_make_text_bold(depends_on, produces): - for dependency, product in zip(depends_on, produces): - text = dependency.read_text() - bold_text = f"**{text}**" - product.write_text(bold_text) + @pytask.mark.depends_on(["text_1.txt", "text_2.txt"]) + def task_example(depends_on): + pass + +The function argument ``depends_on`` or ``produces`` becomes a dictionary where keys are +the positions in the list and values are :class:`pathlib.Path`. + +.. code-block:: python + + depends_on = {0: Path("text_1.txt"), 1: Path("text_2.txt")} + +Why dictionaries and not lists? First, dictionaries with positions as keys behave very +similar to lists and conversion between both is easy. + +Secondly, dictionaries allow to access paths to dependencies and products via labels +which is preferred over positional access when tasks become more complex and the order +changes. + +To assign labels to dependencies or products, pass a dictionary or a list of tuples with +the name in the first and the path in the second position to the decorator. For example, + +.. 
code-block:: python + + @pytask.mark.depends_on({"first": "text_1.txt", "second": "text_2.txt"}) + @pytask.mark.produces("out.txt") + def task_example(depends_on, produces): + text = depends_on["first"].read_text() + " " + depends_on["second"].read_text() + produces.write_text(text) + +or with tuples + +.. code-block:: python + + @pytask.mark.depends_on([("first", "text_1.txt"), ("second", "text_2.txt")]) + def task_example(): + ... + + +Multiple decorators +------------------- + +You can also attach multiple decorators to a function which will be merged into a single +dictionary. This might help you to group certain dependencies and apply them to multiple +tasks. + +.. code-block:: python + + common_dependencies = ["text_1.txt", "text_2.txt"] -The last task is overly complex since it is the same operation performed for two -independent dependencies and products. There must be a better way |tm|, right? Check out -the :doc:`tutorial on parametrization `. -.. |tm| unicode:: U+2122 + @pytask.mark.depends_on(common_dependencies) + @pytask.mark.depends_on("text_3.txt") + def task_example(): + ... .. rubric:: References diff --git a/src/_pytask/clean.py b/src/_pytask/clean.py index bab0784e..fbcddf06 100644 --- a/src/_pytask/clean.py +++ b/src/_pytask/clean.py @@ -55,7 +55,6 @@ def pytask_post_parse(config): @click.command() @click.option( - "-m", "--mode", type=click.Choice(["dry-run", "interactive", "force"]), help=_HELP_TEXT_MODE, @@ -166,7 +165,7 @@ def _yield_paths_from_task(task): """Yield all paths attached to a task.""" yield task.path for attribute in ["depends_on", "produces"]: - for node in getattr(task, attribute): + for node in getattr(task, attribute).values(): if isinstance(node.value, Path): yield node.value diff --git a/src/_pytask/collect.py b/src/_pytask/collect.py index d6de4af0..af530898 100644 --- a/src/_pytask/collect.py +++ b/src/_pytask/collect.py @@ -3,6 +3,7 @@ import importlib import inspect import sys +import time import traceback from pathlib import Path @@ -15,11 +16,14 @@ from _pytask.report import CollectionReport from _pytask.report import CollectionReportFile from _pytask.report import CollectionReportTask +from _pytask.report import format_collect_footer @hookimpl def pytask_collect(session): """Collect tasks.""" + session.collection_start = time.time() + reports = _collect_from_paths(session) tasks = _extract_successful_tasks_from_reports(reports) @@ -31,13 +35,12 @@ def pytask_collect(session): ) reports.append(report) - session.hook.pytask_collect_log(session=session, reports=reports, tasks=tasks) - session.collection_reports = reports session.tasks = tasks - if any(i for i in reports if not i.successful): - raise CollectionError + session.hook.pytask_collect_log( + session=session, reports=session.collection_reports, tasks=session.tasks + ) return True @@ -214,19 +217,36 @@ def _extract_successful_tasks_from_reports(reports): @hookimpl def pytask_collect_log(session, reports, tasks): """Log collection.""" + session.collection_end = time.time() tm_width = session.config["terminal_width"] message = f"Collected {len(tasks)} task(s)." - if session.deselected: - message += f" Deselected {len(session.deselected)} task(s)." + + n_deselected = len(session.deselected) + if n_deselected: + message += f" Deselected {n_deselected} task(s)." 
click.echo(message) failed_reports = [i for i in reports if not i.successful] if failed_reports: - click.echo(f"{{:=^{tm_width}}}".format(" Errors during collection ")) + click.echo("") + click.echo(f"{{:=^{tm_width}}}".format(" Failures during collection ")) for report in failed_reports: click.echo(f"{{:_^{tm_width}}}".format(report.format_title())) + + click.echo("") + traceback.print_exception(*report.exc_info) + click.echo("") - click.echo("=" * tm_width) + + duration = round(session.collection_end - session.collection_start, 2) + click.echo( + format_collect_footer( + len(tasks), len(failed_reports), n_deselected, duration, tm_width + ), + nl=True, + ) + + raise CollectionError diff --git a/src/_pytask/collect_command.py b/src/_pytask/collect_command.py index edefcd0f..9060c341 100644 --- a/src/_pytask/collect_command.py +++ b/src/_pytask/collect_command.py @@ -85,8 +85,8 @@ def _organize_tasks(tasks): task_dict = { task_name: { - "depends_on": [node.name for node in task.depends_on], - "produces": [node.name for node in task.produces], + "depends_on": [node.name for node in task.depends_on.values()], + "produces": [node.name for node in task.produces.values()], } } diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index cbc910aa..1ffe4d14 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -1,6 +1,7 @@ """Deals with nodes which are dependencies or products of a task.""" import functools import inspect +import itertools import pathlib from abc import ABCMeta from abc import abstractmethod @@ -13,7 +14,7 @@ from _pytask.exceptions import NodeNotCollectedError from _pytask.exceptions import NodeNotFoundError from _pytask.mark import get_marks_from_obj -from _pytask.shared import to_list +from _pytask.shared import find_duplicates def depends_on(objects: Union[Any, Iterable[Any]]) -> Union[Any, Iterable[Any]]: @@ -68,11 +69,11 @@ class PythonFunctionTask(MetaTask): """pathlib.Path: Path to the file where the task was defined.""" function = attr.ib(type=callable) """callable: The task function.""" - depends_on = attr.ib(converter=to_list) + depends_on = attr.ib(factory=dict) """Optional[List[MetaNode]]: A list of dependencies of task.""" - produces = attr.ib(converter=to_list) + produces = attr.ib(factory=dict) """List[MetaNode]: A list of products of task.""" - markers = attr.ib() + markers = attr.ib(factory=list) """Optional[List[Mark]]: A list of markers attached to the task function.""" _report_sections = attr.ib(factory=list) @@ -80,10 +81,12 @@ class PythonFunctionTask(MetaTask): def from_path_name_function_session(cls, path, name, function, session): """Create a task from a path, name, function, and session.""" objects = _extract_nodes_from_function_markers(function, depends_on) - dependencies = _collect_nodes(session, path, name, objects) + nodes = _convert_objects_to_node_dictionary(objects, "depends_on") + dependencies = _collect_nodes(session, path, name, nodes) objects = _extract_nodes_from_function_markers(function, produces) - products = _collect_nodes(session, path, name, objects) + nodes = _convert_objects_to_node_dictionary(objects, "produces") + products = _collect_nodes(session, path, name, nodes) markers = [ marker @@ -118,8 +121,10 @@ def _get_kwargs_from_task_for_function(self): attribute = getattr(self, name) kwargs[name] = ( attribute[0].value - if len(attribute) == 1 - else [node.value for node in attribute] + if len(attribute) == 1 and 0 in attribute + else { + node_name: node.value for node_name, node in attribute.items() + } ) return kwargs 
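For reference, a minimal sketch (editorial, not part of the diff) of what the ``_get_kwargs_from_task_for_function`` branch above hands to the task function under the new dictionary-based attributes. ``make_node`` is a hypothetical stand-in for the real node classes; only its ``.value`` attribute matters here.

.. code-block:: python

    from pathlib import Path
    from types import SimpleNamespace


    def make_node(path):
        # Hypothetical stand-in: real nodes expose a ``.value`` attribute.
        return SimpleNamespace(value=Path(path))


    # A single unnamed node is stored under the key 0 and unwrapped, so the
    # task function still receives a bare pathlib.Path.
    produces = {0: make_node("out.txt")}
    assert len(produces) == 1 and 0 in produces

    # Named or multiple nodes are passed through as a name -> Path dictionary.
    depends_on = {"first": make_node("text_1.txt"), "second": make_node("text_2.txt")}
    kwargs = {name: node.value for name, node in depends_on.items()}
    assert kwargs == {"first": Path("text_1.txt"), "second": Path("text_2.txt")}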
@@ -169,8 +174,9 @@ def state(self): def _collect_nodes(session, path, name, nodes): """Collect nodes for a task.""" - collect_nodes = [] - for node in nodes: + collected_nodes = {} + + for node_name, node in nodes.items(): collected_node = session.hook.pytask_collect_node( session=session, path=path, node=node ) @@ -180,9 +186,9 @@ def _collect_nodes(session, path, name, nodes): f"'{name}' in '{path}'." ) else: - collect_nodes.append(collected_node) + collected_nodes[node_name] = collected_node - return collect_nodes + return collected_nodes def _extract_nodes_from_function_markers(function, parser): @@ -195,4 +201,82 @@ def _extract_nodes_from_function_markers(function, parser): """ marker_name = parser.__name__ for marker in get_marks_from_obj(function, marker_name): - yield from to_list(parser(*marker.args, **marker.kwargs)) + parsed = parser(*marker.args, **marker.kwargs) + yield parsed + + +def _convert_objects_to_node_dictionary(objects, when): + list_of_tuples = _convert_objects_to_list_of_tuples(objects) + _check_that_names_are_not_used_multiple_times(list_of_tuples, when) + nodes = _convert_nodes_to_dictionary(list_of_tuples) + return nodes + + +def _convert_objects_to_list_of_tuples(objects): + out = [] + for obj in objects: + if isinstance(obj, dict): + obj = obj.items() + + if isinstance(obj, Iterable) and not isinstance(obj, str): + for x in obj: + if isinstance(x, Iterable) and not isinstance(x, str): + tuple_x = tuple(x) + if len(tuple_x) in [1, 2]: + out.append(tuple_x) + else: + raise ValueError("ERROR") + else: + out.append((x,)) + else: + out.append((obj,)) + + return out + + +def _check_that_names_are_not_used_multiple_times(list_of_tuples, when): + """Check that names of nodes are not assigned multiple times. + + Tuples in the list have either one or two elements. The first element in the two + element tuples is the name and cannot occur twice. + + Examples + -------- + >>> _check_that_names_are_not_used_multiple_times( + ... [("a",), ("a", 1)], "depends_on" + ... ) + >>> _check_that_names_are_not_used_multiple_times( + ... [("a", 0), ("a", 1)], "produces" + ... 
) + Traceback (most recent call last): + ValueError: '@pytask.mark.produces' has nodes with the same name: {'a'} + + """ + names = [x[0] for x in list_of_tuples if len(x) == 2] + duplicated = find_duplicates(names) + + if duplicated: + raise ValueError( + f"'@pytask.mark.{when}' has nodes with the same name: {duplicated}" + ) + + +def _convert_nodes_to_dictionary(list_of_tuples): + nodes = {} + counter = itertools.count() + names = [x[0] for x in list_of_tuples if len(x) == 2] + + for tuple_ in list_of_tuples: + if len(tuple_) == 2: + node_name, node = tuple_ + nodes[node_name] = node + + else: + while True: + node_name = next(counter) + if node_name not in names: + break + + nodes[node_name] = tuple_[0] + + return nodes diff --git a/src/_pytask/parametrize.py b/src/_pytask/parametrize.py index 4c35d016..924fc143 100644 --- a/src/_pytask/parametrize.py +++ b/src/_pytask/parametrize.py @@ -13,6 +13,7 @@ from _pytask.config import hookimpl from _pytask.mark import MARK_GEN as mark # noqa: N811 +from _pytask.shared import find_duplicates def parametrize( @@ -110,7 +111,7 @@ def pytask_parametrize_task(session, name, obj): names_and_functions.append((name_, wrapped_func)) names = [i[0] for i in names_and_functions] - duplicates = _find_duplicates(names) + duplicates = find_duplicates(names) if duplicates: formatted = pprint.pformat( duplicates, width=session.config["terminal_width"] @@ -355,25 +356,3 @@ def _copy_func(func: Callable[[Any], Any]) -> Callable[[Any], Any]: new_func = functools.update_wrapper(new_func, func) new_func.__kwdefaults__ = func.__kwdefaults__ return new_func - - -def _find_duplicates(x: Iterable[Any]): - """Find duplicated entries in iterable. - - Examples - -------- - >>> _find_duplicates(["a", "b", "a"]) - {'a'} - >>> _find_duplicates(["a", "b"]) - set() - - """ - seen = set() - duplicates = set() - - for i in x: - if i in seen: - duplicates.add(i) - seen.add(i) - - return duplicates diff --git a/src/_pytask/report.py b/src/_pytask/report.py index 9b078384..b5f0b13b 100644 --- a/src/_pytask/report.py +++ b/src/_pytask/report.py @@ -39,6 +39,7 @@ def from_task(cls, task): @classmethod def from_exception(cls, path, name, exc_info): + exc_info = remove_internal_traceback_frames_from_exc_info(exc_info) return cls(path=path, name=name, exc_info=exc_info) @property @@ -76,6 +77,7 @@ class ResolvingDependenciesReport: @classmethod def from_exception(cls, exc_info): + exc_info = remove_internal_traceback_frames_from_exc_info(exc_info) return cls(exc_info) @@ -119,6 +121,31 @@ def format_execute_footer(n_successful, n_failed, duration, terminal_width): return formatted_message +def format_collect_footer( + n_successful, n_failed, n_deselected, duration, terminal_width +): + """Format the footer of the execution.""" + message = [] + if n_successful: + message.append( + click.style(f"{n_successful} collected", fg=ColorCode.SUCCESS.value) + ) + if n_failed: + message.append(click.style(f"{n_failed} failed", fg=ColorCode.FAILED.value)) + if n_deselected: + message.append(click.style(f"{n_deselected} deselected", fg="white")) + message = " " + ", ".join(message) + " " + + color = ColorCode.FAILED.value if n_failed else ColorCode.SUCCESS.value + message += click.style(f"in {duration}s ", fg=color) + + formatted_message = _wrap_string_ignoring_ansi_colors( + message, color, terminal_width + ) + + return formatted_message + + def _wrap_string_ignoring_ansi_colors(message, color, width): """Wrap a string with ANSI colors. 
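A hedged usage sketch of the ``format_collect_footer`` helper added above. The counts, duration, and terminal width are invented values, and the exact padding and colors come from ``_wrap_string_ignoring_ansi_colors`` and ``ColorCode``, which this diff leaves untouched.

.. code-block:: python

    # Illustrative call only; the numbers are made up.
    from _pytask.report import format_collect_footer

    footer = format_collect_footer(
        n_successful=3, n_failed=1, n_deselected=2, duration=0.42, terminal_width=80
    )
    # Roughly a colored summary line such as (colors omitted):
    # " 3 collected, 1 failed, 2 deselected in 0.42s "
    print(footer)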
diff --git a/src/_pytask/resolve_dependencies.py b/src/_pytask/resolve_dependencies.py index f89011a5..11d5893b 100644 --- a/src/_pytask/resolve_dependencies.py +++ b/src/_pytask/resolve_dependencies.py @@ -57,11 +57,11 @@ def pytask_resolve_dependencies_create_dag(tasks): for task in tasks: dag.add_node(task.name, task=task) - for dependency in task.depends_on: + for dependency in task.depends_on.values(): dag.add_node(dependency.name, node=dependency) dag.add_edge(dependency.name, task.name) - for product in task.produces: + for product in task.produces.values(): dag.add_node(product.name, node=product) dag.add_edge(task.name, product.name) @@ -90,6 +90,7 @@ def pytask_resolve_dependencies_validate_dag(dag): """Validate the DAG.""" _check_if_dag_has_cycles(dag) _check_if_root_nodes_are_available(dag) + _check_if_tasks_have_the_same_products(dag) def _have_task_or_neighbors_changed(task_name, dag): @@ -132,18 +133,42 @@ def _check_if_dag_has_cycles(dag): def _check_if_root_nodes_are_available(dag): + missing_root_nodes = {} + for node in dag.nodes: is_node = "node" in dag.nodes[node] is_without_parents = len(list(dag.predecessors(node))) == 0 if is_node and is_without_parents: try: dag.nodes[node]["node"].state() - except NodeNotFoundError as e: - successors = list(dag.successors(node)) - raise NodeNotFoundError( - f"{node} is missing and a dependency of the following tasks: " - f"{successors}." - ) from e + except NodeNotFoundError: + missing_root_nodes[node] = list(dag.successors(node)) + + if missing_root_nodes: + raise ResolvingDependenciesError( + "There are some dependencies missing which do not exist and are not " + "produced by any task. See the following dictionary with missing nodes as " + "keys and dependent tasks as values." + f"\n\n{pprint.pformat(missing_root_nodes)}" + ) + + +def _check_if_tasks_have_the_same_products(dag): + nodes_created_by_multiple_tasks = {} + + for node in dag.nodes: + is_node = "node" in dag.nodes[node] + if is_node: + parents = list(dag.predecessors(node)) + if len(parents) > 1: + nodes_created_by_multiple_tasks[node] = parents + + if nodes_created_by_multiple_tasks: + raise ResolvingDependenciesError( + "There are some tasks which produce the same output. See the following " + "dictionary with products as keys and their producing tasks as values." 
+ f"\n\n{pprint.pformat(nodes_created_by_multiple_tasks)}" + ) @hookimpl @@ -151,7 +176,9 @@ def pytask_resolve_dependencies_log(session, report): """Log errors which happened while resolving dependencies.""" tm_width = session.config["terminal_width"] - click.echo(f"{{:=^{tm_width}}}".format(" Errors while resolving dependencies ")) + click.echo(f"{{:=^{tm_width}}}".format(" Failures during resolving dependencies ")) + + click.echo("") traceback.print_exception(*remove_traceback_from_exc_info(report.exc_info)) diff --git a/src/_pytask/session.py b/src/_pytask/session.py index b6ca6ff5..f5f93e66 100644 --- a/src/_pytask/session.py +++ b/src/_pytask/session.py @@ -30,6 +30,11 @@ class Session: """Optional[List[pytask.report.ExecutionReport]]: Reports for executed tasks.""" exit_code = attr.ib(default=ExitCode.OK) + collection_start = attr.ib(default=None) + collection_end = attr.ib(default=None) + execution_start = attr.ib(default=None) + execution_end = attr.ib(default=None) + @classmethod def from_config(cls, config): """Construct the class from a config.""" diff --git a/src/_pytask/shared.py b/src/_pytask/shared.py index 3c51a611..1f09111d 100644 --- a/src/_pytask/shared.py +++ b/src/_pytask/shared.py @@ -1,7 +1,9 @@ """Functions which are used across various modules.""" import glob -from collections.abc import Iterable +from collections.abc import Sequence from pathlib import Path +from typing import Any +from typing import Iterable def to_list(scalar_or_iter): @@ -25,7 +27,7 @@ def to_list(scalar_or_iter): """ return ( [scalar_or_iter] - if isinstance(scalar_or_iter, str) or not isinstance(scalar_or_iter, Iterable) + if isinstance(scalar_or_iter, str) or not isinstance(scalar_or_iter, Sequence) else list(scalar_or_iter) ) @@ -109,3 +111,25 @@ def convert_truthy_or_falsy_to_bool(x): f"Input '{x}' is neither truthy (True, true, 1) or falsy (False, false, 0)." ) return out + + +def find_duplicates(x: Iterable[Any]): + """Find duplicated entries in iterable. + + Examples + -------- + >>> find_duplicates(["a", "b", "a"]) + {'a'} + >>> find_duplicates(["a", "b"]) + set() + + """ + seen = set() + duplicates = set() + + for i in x: + if i in seen: + duplicates.add(i) + seen.add(i) + + return duplicates diff --git a/src/_pytask/traceback.py b/src/_pytask/traceback.py index 67e06ae2..8d835f6f 100644 --- a/src/_pytask/traceback.py +++ b/src/_pytask/traceback.py @@ -17,13 +17,12 @@ def remove_internal_traceback_frames_from_exc_info(exc_info): """Remove internal traceback frames from exception info. If a non-internal traceback frame is found, return the traceback from the first - occurrence downwards. Otherwise, return the whole traceback. + occurrence downwards. 
""" if exc_info is not None: filtered_traceback = _filter_internal_traceback_frames(exc_info[2]) - if filtered_traceback is not None: - exc_info = (*exc_info[:2], filtered_traceback) + exc_info = (*exc_info[:2], filtered_traceback) return exc_info diff --git a/tests/test_clean.py b/tests/test_clean.py index 1c7abfea..976e2568 100644 --- a/tests/test_clean.py +++ b/tests/test_clean.py @@ -84,7 +84,7 @@ def test_clean_dry_run_w_directories(sample_project_path, runner): @pytest.mark.end_to_end def test_clean_force(sample_project_path, runner): result = runner.invoke( - cli, ["clean", "-m", "force", sample_project_path.as_posix()] + cli, ["clean", "--mode", "force", sample_project_path.as_posix()] ) assert "Remove" in result.output @@ -99,7 +99,7 @@ def test_clean_force(sample_project_path, runner): @pytest.mark.end_to_end def test_clean_force_w_directories(sample_project_path, runner): result = runner.invoke( - cli, ["clean", "-d", "-m", "force", sample_project_path.as_posix()] + cli, ["clean", "-d", "--mode", "force", sample_project_path.as_posix()] ) assert "Remove" in result.output @@ -112,7 +112,7 @@ def test_clean_force_w_directories(sample_project_path, runner): def test_clean_interactive(sample_project_path, runner): result = runner.invoke( cli, - ["clean", "-m", "interactive", sample_project_path.as_posix()], + ["clean", "--mode", "interactive", sample_project_path.as_posix()], # Three instead of two because the compiled .pyc file is also present. input="y\ny\ny", ) @@ -130,7 +130,7 @@ def test_clean_interactive(sample_project_path, runner): def test_clean_interactive_w_directories(sample_project_path, runner): result = runner.invoke( cli, - ["clean", "-d", "-m", "interactive", sample_project_path.as_posix()], + ["clean", "-d", "--mode", "interactive", sample_project_path.as_posix()], # Three instead of two because the compiled .pyc file is also present. 
input="y\ny\ny", ) diff --git a/tests/test_collect_command.py b/tests/test_collect_command.py index 6772fa50..6c9f23b5 100644 --- a/tests/test_collect_command.py +++ b/tests/test_collect_command.py @@ -221,8 +221,8 @@ def test_organize_tasks(): task = DummyClass() task.name = "prefix::task_dummy" task.path = "task_path.py" - task.depends_on = [dependency] - task.produces = [] + task.depends_on = {0: dependency} + task.produces = {} result = _organize_tasks([task]) diff --git a/tests/test_execute.py b/tests/test_execute.py index 94c43043..3b726539 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -1,4 +1,3 @@ -import itertools import os import textwrap @@ -8,27 +7,26 @@ @pytest.mark.end_to_end @pytest.mark.parametrize( - "dependencies, products", - itertools.product( - ([], ["in.txt"], ["in_1.txt", "in_2.txt"]), - (["out.txt"], ["out_1.txt", "out_2.txt"]), - ), + "dependencies", + [[], ["in.txt"], ["in_1.txt", "in_2.txt"]], ) +@pytest.mark.parametrize("products", [["out.txt"], ["out_1.txt", "out_2.txt"]]) def test_execution_w_varying_dependencies_products(tmp_path, dependencies, products): source = f""" import pytask from pathlib import Path - @pytask.mark.depends_on({dependencies}) @pytask.mark.produces({products}) def task_dummy(depends_on, produces): - if not isinstance(produces, list): + if isinstance(produces, dict): + produces = produces.values() + elif isinstance(produces, Path): produces = [produces] for product in produces: product.touch() """ - tmp_path.joinpath("task_dummpy.py").write_text(textwrap.dedent(source)) + tmp_path.joinpath("task_dummy.py").write_text(textwrap.dedent(source)) for dependency in dependencies: tmp_path.joinpath(dependency).touch() @@ -42,12 +40,10 @@ def test_depends_on_and_produces_can_be_used_in_task(tmp_path): import pytask from pathlib import Path - @pytask.mark.depends_on("in.txt") @pytask.mark.produces("out.txt") def task_dummy(depends_on, produces): assert isinstance(depends_on, Path) and isinstance(produces, Path) - produces.write_text(depends_on.read_text()) """ tmp_path.joinpath("task_dummy.py").write_text(textwrap.dedent(source)) diff --git a/tests/test_nodes.py b/tests/test_nodes.py index a991eebb..8b64c47d 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -1,5 +1,10 @@ +from contextlib import ExitStack as does_not_raise # noqa: N813 + import pytask import pytest +from _pytask.nodes import _check_that_names_are_not_used_multiple_times +from _pytask.nodes import _convert_nodes_to_dictionary +from _pytask.nodes import _convert_objects_to_list_of_tuples from _pytask.nodes import _extract_nodes_from_function_markers from _pytask.nodes import depends_on from _pytask.nodes import MetaNode @@ -10,7 +15,7 @@ @pytest.mark.unit @pytest.mark.parametrize("decorator", [pytask.mark.depends_on, pytask.mark.produces]) @pytest.mark.parametrize( - "values, expected", [("a", ["a"]), (["b"], ["b"]), (["e", "f"], ["e", "f"])] + "values, expected", [("a", ["a"]), (["b"], [["b"]]), (["e", "f"], [["e", "f"]])] ) def test_extract_args_from_mark(decorator, values, expected): @decorator(values) @@ -28,8 +33,8 @@ def task_dummy(): "values, expected", [ ({"objects": "a"}, ["a"]), - ({"objects": ["b"]}, ["b"]), - ({"objects": ["e", "f"]}, ["e", "f"]), + ({"objects": ["b"]}, [["b"]]), + ({"objects": ["e", "f"]}, [["e", "f"]]), ], ) def test_extract_kwargs_from_mark(decorator, values, expected): @@ -92,3 +97,53 @@ def state(self): task = Node() assert isinstance(task, MetaNode) + + +@pytest.mark.parametrize( + ("x", "expected"), + [ + (["string"], 
[("string",)]), + (("string",), [("string",)]), + (range(2), [(0,), (1,)]), + ([{"a": 0, "b": 1}], [("a", 0), ("b", 1)]), + ( + ["a", ("b", "c"), {"d": 1, "e": 1}], + [("a",), ("b",), ("c",), ("d", 1), ("e", 1)], + ), + ], +) +def test_convert_objects_to_list_of_tuples(x, expected): + result = _convert_objects_to_list_of_tuples(x) + assert result == expected + + +ERROR = "'@pytask.mark.depends_on' has nodes with the same name:" + + +@pytest.mark.parametrize( + ("x", "expectation"), + [ + ([(0, "a"), (0, "b")], pytest.raises(ValueError, match=ERROR)), + ([("a", 0), ("a", 1)], pytest.raises(ValueError, match=ERROR)), + ([("a", 0), ("b",), ("a", 1)], pytest.raises(ValueError, match=ERROR)), + ([("a", 0), ("b", 0), ("a", 1)], pytest.raises(ValueError, match=ERROR)), + ([("a",), ("a")], does_not_raise()), + ([("a", 0), ("a",)], does_not_raise()), + ([("a", 0), ("b", 1)], does_not_raise()), + ], +) +def test_check_that_names_are_not_used_multiple_times(x, expectation): + with expectation: + _check_that_names_are_not_used_multiple_times(x, "depends_on") + + +@pytest.mark.parametrize( + ("x", "expected"), + [ + ([("a",), ("b",)], {0: "a", 1: "b"}), + ([(1, "a"), ("b",), (0, "c")], {1: "a", 2: "b", 0: "c"}), + ], +) +def test_convert_nodes_to_dictionary(x, expected): + result = _convert_nodes_to_dictionary(x) + assert result == expected diff --git a/tests/test_resolve_dependencies.py b/tests/test_resolve_dependencies.py index de5c0e5f..f35e9893 100644 --- a/tests/test_resolve_dependencies.py +++ b/tests/test_resolve_dependencies.py @@ -1,9 +1,11 @@ import textwrap +from contextlib import ExitStack as does_not_raise # noqa: N813 import attr import networkx as nx import pytest from _pytask.exceptions import NodeNotFoundError +from _pytask.exceptions import ResolvingDependenciesError from _pytask.nodes import MetaNode from _pytask.nodes import MetaTask from _pytask.resolve_dependencies import _check_if_root_nodes_are_available @@ -14,8 +16,8 @@ @attr.s class Task(MetaTask): name = attr.ib(type=str) - depends_on = attr.ib(default=[]) - produces = attr.ib(default=[]) + depends_on = attr.ib(factory=dict) + produces = attr.ib(factory=dict) def execute(self): pass @@ -37,7 +39,10 @@ def state(self): @pytest.mark.unit def test_create_dag(): - task = Task(name="task", depends_on=[Node(name="node_1"), Node(name="node_2")]) + task = Task( + name="task", + depends_on={0: Node(name="node_1"), 1: Node(name="node_2")}, + ) dag = pytask_resolve_dependencies_create_dag([task]) @@ -55,22 +60,39 @@ def test_check_if_root_nodes_are_available(): dag.add_node(available_node.name, node=available_node) dag.add_edge(available_node.name, task.name) - _check_if_root_nodes_are_available(dag) + with does_not_raise(): + _check_if_root_nodes_are_available(dag) missing_node = Node("missing") dag.add_node(missing_node.name, node=missing_node) dag.add_edge(missing_node.name, task.name) - with pytest.raises(NodeNotFoundError): + with pytest.raises(ResolvingDependenciesError): _check_if_root_nodes_are_available(dag) @pytest.mark.end_to_end -def test_cycle_in_dag(tmp_path, runner): +def test_check_if_root_nodes_are_available_end_to_end(tmp_path, runner): source = """ import pytask - from pathlib import Path + @pytask.mark.depends_on("in.txt") + @pytask.mark.produces("out.txt") + def task_dummy(produces): + produces.write_text("1") + """ + tmp_path.joinpath("task_dummy.py").write_text(textwrap.dedent(source)) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + + assert result.exit_code == 4 + assert "Failures during resolving 
dependencies" in result.output + + +@pytest.mark.end_to_end +def test_cycle_in_dag(tmp_path, runner): + source = """ + import pytask @pytask.mark.depends_on("out_2.txt") @pytask.mark.produces("out_1.txt") @@ -87,4 +109,25 @@ def task_2(produces): result = runner.invoke(cli, [tmp_path.as_posix()]) assert result.exit_code == 4 - assert "Errors while resolving dependencies" in result.output + assert "Failures during resolving dependencies" in result.output + + +@pytest.mark.end_to_end +def test_two_tasks_have_the_same_product(tmp_path, runner): + source = """ + import pytask + + @pytask.mark.produces("out.txt") + def task_1(produces): + produces.write_text("1") + + @pytask.mark.produces("out.txt") + def task_2(produces): + produces.write_text("2") + """ + tmp_path.joinpath("task_dummy.py").write_text(textwrap.dedent(source)) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + + assert result.exit_code == 4 + assert "Failures during resolving dependencies" in result.output