From 0e0231d7ac249895b022b0eff2142c7ce04609fa Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 8 Nov 2023 01:59:59 +0100 Subject: [PATCH 1/8] Fix handling of names and signatures of PythonNodes. --- .pre-commit-config.yaml | 2 +- docs/source/changes.md | 1 + src/_pytask/collect.py | 6 +++-- src/_pytask/nodes.py | 13 ++++++++- tests/__snapshots__/test_collect_command.ambr | 18 +++++++++++++ tests/test_collect_command.py | 27 +++++++++++++++++++ tests/test_nodes.py | 2 +- 7 files changed, 64 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7ea8db36..32023ef9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -47,9 +47,9 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.1.4 hooks: + - id: ruff-format - id: ruff args: [--unsafe-fixes] - - id: ruff-format - repo: https://github.com/dosisod/refurb rev: v1.22.1 hooks: diff --git a/docs/source/changes.md b/docs/source/changes.md index 25bcc6fe..20753f1e 100644 --- a/docs/source/changes.md +++ b/docs/source/changes.md @@ -29,6 +29,7 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and - {pull}`480` removes the check for missing root nodes from the generation of the DAG. It is delegated to the check during the execution. - {pull}`481` improves coverage. +- {pull}`482` correctly handles names and signatures of {class}`~pytask.PythonNode`. ## 0.4.1 - 2023-10-11 diff --git a/src/_pytask/collect.py b/src/_pytask/collect.py index 04824c3f..419bf83e 100644 --- a/src/_pytask/collect.py +++ b/src/_pytask/collect.py @@ -325,7 +325,9 @@ def pytask_collect_node(session: Session, path: Path, node_info: NodeInfo) -> PN node = node_info.value if isinstance(node, PythonNode): - node.name = create_name_of_python_node(node_info) + node.node_info = node_info + if not node.name: + node.name = create_name_of_python_node(node_info) return node if isinstance(node, PPathNode) and not node.path.is_absolute(): @@ -363,7 +365,7 @@ def pytask_collect_node(session: Session, path: Path, node_info: NodeInfo) -> PN return PathNode(name=name, path=node) node_name = create_name_of_python_node(node_info) - return PythonNode(value=node, name=node_name) + return PythonNode(value=node, name=node_name, node_info=node_info) def _raise_error_if_casing_of_path_is_wrong( diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index 9e135a9c..22b4d2a6 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -22,6 +22,7 @@ if TYPE_CHECKING: + from _pytask.models import NodeInfo from _pytask.tree_util import PyTree from _pytask.mark import Mark @@ -211,6 +212,8 @@ class PythonNode(PNode): objects that are hashable like strings and tuples. For dictionaries and other non-hashable objects, you need to provide a function that can hash these objects. + node_info + The infos acquired while collecting the node. signature The signature of the node. @@ -229,11 +232,19 @@ class PythonNode(PNode): name: str = "" value: Any | NoDefault = no_default hash: bool | Callable[[Any], bool] = False # noqa: A003 + node_info: NodeInfo | None = None @property def signature(self) -> str: """The unique signature of the node.""" - raw_key = str(hash_value(self.name)) + raw_key = ( + "".join( + str(hash_value(getattr(self.node_info, name))) + for name in ("arg_name", "path", "task_path", "task_name") + ) + if self.node_info + else str(hash_value(self.node_info)) + ) return hashlib.sha256(raw_key.encode()).hexdigest() def load(self, is_product: bool = False) -> Any: diff --git a/tests/__snapshots__/test_collect_command.ambr b/tests/__snapshots__/test_collect_command.ambr index b2c3841c..bfd98afd 100644 --- a/tests/__snapshots__/test_collect_command.ambr +++ b/tests/__snapshots__/test_collect_command.ambr @@ -25,3 +25,21 @@ ──────────────────────────────────────────────────────────────────────────────── ''' # --- +# name: test_more_nested_pytree_and_python_node_as_return_with_names + ''' + ───────────────────────────── Start pytask session ───────────────────────────── + Platform: -- Python , pytask , pluggy + Root: + Collected 1 task. + + Collected tasks: + └── 🐍 + └── 📝 + ├── 📄 + ├── 📄 + ├── 📄 + └── 📄 + + ──────────────────────────────────────────────────────────────────────────────── + ''' +# --- diff --git a/tests/test_collect_command.py b/tests/test_collect_command.py index 385a889d..0af0c229 100644 --- a/tests/test_collect_command.py +++ b/tests/test_collect_command.py @@ -619,6 +619,33 @@ def test_more_nested_pytree_and_python_node_as_return(runner, snapshot_cli, tmp_ from pytask import PythonNode from typing import Dict + nodes = [ + PythonNode(), + (PythonNode(), PythonNode()), + PythonNode() + ] + + def task_example() -> Annotated[Dict[str, str], nodes]: + return [{"first": "a", "second": "b"}, (1, 2), 1] + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + result = runner.invoke(cli, ["collect", "--nodes", tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + if sys.platform != "win32": + assert result.output == snapshot_cli() + + +@pytest.mark.end_to_end() +def test_more_nested_pytree_and_python_node_as_return_with_names( + runner, snapshot_cli, tmp_path +): + source = """ + from pathlib import Path + from typing import Any + from typing_extensions import Annotated + from pytask import PythonNode + from typing import Dict + nodes = [ PythonNode(name="dict"), (PythonNode(name="tuple1"), PythonNode(name="tuple2")), diff --git a/tests/test_nodes.py b/tests/test_nodes.py index 4920184f..de617db7 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -39,7 +39,7 @@ def test_hash_of_python_node(value, hash_, expected): ), ( PythonNode(name="name", value=None), - "c8265d64828f9e007a9108251883a2b63954c326c678fca23c49a0b08ea7c925", + "a1f217807169de3253d1176ea5c45be20f3db2e2e81ea26c918f6008d2eb715b", ), ( Task(base_name="task", path=Path("task.py"), function=None), From 1601cfaa6c1f161d7753b5a7b4e6f228644fcea0 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 8 Nov 2023 02:09:48 +0100 Subject: [PATCH 2/8] shorten ids of python nodes. --- src/_pytask/collect_utils.py | 6 +----- tests/__snapshots__/test_collect_command.ambr | 16 ++++------------ 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/src/_pytask/collect_utils.py b/src/_pytask/collect_utils.py index a3cf1946..6cb892dd 100644 --- a/src/_pytask/collect_utils.py +++ b/src/_pytask/collect_utils.py @@ -684,11 +684,7 @@ def _collect_product( def create_name_of_python_node(node_info: NodeInfo) -> str: """Create name of PythonNode.""" - prefix = ( - node_info.task_path.as_posix() + "::" + node_info.task_name - if node_info.task_path - else node_info.task_name - ) + prefix = node_info.task_name if node_info.task_path else node_info.task_name node_name = prefix + "::" + node_info.arg_name if node_info.path: suffix = "-".join(map(str, node_info.path)) diff --git a/tests/__snapshots__/test_collect_command.ambr b/tests/__snapshots__/test_collect_command.ambr index bfd98afd..dfe9ffb3 100644 --- a/tests/__snapshots__/test_collect_command.ambr +++ b/tests/__snapshots__/test_collect_command.ambr @@ -9,18 +9,10 @@ Collected tasks: └── 🐍 └── 📝 - ├── 📄 - ├── 📄 - ├── 📄 - └── 📄 + ├── 📄 + ├── 📄 + ├── 📄 + └── 📄 ──────────────────────────────────────────────────────────────────────────────── ''' From a2a5f22581a3eca7db6aee5e2213e2594b0f1060 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 8 Nov 2023 10:02:39 +0100 Subject: [PATCH 3/8] Fix test. --- tests/test_nodes.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_nodes.py b/tests/test_nodes.py index de617db7..ada45206 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -1,6 +1,7 @@ from __future__ import annotations import pickle +import sys from pathlib import Path import pytest @@ -39,7 +40,11 @@ def test_hash_of_python_node(value, hash_, expected): ), ( PythonNode(name="name", value=None), - "a1f217807169de3253d1176ea5c45be20f3db2e2e81ea26c918f6008d2eb715b", + # The hash of None constant https://github.com/python/cpython/pull/99541 + # starting 3.12. + "a1f217807169de3253d1176ea5c45be20f3db2e2e81ea26c918f6008d2eb715b" + if sys.version_info >= (3, 12) + else "2af47fd015a32bd1b8d7f207930ca0a370f3a866c042d517350b59187a8f4987", ), ( Task(base_name="task", path=Path("task.py"), function=None), From 5d02daedebc1e5a2f9402dc76faa64db27f743f6 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 8 Nov 2023 10:59:11 +0100 Subject: [PATCH 4/8] Fix. --- src/_pytask/_hashlib.py | 6 ++++++ tests/test_nodes.py | 7 +------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/_pytask/_hashlib.py b/src/_pytask/_hashlib.py index 0b4d530e..99037422 100644 --- a/src/_pytask/_hashlib.py +++ b/src/_pytask/_hashlib.py @@ -219,7 +219,13 @@ def hash_value(value: Any) -> int | str: Compute the hash of paths, strings, and bytes with a hash function or otherwise the hashes are salted. + The hash of None constant https://github.com/python/cpython/pull/99541 starting with + Python 3.12. + """ + # + if value is None: + return 4238894112 if isinstance(value, Path): value = str(value) if isinstance(value, str): diff --git a/tests/test_nodes.py b/tests/test_nodes.py index ada45206..de617db7 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -1,7 +1,6 @@ from __future__ import annotations import pickle -import sys from pathlib import Path import pytest @@ -40,11 +39,7 @@ def test_hash_of_python_node(value, hash_, expected): ), ( PythonNode(name="name", value=None), - # The hash of None constant https://github.com/python/cpython/pull/99541 - # starting 3.12. - "a1f217807169de3253d1176ea5c45be20f3db2e2e81ea26c918f6008d2eb715b" - if sys.version_info >= (3, 12) - else "2af47fd015a32bd1b8d7f207930ca0a370f3a866c042d517350b59187a8f4987", + "a1f217807169de3253d1176ea5c45be20f3db2e2e81ea26c918f6008d2eb715b", ), ( Task(base_name="task", path=Path("task.py"), function=None), From 866f5001a2a8b2c7dd923692ab8676c944ef61c2 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 8 Nov 2023 11:29:52 +0100 Subject: [PATCH 5/8] Harden hash. --- src/_pytask/_hashlib.py | 11 +++++++++-- src/_pytask/nodes.py | 14 ++++++-------- tests/test_nodes.py | 15 +++++++++++++++ 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/src/_pytask/_hashlib.py b/src/_pytask/_hashlib.py index 99037422..57e01468 100644 --- a/src/_pytask/_hashlib.py +++ b/src/_pytask/_hashlib.py @@ -223,9 +223,16 @@ def hash_value(value: Any) -> int | str: Python 3.12. """ - # if value is None: - return 4238894112 + return 0xFCA86420 + if hasattr(value, "_asdict"): + value = value._asdict() + if isinstance(value, dict): + value = "".join( + "".join((str(hash_value(k)), str(hash_value(v)))) for k, v in value.items() + ) + if isinstance(value, (tuple, list)): + value = "".join(str(hash_value(i)) for i in value) if isinstance(value, Path): value = str(value) if isinstance(value, str): diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index 22b4d2a6..98509443 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -237,14 +237,12 @@ class PythonNode(PNode): @property def signature(self) -> str: """The unique signature of the node.""" - raw_key = ( - "".join( - str(hash_value(getattr(self.node_info, name))) - for name in ("arg_name", "path", "task_path", "task_name") - ) - if self.node_info - else str(hash_value(self.node_info)) - ) + if self.node_info: + dict_ = self.node_info._asdict() + dict_.pop("value", None) + raw_key = str(hash_value(dict_)) + else: + raw_key = str(hash_value(self.node_info)) return hashlib.sha256(raw_key.encode()).hexdigest() def load(self, is_product: bool = False) -> Any: diff --git a/tests/test_nodes.py b/tests/test_nodes.py index de617db7..71e39328 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -4,6 +4,7 @@ from pathlib import Path import pytest +from pytask import NodeInfo from pytask import PathNode from pytask import PickleNode from pytask import PNode @@ -41,6 +42,20 @@ def test_hash_of_python_node(value, hash_, expected): PythonNode(name="name", value=None), "a1f217807169de3253d1176ea5c45be20f3db2e2e81ea26c918f6008d2eb715b", ), + ( + PythonNode( + name="name", + value=None, + node_info=NodeInfo( + arg_name="arg_name", + path=(0, 1, "dict_key"), + value=None, + task_path=Path("task_example.py"), + task_name="task_example", + ), + ), + "37eb87f321c429151e5cdf5b5662ed3934ec361b74221337f8ac9e43aa148922", + ), ( Task(base_name="task", path=Path("task.py"), function=None), "4c96feb6042210c859938d4f6fc835ac1bde64960aeda101d2e2367644f9c22b", From 2825f68d3104b3fca3cb2c94ee36a2401996deac Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 8 Nov 2023 11:47:43 +0100 Subject: [PATCH 6/8] Fix. --- src/_pytask/_hashlib.py | 3 ++- tests/test_nodes.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/_pytask/_hashlib.py b/src/_pytask/_hashlib.py index 57e01468..d40a2d59 100644 --- a/src/_pytask/_hashlib.py +++ b/src/_pytask/_hashlib.py @@ -229,7 +229,8 @@ def hash_value(value: Any) -> int | str: value = value._asdict() if isinstance(value, dict): value = "".join( - "".join((str(hash_value(k)), str(hash_value(v)))) for k, v in value.items() + "".join((str(hash_value(k)), str(hash_value(value[k])))) + for k in sorted(value) ) if isinstance(value, (tuple, list)): value = "".join(str(hash_value(i)) for i in value) diff --git a/tests/test_nodes.py b/tests/test_nodes.py index 71e39328..f6e2dbe2 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -54,7 +54,7 @@ def test_hash_of_python_node(value, hash_, expected): task_name="task_example", ), ), - "37eb87f321c429151e5cdf5b5662ed3934ec361b74221337f8ac9e43aa148922", + "17b24b0e3141135172b02c8d898d0337ea79bea0f56b6272d18b6bb8e75f985f", ), ( Task(base_name="task", path=Path("task.py"), function=None), From 6f2a7eac7f148a035907d1321a81da17845df5d6 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 8 Nov 2023 11:51:52 +0100 Subject: [PATCH 7/8] Fix. --- tests/test_execute.py | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tests/test_execute.py b/tests/test_execute.py index eb430836..a23a78a0 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -738,7 +738,7 @@ def task_example() -> Annotated[Dict[str, str], PythonNode(name="result")]: @pytest.mark.end_to_end() -def test_more_nested_pytree_and_python_node_as_return(runner, tmp_path): +def test_more_nested_pytree_and_python_node_as_return_with_names(runner, tmp_path): source = """ from pathlib import Path from typing import Any @@ -758,6 +758,39 @@ def task_example() -> Annotated[Dict[str, str], nodes]: tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) result = runner.invoke(cli, [tmp_path.as_posix()]) assert result.exit_code == ExitCode.OK + assert "1 Succeeded" in result.output + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + assert "1 Skipped" in result.output + + +@pytest.mark.end_to_end() +def test_more_nested_pytree_and_python_node_as_return(runner, tmp_path): + source = """ + from pathlib import Path + from typing import Any + from typing_extensions import Annotated + from pytask import PythonNode + from typing import Dict + + nodes = [ + PythonNode(), + (PythonNode(), PythonNode()), + PythonNode() + ] + + def task_example() -> Annotated[Dict[str, str], nodes]: + return [{"first": "a", "second": "b"}, (1, 2), 1] + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + assert "1 Succeeded" in result.output + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + assert "1 Skipped" in result.output @pytest.mark.end_to_end() From 3a91f23713fabb22f4fb2ee0811c3be0de370e49 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 8 Nov 2023 12:01:34 +0100 Subject: [PATCH 8/8] fix. --- src/_pytask/_hashlib.py | 7 ------- src/_pytask/nodes.py | 14 ++++++++------ tests/test_nodes.py | 2 +- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/_pytask/_hashlib.py b/src/_pytask/_hashlib.py index d40a2d59..a3c4edcf 100644 --- a/src/_pytask/_hashlib.py +++ b/src/_pytask/_hashlib.py @@ -225,13 +225,6 @@ def hash_value(value: Any) -> int | str: """ if value is None: return 0xFCA86420 - if hasattr(value, "_asdict"): - value = value._asdict() - if isinstance(value, dict): - value = "".join( - "".join((str(hash_value(k)), str(hash_value(value[k])))) - for k in sorted(value) - ) if isinstance(value, (tuple, list)): value = "".join(str(hash_value(i)) for i in value) if isinstance(value, Path): diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index 98509443..a296fd47 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -237,12 +237,14 @@ class PythonNode(PNode): @property def signature(self) -> str: """The unique signature of the node.""" - if self.node_info: - dict_ = self.node_info._asdict() - dict_.pop("value", None) - raw_key = str(hash_value(dict_)) - else: - raw_key = str(hash_value(self.node_info)) + raw_key = ( + "".join( + str(hash_value(getattr(self.node_info, name))) + for name in ("arg_name", "path", "task_name", "task_path") + ) + if self.node_info + else str(hash_value(self.node_info)) + ) return hashlib.sha256(raw_key.encode()).hexdigest() def load(self, is_product: bool = False) -> Any: diff --git a/tests/test_nodes.py b/tests/test_nodes.py index f6e2dbe2..b396e326 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -54,7 +54,7 @@ def test_hash_of_python_node(value, hash_, expected): task_name="task_example", ), ), - "17b24b0e3141135172b02c8d898d0337ea79bea0f56b6272d18b6bb8e75f985f", + "7284475a87b8f1aa49c40126c5064269f0ba926265b8fe9158a39a882c6a1512", ), ( Task(base_name="task", path=Path("task.py"), function=None),