diff --git a/docs/changes.rst b/docs/changes.rst index cbe0dbe4..6e83862e 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -11,6 +11,9 @@ all releases are available on `PyPI `_ and ------------------- - :gh:`80` replaces some remaining formatting using ``pprint`` with ``rich``. +- :gh:`81` adds a warning if a path is not correctly cased on a case-insensitive file + system. This facilitates cross-platform builds of projects. Deactivate the check by + setting ``check_casing_of_paths = false`` in the configuration file. 0.0.14 - 2021-03-23 diff --git a/docs/reference_guides/configuration.rst b/docs/reference_guides/configuration.rst index c6eebcaa..9186175e 100644 --- a/docs/reference_guides/configuration.rst +++ b/docs/reference_guides/configuration.rst @@ -30,6 +30,27 @@ falsy. The options ----------- +.. confval:: check_casing_of_paths + + Since pytask encourages platform-independent reproducibility, it will raise a + warning if you used a path with incorrect casing on a case-insensitive file system. + For example, the path ``TeXt.TxT`` will match the actual file ``text.txt`` on + case-insensitive file systems (usually Windows and macOS), but not on case-sensitive + systems (usually Linux). + + If you have very strong reasons for relying on this inaccuracy, although, it is + strongly discouraged, you can deactivate the warning in the configuration file with + + .. code-block:: ini + + check_casing_of_paths = false + + .. note:: + + An error is only raised on Windows when a case-insensitive path is used. + Contributions are welcome to also support macOS. + + .. confval:: ignore pytask can ignore files and directories and exclude some tasks or reduce the diff --git a/src/_pytask/collect.py b/src/_pytask/collect.py index e73141c6..c1bbd069 100644 --- a/src/_pytask/collect.py +++ b/src/_pytask/collect.py @@ -1,6 +1,7 @@ """Implement functionality to collect tasks.""" import importlib import inspect +import os import sys import time from pathlib import Path @@ -8,13 +9,16 @@ from typing import List from _pytask.config import hookimpl +from _pytask.config import IS_FILE_SYSTEM_CASE_SENSITIVE from _pytask.console import console from _pytask.enums import ColorCode from _pytask.exceptions import CollectionError from _pytask.mark import has_marker +from _pytask.nodes import create_task_name from _pytask.nodes import FilePathNode from _pytask.nodes import PythonFunctionTask from _pytask.nodes import reduce_node_name +from _pytask.path import find_case_sensitive_path from _pytask.report import CollectionReport from rich.traceback import Traceback @@ -126,9 +130,8 @@ def pytask_collect_task_protocol(session, path, name, obj): return CollectionReport.from_node(task) except Exception: - return CollectionReport.from_exception( - exc_info=sys.exc_info(), node=locals().get("task") - ) + task = PythonFunctionTask(name, create_task_name(path, name), path, None) + return CollectionReport.from_exception(exc_info=sys.exc_info(), node=task) @hookimpl(trylast=True) @@ -146,12 +149,20 @@ def pytask_collect_task(session, path, name, obj): ) +_TEMPLATE_ERROR = ( + "The provided path of the dependency/product in the marker is {}, but the path of " + "the file on disk is {}. Case-sensitive file systems would raise an error.\n\n" + "Please, align the names to ensure reproducibility on case-sensitive file systems " + "(often Linux or macOS) or disable this error with 'check_casing_of_paths = false'." +) + + @hookimpl(trylast=True) -def pytask_collect_node(path, node): +def pytask_collect_node(session, path, node): """Collect a node of a task as a :class:`pytask.nodes.FilePathNode`. Strings are assumed to be paths. This might be a strict assumption, but since this - hook is attempted at last and possible errors will be shown, it is reasonable and + hook is executed at last and possible errors will be shown, it seems reasonable and unproblematic. ``trylast=True`` might be necessary if other plugins try to parse strings themselves @@ -159,6 +170,8 @@ def pytask_collect_node(path, node): Parameters ---------- + session : _pytask.session.Session + The session. path : Union[str, pathlib.Path] The path to file where the task and node are specified. node : Union[str, pathlib.Path] @@ -170,7 +183,19 @@ def pytask_collect_node(path, node): node = Path(node) if isinstance(node, Path): if not node.is_absolute(): - node = path.parent.joinpath(node) + # ``normpath`` removes ``../`` from the path which is necessary for the + # casing check which will fail since ``.resolves()`` also normalizes a path. + node = Path(os.path.normpath(path.parent.joinpath(node))) + + if ( + not IS_FILE_SYSTEM_CASE_SENSITIVE + and session.config["check_casing_of_paths"] + and sys.platform == "win32" + ): + case_sensitive_path = find_case_sensitive_path(node, "win32") + if str(node) != str(case_sensitive_path): + raise Exception(_TEMPLATE_ERROR.format(node, case_sensitive_path)) + return FilePathNode.from_path(node) diff --git a/src/_pytask/config.py b/src/_pytask/config.py index 60f4690f..621101ba 100644 --- a/src/_pytask/config.py +++ b/src/_pytask/config.py @@ -2,6 +2,7 @@ import configparser import itertools import os +import tempfile import warnings from pathlib import Path from typing import List @@ -16,6 +17,7 @@ hookimpl = pluggy.HookimplMarker("pytask") + _IGNORED_FOLDERS = [ ".git/*", ".hg/*", @@ -23,6 +25,7 @@ ".venv/*", ] + _IGNORED_FILES = [ ".codecov.yml", ".gitignore", @@ -37,8 +40,10 @@ "tox.ini", ] + _IGNORED_FILES_AND_FOLDERS = _IGNORED_FILES + _IGNORED_FOLDERS + IGNORED_TEMPORARY_FILES_AND_FOLDERS = [ "*.egg-info/*", ".ipynb_checkpoints/*", @@ -53,6 +58,15 @@ ] +def is_file_system_case_sensitive() -> bool: + """Check whether the file system is case-sensitive.""" + with tempfile.NamedTemporaryFile(prefix="TmP") as tmp_file: + return not os.path.exists(tmp_file.name.lower()) + + +IS_FILE_SYSTEM_CASE_SENSITIVE = is_file_system_case_sensitive() + + @hookimpl def pytask_configure(pm, config_from_cli): """Configure pytask.""" @@ -167,6 +181,14 @@ def pytask_parse_config(config, config_from_cli, config_from_file): callback=lambda x: x if x is None else int(x), ) + config["check_casing_of_paths"] = get_first_non_none_value( + config_from_cli, + config_from_file, + key="check_casing_of_paths", + default=True, + callback=convert_truthy_or_falsy_to_bool, + ) + @hookimpl def pytask_post_parse(config): diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index 7660cd11..9cd4ddcf 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -113,7 +113,7 @@ def from_path_name_function_session(cls, path, name, function, session): return cls( base_name=name, - name=_create_task_name(path, name), + name=create_task_name(path, name), path=path, function=function, depends_on=dependencies, @@ -174,7 +174,8 @@ def from_path(cls, path: pathlib.Path): The `lru_cache` decorator ensures that the same object is not collected twice. """ - path = path.resolve() + if not path.is_absolute(): + raise ValueError("FilePathNode must be instantiated from absolute path.") return cls(path.as_posix(), path, path) def state(self): @@ -185,8 +186,33 @@ def state(self): return str(self.path.stat().st_mtime) -def _collect_nodes(session, path, name, nodes): - """Collect nodes for a task.""" +def _collect_nodes( + session, path: Path, name: str, nodes: Dict[str, Union[str, Path]] +) -> Dict[str, Path]: + """Collect nodes for a task. + + Parameters + ---------- + session : _pytask.session.Session + The session. + path : Path + The path to the task whose nodes are collected. + name : str + The name of the task. + nodes : Dict[str, Union[str, Path]] + A dictionary of nodes parsed from the ``depends_on`` or ``produces`` markers. + + Returns + ------- + Dict[str, Path] + A dictionary of node names and their paths. + + Raises + ------ + NodeNotCollectedError + If the node could not collected. + + """ collected_nodes = {} for node_name, node in nodes.items(): @@ -327,13 +353,13 @@ def _convert_nodes_to_dictionary( return nodes -def _create_task_name(path: Path, base_name: str): +def create_task_name(path: Path, base_name: str): """Create the name of a task from a path and the task's base name. Examples -------- >>> from pathlib import Path - >>> _create_task_name(Path("module.py"), "task_dummy") + >>> create_task_name(Path("module.py"), "task_dummy") 'module.py::task_dummy' """ @@ -359,7 +385,7 @@ def reduce_node_name(node, paths: List[Path]): if isinstance(node, MetaTask): shortened_path = relative_to(node.path, ancestor) - name = _create_task_name(shortened_path, node.base_name) + name = create_task_name(shortened_path, node.base_name) elif isinstance(node, MetaNode): name = relative_to(node.path, ancestor).as_posix() else: diff --git a/src/_pytask/path.py b/src/_pytask/path.py index db7ff3b0..92dbfe84 100644 --- a/src/_pytask/path.py +++ b/src/_pytask/path.py @@ -1,4 +1,5 @@ """This module contains code to handle paths.""" +import functools import os from pathlib import Path from typing import List @@ -86,3 +87,31 @@ def find_common_ancestor(*paths: Union[str, Path]) -> Path: path = os.path.commonpath(paths) path = Path(path) return path + + +@functools.lru_cache() +def find_case_sensitive_path(path: Path, platform: str) -> Path: + """Find the case-sensitive path. + + On case-insensitive file systems (mostly Windows and Mac), a path like ``text.txt`` + and ``TeXt.TxT`` would point to the same file but not on case-sensitive file + systems. + + On Windows, we can use :meth:`pathlib.Path.resolve` to find the real path. + + This does not work on POSIX systems since Python implements them as if they are + always case-sensitive. Some observations: + + - On case-sensitive POSIX systems, :meth:`pathlib.Path.exists` fails with a + case-insensitive path. + - On case-insensitive POSIX systems, :meth:`pathlib.Path.exists` succeeds with a + case-insensitive path. + - On case-insensitive POSIX systems, :meth:`pathlib.Path.resolve` does not return + a case-sensitive path which it does on Windows. + + """ + if platform == "win32": + out = path.resolve() + else: + out = path + return out diff --git a/src/_pytask/resolve_dependencies.py b/src/_pytask/resolve_dependencies.py index 538ae774..82dfaddb 100644 --- a/src/_pytask/resolve_dependencies.py +++ b/src/_pytask/resolve_dependencies.py @@ -6,6 +6,7 @@ import networkx as nx from _pytask.config import hookimpl +from _pytask.config import IS_FILE_SYSTEM_CASE_SENSITIVE from _pytask.console import ARROW_DOWN_ICON from _pytask.console import console from _pytask.console import FILE_ICON @@ -156,6 +157,14 @@ def _format_cycles(cycles: List[Tuple[str]]) -> str: return text +_TEMPLATE_ERROR = ( + "Some dependencies do not exist or are not produced by any task. See the following " + "tree which shows which dependencies are missing for which tasks.\n\n{}" +) +if IS_FILE_SYSTEM_CASE_SENSITIVE: + _TEMPLATE_ERROR += "\n\n(Hint: Sometimes case sensitivity is at fault.)" + + def _check_if_root_nodes_are_available(dag): missing_root_nodes = [] @@ -187,11 +196,7 @@ def _check_if_root_nodes_are_available(dag): dictionary[short_node_name] = short_successors text = _format_dictionary_to_tree(dictionary, "Missing dependencies:") - raise ResolvingDependenciesError( - "Some dependencies do not exist or are not produced by any task. See the " - "following tree which shows which dependencies are missing for which tasks." - f"\n\n{text}" - ) + raise ResolvingDependenciesError(_TEMPLATE_ERROR.format(text)) def _format_dictionary_to_tree(dict_: Dict[str, List[str]], title: str) -> str: diff --git a/tests/test_collect.py b/tests/test_collect.py index ef326d6c..f1d6e468 100644 --- a/tests/test_collect.py +++ b/tests/test_collect.py @@ -1,7 +1,12 @@ +import sys import textwrap +from contextlib import ExitStack as does_not_raise # noqa: N813 +from pathlib import Path import pytest +from _pytask.collect import pytask_collect_node from _pytask.exceptions import NodeNotCollectedError +from _pytask.session import Session from pytask import main @@ -112,3 +117,52 @@ def test_collect_files_w_custom_file_name_pattern( assert session.exit_code == 0 assert len(session.tasks) == expected_collected_tasks + + +@pytest.mark.unit +@pytest.mark.parametrize( + "session, path, node, expectation, expected", + [ + pytest.param( + Session({"check_casing_of_paths": False}, None), + Path(), + Path.cwd() / "text.txt", + does_not_raise(), + Path.cwd() / "text.txt", + id="test with absolute string path", + ), + ], +) +def test_pytask_collect_node(session, path, node, expectation, expected): + with expectation: + result = pytask_collect_node(session, path, node) + assert str(result.path) == str(expected) + + +@pytest.mark.unit +@pytest.mark.skipif( + sys.platform != "win32", reason="Only works on case-insensitive file systems." +) +def test_pytask_collect_node_raises_error_if_path_is_not_correctly_cased(tmp_path): + session = Session({"check_casing_of_paths": True}, None) + task_path = tmp_path / "task_example.py" + real_node = tmp_path / "text.txt" + real_node.touch() + collected_node = tmp_path / "TeXt.TxT" + + with pytest.raises(Exception, match="The provided path of"): + pytask_collect_node(session, task_path, collected_node) + + +@pytest.mark.unit +def test_pytask_collect_node_does_not_raise_error_if_path_is_not_normalized(tmp_path): + session = Session({"check_casing_of_paths": True}, None) + task_path = tmp_path / "task_example.py" + real_node = tmp_path / "text.txt" + collected_node = f"../{tmp_path.name}/text.txt" + + with pytest.warns(None) as record: + result = pytask_collect_node(session, task_path, collected_node) + + assert str(result.path) == str(real_node) + assert not record diff --git a/tests/test_nodes.py b/tests/test_nodes.py index fde7eb21..c337333b 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -8,8 +8,8 @@ from _pytask.nodes import _convert_nodes_to_dictionary from _pytask.nodes import _convert_objects_to_list_of_tuples from _pytask.nodes import _convert_objects_to_node_dictionary -from _pytask.nodes import _create_task_name from _pytask.nodes import _extract_nodes_from_function_markers +from _pytask.nodes import create_task_name from _pytask.nodes import depends_on from _pytask.nodes import FilePathNode from _pytask.nodes import MetaNode @@ -197,7 +197,7 @@ def test_convert_nodes_to_dictionary(x, expected): ], ) def test_create_task_name(path, name, expected): - result = _create_task_name(path, name) + result = create_task_name(path, name) assert result == expected diff --git a/tests/test_path.py b/tests/test_path.py index 6a9418c9..7fd2f9e6 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -5,6 +5,7 @@ from pathlib import PureWindowsPath import pytest +from _pytask.path import find_case_sensitive_path from _pytask.path import find_closest_ancestor from _pytask.path import find_common_ancestor from _pytask.path import relative_to @@ -89,3 +90,26 @@ def test_find_common_ancestor(path_1, path_2, expectation, expected): with expectation: result = find_common_ancestor(path_1, path_2) assert result == expected + + +@pytest.mark.unit +@pytest.mark.skipif(sys.platform != "win32", reason="Only works on Windows.") +@pytest.mark.parametrize( + "path, existing_paths, expected", + [ + pytest.param("text.txt", [], "text.txt", id="non-existing path stays the same"), + pytest.param("text.txt", ["text.txt"], "text.txt", id="existing path is same"), + pytest.param("Text.txt", ["text.txt"], "text.txt", id="correct path"), + pytest.param( + "d/text.txt", ["D/text.txt"], "D/text.txt", id="correct path in folder" + ), + ], +) +def test_find_case_sensitive_path(tmp_path, path, existing_paths, expected): + for p in [path] + existing_paths: + p = tmp_path / p + p.parent.mkdir(parents=True, exist_ok=True) + p.touch() + + result = find_case_sensitive_path(tmp_path / path, sys.platform) + assert result == tmp_path / expected