Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ all releases are available on `PyPI <https://pypi.org/project/pytask>`_ and
-------------------

- :gh:`80` replaces some remaining formatting using ``pprint`` with ``rich``.
- :gh:`81` adds a warning if a path is not correctly cased on a case-insensitive file
system. This facilitates cross-platform builds of projects. Deactivate the check by
setting ``check_casing_of_paths = false`` in the configuration file.


0.0.14 - 2021-03-23
Expand Down
21 changes: 21 additions & 0 deletions docs/reference_guides/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,27 @@ falsy.
The options
-----------

.. confval:: check_casing_of_paths

Since pytask encourages platform-independent reproducibility, it will raise a
warning if you used a path with incorrect casing on a case-insensitive file system.
For example, the path ``TeXt.TxT`` will match the actual file ``text.txt`` on
case-insensitive file systems (usually Windows and macOS), but not on case-sensitive
systems (usually Linux).

If you have very strong reasons for relying on this inaccuracy, although, it is
strongly discouraged, you can deactivate the warning in the configuration file with

.. code-block:: ini

check_casing_of_paths = false

.. note::

An error is only raised on Windows when a case-insensitive path is used.
Contributions are welcome to also support macOS.


.. confval:: ignore

pytask can ignore files and directories and exclude some tasks or reduce the
Expand Down
37 changes: 31 additions & 6 deletions src/_pytask/collect.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
"""Implement functionality to collect tasks."""
import importlib
import inspect
import os
import sys
import time
from pathlib import Path
from typing import Generator
from typing import List

from _pytask.config import hookimpl
from _pytask.config import IS_FILE_SYSTEM_CASE_SENSITIVE
from _pytask.console import console
from _pytask.enums import ColorCode
from _pytask.exceptions import CollectionError
from _pytask.mark import has_marker
from _pytask.nodes import create_task_name
from _pytask.nodes import FilePathNode
from _pytask.nodes import PythonFunctionTask
from _pytask.nodes import reduce_node_name
from _pytask.path import find_case_sensitive_path
from _pytask.report import CollectionReport
from rich.traceback import Traceback

Expand Down Expand Up @@ -126,9 +130,8 @@ def pytask_collect_task_protocol(session, path, name, obj):
return CollectionReport.from_node(task)

except Exception:
return CollectionReport.from_exception(
exc_info=sys.exc_info(), node=locals().get("task")
)
task = PythonFunctionTask(name, create_task_name(path, name), path, None)
return CollectionReport.from_exception(exc_info=sys.exc_info(), node=task)


@hookimpl(trylast=True)
Expand All @@ -146,19 +149,29 @@ def pytask_collect_task(session, path, name, obj):
)


_TEMPLATE_ERROR = (
"The provided path of the dependency/product in the marker is {}, but the path of "
"the file on disk is {}. Case-sensitive file systems would raise an error.\n\n"
"Please, align the names to ensure reproducibility on case-sensitive file systems "
"(often Linux or macOS) or disable this error with 'check_casing_of_paths = false'."
)


@hookimpl(trylast=True)
def pytask_collect_node(path, node):
def pytask_collect_node(session, path, node):
"""Collect a node of a task as a :class:`pytask.nodes.FilePathNode`.

Strings are assumed to be paths. This might be a strict assumption, but since this
hook is attempted at last and possible errors will be shown, it is reasonable and
hook is executed at last and possible errors will be shown, it seems reasonable and
unproblematic.

``trylast=True`` might be necessary if other plugins try to parse strings themselves
like a plugin for downloading files which depends on URLs given as strings.

Parameters
----------
session : _pytask.session.Session
The session.
path : Union[str, pathlib.Path]
The path to file where the task and node are specified.
node : Union[str, pathlib.Path]
Expand All @@ -170,7 +183,19 @@ def pytask_collect_node(path, node):
node = Path(node)
if isinstance(node, Path):
if not node.is_absolute():
node = path.parent.joinpath(node)
# ``normpath`` removes ``../`` from the path which is necessary for the
# casing check which will fail since ``.resolves()`` also normalizes a path.
node = Path(os.path.normpath(path.parent.joinpath(node)))

if (
not IS_FILE_SYSTEM_CASE_SENSITIVE
and session.config["check_casing_of_paths"]
and sys.platform == "win32"
):
case_sensitive_path = find_case_sensitive_path(node, "win32")
if str(node) != str(case_sensitive_path):
raise Exception(_TEMPLATE_ERROR.format(node, case_sensitive_path))

return FilePathNode.from_path(node)


Expand Down
22 changes: 22 additions & 0 deletions src/_pytask/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import configparser
import itertools
import os
import tempfile
import warnings
from pathlib import Path
from typing import List
Expand All @@ -16,13 +17,15 @@

hookimpl = pluggy.HookimplMarker("pytask")


_IGNORED_FOLDERS = [
".git/*",
".hg/*",
".svn/*",
".venv/*",
]


_IGNORED_FILES = [
".codecov.yml",
".gitignore",
Expand All @@ -37,8 +40,10 @@
"tox.ini",
]


_IGNORED_FILES_AND_FOLDERS = _IGNORED_FILES + _IGNORED_FOLDERS


IGNORED_TEMPORARY_FILES_AND_FOLDERS = [
"*.egg-info/*",
".ipynb_checkpoints/*",
Expand All @@ -53,6 +58,15 @@
]


def is_file_system_case_sensitive() -> bool:
"""Check whether the file system is case-sensitive."""
with tempfile.NamedTemporaryFile(prefix="TmP") as tmp_file:
return not os.path.exists(tmp_file.name.lower())


IS_FILE_SYSTEM_CASE_SENSITIVE = is_file_system_case_sensitive()


@hookimpl
def pytask_configure(pm, config_from_cli):
"""Configure pytask."""
Expand Down Expand Up @@ -167,6 +181,14 @@ def pytask_parse_config(config, config_from_cli, config_from_file):
callback=lambda x: x if x is None else int(x),
)

config["check_casing_of_paths"] = get_first_non_none_value(
config_from_cli,
config_from_file,
key="check_casing_of_paths",
default=True,
callback=convert_truthy_or_falsy_to_bool,
)


@hookimpl
def pytask_post_parse(config):
Expand Down
40 changes: 33 additions & 7 deletions src/_pytask/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def from_path_name_function_session(cls, path, name, function, session):

return cls(
base_name=name,
name=_create_task_name(path, name),
name=create_task_name(path, name),
path=path,
function=function,
depends_on=dependencies,
Expand Down Expand Up @@ -174,7 +174,8 @@ def from_path(cls, path: pathlib.Path):
The `lru_cache` decorator ensures that the same object is not collected twice.

"""
path = path.resolve()
if not path.is_absolute():
raise ValueError("FilePathNode must be instantiated from absolute path.")
return cls(path.as_posix(), path, path)

def state(self):
Expand All @@ -185,8 +186,33 @@ def state(self):
return str(self.path.stat().st_mtime)


def _collect_nodes(session, path, name, nodes):
"""Collect nodes for a task."""
def _collect_nodes(
session, path: Path, name: str, nodes: Dict[str, Union[str, Path]]
) -> Dict[str, Path]:
"""Collect nodes for a task.

Parameters
----------
session : _pytask.session.Session
The session.
path : Path
The path to the task whose nodes are collected.
name : str
The name of the task.
nodes : Dict[str, Union[str, Path]]
A dictionary of nodes parsed from the ``depends_on`` or ``produces`` markers.

Returns
-------
Dict[str, Path]
A dictionary of node names and their paths.

Raises
------
NodeNotCollectedError
If the node could not collected.

"""
collected_nodes = {}

for node_name, node in nodes.items():
Expand Down Expand Up @@ -327,13 +353,13 @@ def _convert_nodes_to_dictionary(
return nodes


def _create_task_name(path: Path, base_name: str):
def create_task_name(path: Path, base_name: str):
"""Create the name of a task from a path and the task's base name.

Examples
--------
>>> from pathlib import Path
>>> _create_task_name(Path("module.py"), "task_dummy")
>>> create_task_name(Path("module.py"), "task_dummy")
'module.py::task_dummy'

"""
Expand All @@ -359,7 +385,7 @@ def reduce_node_name(node, paths: List[Path]):

if isinstance(node, MetaTask):
shortened_path = relative_to(node.path, ancestor)
name = _create_task_name(shortened_path, node.base_name)
name = create_task_name(shortened_path, node.base_name)
elif isinstance(node, MetaNode):
name = relative_to(node.path, ancestor).as_posix()
else:
Expand Down
29 changes: 29 additions & 0 deletions src/_pytask/path.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""This module contains code to handle paths."""
import functools
import os
from pathlib import Path
from typing import List
Expand Down Expand Up @@ -86,3 +87,31 @@ def find_common_ancestor(*paths: Union[str, Path]) -> Path:
path = os.path.commonpath(paths)
path = Path(path)
return path


@functools.lru_cache()
def find_case_sensitive_path(path: Path, platform: str) -> Path:
"""Find the case-sensitive path.

On case-insensitive file systems (mostly Windows and Mac), a path like ``text.txt``
and ``TeXt.TxT`` would point to the same file but not on case-sensitive file
systems.

On Windows, we can use :meth:`pathlib.Path.resolve` to find the real path.

This does not work on POSIX systems since Python implements them as if they are
always case-sensitive. Some observations:

- On case-sensitive POSIX systems, :meth:`pathlib.Path.exists` fails with a
case-insensitive path.
- On case-insensitive POSIX systems, :meth:`pathlib.Path.exists` succeeds with a
case-insensitive path.
- On case-insensitive POSIX systems, :meth:`pathlib.Path.resolve` does not return
a case-sensitive path which it does on Windows.

"""
if platform == "win32":
out = path.resolve()
else:
out = path
return out
15 changes: 10 additions & 5 deletions src/_pytask/resolve_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import networkx as nx
from _pytask.config import hookimpl
from _pytask.config import IS_FILE_SYSTEM_CASE_SENSITIVE
from _pytask.console import ARROW_DOWN_ICON
from _pytask.console import console
from _pytask.console import FILE_ICON
Expand Down Expand Up @@ -156,6 +157,14 @@ def _format_cycles(cycles: List[Tuple[str]]) -> str:
return text


_TEMPLATE_ERROR = (
"Some dependencies do not exist or are not produced by any task. See the following "
"tree which shows which dependencies are missing for which tasks.\n\n{}"
)
if IS_FILE_SYSTEM_CASE_SENSITIVE:
_TEMPLATE_ERROR += "\n\n(Hint: Sometimes case sensitivity is at fault.)"


def _check_if_root_nodes_are_available(dag):
missing_root_nodes = []

Expand Down Expand Up @@ -187,11 +196,7 @@ def _check_if_root_nodes_are_available(dag):
dictionary[short_node_name] = short_successors

text = _format_dictionary_to_tree(dictionary, "Missing dependencies:")
raise ResolvingDependenciesError(
"Some dependencies do not exist or are not produced by any task. See the "
"following tree which shows which dependencies are missing for which tasks."
f"\n\n{text}"
)
raise ResolvingDependenciesError(_TEMPLATE_ERROR.format(text))


def _format_dictionary_to_tree(dict_: Dict[str, List[str]], title: str) -> str:
Expand Down
Loading