Skip to content

Commit 7f04fc9

Browse files
authored
Raise error if paths are not correctly cased on case-insensitive Windows. (#81)
1 parent 171671b commit 7f04fc9

File tree

10 files changed

+229
-20
lines changed

10 files changed

+229
-20
lines changed

docs/changes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ all releases are available on `PyPI <https://pypi.org/project/pytask>`_ and
1111
-------------------
1212

1313
- :gh:`80` replaces some remaining formatting using ``pprint`` with ``rich``.
14+
- :gh:`81` raises an error if a path is not correctly cased on a case-insensitive file
15+
system. This facilitates cross-platform builds of projects. Deactivate the check by
16+
setting ``check_casing_of_paths = false`` in the configuration file.
1417

1518

1619
0.0.14 - 2021-03-23

docs/reference_guides/configuration.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,27 @@ falsy.
3030
The options
3131
-----------
3232

33+
.. confval:: check_casing_of_paths
34+
35+
Since pytask encourages platform-independent reproducibility, it will raise an
36+
error if you use a path with incorrect casing on a case-insensitive file system.
37+
For example, the path ``TeXt.TxT`` will match the actual file ``text.txt`` on
38+
case-insensitive file systems (usually Windows and macOS), but not on case-sensitive
39+
systems (usually Linux).
40+
41+
If you have very strong reasons for relying on this inaccuracy, although it is
42+
strongly discouraged, you can deactivate the check in the configuration file with
43+
44+
.. code-block:: ini
45+
46+
check_casing_of_paths = false
47+
48+
.. note::
49+
50+
An error is only raised on Windows when an incorrectly cased path is used.
51+
Contributions are welcome to also support macOS.
52+
53+
3354
.. confval:: ignore
3455

3556
pytask can ignore files and directories and exclude some tasks or reduce the

src/_pytask/collect.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
"""Implement functionality to collect tasks."""
22
import importlib
33
import inspect
4+
import os
45
import sys
56
import time
67
from pathlib import Path
78
from typing import Generator
89
from typing import List
910

1011
from _pytask.config import hookimpl
12+
from _pytask.config import IS_FILE_SYSTEM_CASE_SENSITIVE
1113
from _pytask.console import console
1214
from _pytask.enums import ColorCode
1315
from _pytask.exceptions import CollectionError
1416
from _pytask.mark import has_marker
17+
from _pytask.nodes import create_task_name
1518
from _pytask.nodes import FilePathNode
1619
from _pytask.nodes import PythonFunctionTask
1720
from _pytask.nodes import reduce_node_name
21+
from _pytask.path import find_case_sensitive_path
1822
from _pytask.report import CollectionReport
1923
from rich.traceback import Traceback
2024

@@ -126,9 +130,8 @@ def pytask_collect_task_protocol(session, path, name, obj):
126130
return CollectionReport.from_node(task)
127131

128132
except Exception:
129-
return CollectionReport.from_exception(
130-
exc_info=sys.exc_info(), node=locals().get("task")
131-
)
133+
task = PythonFunctionTask(name, create_task_name(path, name), path, None)
134+
return CollectionReport.from_exception(exc_info=sys.exc_info(), node=task)
132135

133136

134137
@hookimpl(trylast=True)
@@ -146,19 +149,29 @@ def pytask_collect_task(session, path, name, obj):
146149
)
147150

148151

152+
_TEMPLATE_ERROR = (
153+
"The provided path of the dependency/product in the marker is {}, but the path of "
154+
"the file on disk is {}. Case-sensitive file systems would raise an error.\n\n"
155+
"Please, align the names to ensure reproducibility on case-sensitive file systems "
156+
"(often Linux or macOS) or disable this error with 'check_casing_of_paths = false'."
157+
)
158+
159+
149160
@hookimpl(trylast=True)
150-
def pytask_collect_node(path, node):
161+
def pytask_collect_node(session, path, node):
151162
"""Collect a node of a task as a :class:`pytask.nodes.FilePathNode`.
152163
153164
Strings are assumed to be paths. This might be a strict assumption, but since this
154-
hook is attempted at last and possible errors will be shown, it is reasonable and
165+
hook is executed at last and possible errors will be shown, it seems reasonable and
155166
unproblematic.
156167
157168
``trylast=True`` might be necessary if other plugins try to parse strings themselves
158169
like a plugin for downloading files which depends on URLs given as strings.
159170
160171
Parameters
161172
----------
173+
session : _pytask.session.Session
174+
The session.
162175
path : Union[str, pathlib.Path]
163176
The path to file where the task and node are specified.
164177
node : Union[str, pathlib.Path]
@@ -170,7 +183,19 @@ def pytask_collect_node(path, node):
170183
node = Path(node)
171184
if isinstance(node, Path):
172185
if not node.is_absolute():
173-
node = path.parent.joinpath(node)
186+
# ``normpath`` removes ``../`` from the path. This is necessary for the casing
187+
# check, which would otherwise fail since ``.resolve()`` also normalizes the path.
188+
node = Path(os.path.normpath(path.parent.joinpath(node)))
189+
190+
if (
191+
not IS_FILE_SYSTEM_CASE_SENSITIVE
192+
and session.config["check_casing_of_paths"]
193+
and sys.platform == "win32"
194+
):
195+
case_sensitive_path = find_case_sensitive_path(node, "win32")
196+
if str(node) != str(case_sensitive_path):
197+
raise Exception(_TEMPLATE_ERROR.format(node, case_sensitive_path))
198+
174199
return FilePathNode.from_path(node)
175200

176201

src/_pytask/config.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import configparser
33
import itertools
44
import os
5+
import tempfile
56
import warnings
67
from pathlib import Path
78
from typing import List
@@ -16,13 +17,15 @@
1617

1718
hookimpl = pluggy.HookimplMarker("pytask")
1819

20+
1921
_IGNORED_FOLDERS = [
2022
".git/*",
2123
".hg/*",
2224
".svn/*",
2325
".venv/*",
2426
]
2527

28+
2629
_IGNORED_FILES = [
2730
".codecov.yml",
2831
".gitignore",
@@ -37,8 +40,10 @@
3740
"tox.ini",
3841
]
3942

43+
4044
_IGNORED_FILES_AND_FOLDERS = _IGNORED_FILES + _IGNORED_FOLDERS
4145

46+
4247
IGNORED_TEMPORARY_FILES_AND_FOLDERS = [
4348
"*.egg-info/*",
4449
".ipynb_checkpoints/*",
@@ -53,6 +58,15 @@
5358
]
5459

5560

61+
def is_file_system_case_sensitive() -> bool:
62+
"""Check whether the file system is case-sensitive."""
63+
with tempfile.NamedTemporaryFile(prefix="TmP") as tmp_file:
64+
return not os.path.exists(tmp_file.name.lower())
65+
66+
67+
IS_FILE_SYSTEM_CASE_SENSITIVE = is_file_system_case_sensitive()
68+
69+
5670
@hookimpl
5771
def pytask_configure(pm, config_from_cli):
5872
"""Configure pytask."""
@@ -167,6 +181,14 @@ def pytask_parse_config(config, config_from_cli, config_from_file):
167181
callback=lambda x: x if x is None else int(x),
168182
)
169183

184+
config["check_casing_of_paths"] = get_first_non_none_value(
185+
config_from_cli,
186+
config_from_file,
187+
key="check_casing_of_paths",
188+
default=True,
189+
callback=convert_truthy_or_falsy_to_bool,
190+
)
191+
170192

171193
@hookimpl
172194
def pytask_post_parse(config):

src/_pytask/nodes.py

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def from_path_name_function_session(cls, path, name, function, session):
113113

114114
return cls(
115115
base_name=name,
116-
name=_create_task_name(path, name),
116+
name=create_task_name(path, name),
117117
path=path,
118118
function=function,
119119
depends_on=dependencies,
@@ -174,7 +174,8 @@ def from_path(cls, path: pathlib.Path):
174174
The `lru_cache` decorator ensures that the same object is not collected twice.
175175
176176
"""
177-
path = path.resolve()
177+
if not path.is_absolute():
178+
raise ValueError("FilePathNode must be instantiated from absolute path.")
178179
return cls(path.as_posix(), path, path)
179180

180181
def state(self):
@@ -185,8 +186,33 @@ def state(self):
185186
return str(self.path.stat().st_mtime)
186187

187188

188-
def _collect_nodes(session, path, name, nodes):
189-
"""Collect nodes for a task."""
189+
def _collect_nodes(
190+
session, path: Path, name: str, nodes: Dict[str, Union[str, Path]]
191+
) -> Dict[str, Path]:
192+
"""Collect nodes for a task.
193+
194+
Parameters
195+
----------
196+
session : _pytask.session.Session
197+
The session.
198+
path : Path
199+
The path to the task whose nodes are collected.
200+
name : str
201+
The name of the task.
202+
nodes : Dict[str, Union[str, Path]]
203+
A dictionary of nodes parsed from the ``depends_on`` or ``produces`` markers.
204+
205+
Returns
206+
-------
207+
Dict[str, Path]
208+
A dictionary of node names and their paths.
209+
210+
Raises
211+
------
212+
NodeNotCollectedError
213+
If the node could not be collected.
214+
215+
"""
190216
collected_nodes = {}
191217

192218
for node_name, node in nodes.items():
@@ -327,13 +353,13 @@ def _convert_nodes_to_dictionary(
327353
return nodes
328354

329355

330-
def _create_task_name(path: Path, base_name: str):
356+
def create_task_name(path: Path, base_name: str):
331357
"""Create the name of a task from a path and the task's base name.
332358
333359
Examples
334360
--------
335361
>>> from pathlib import Path
336-
>>> _create_task_name(Path("module.py"), "task_dummy")
362+
>>> create_task_name(Path("module.py"), "task_dummy")
337363
'module.py::task_dummy'
338364
339365
"""
@@ -359,7 +385,7 @@ def reduce_node_name(node, paths: List[Path]):
359385

360386
if isinstance(node, MetaTask):
361387
shortened_path = relative_to(node.path, ancestor)
362-
name = _create_task_name(shortened_path, node.base_name)
388+
name = create_task_name(shortened_path, node.base_name)
363389
elif isinstance(node, MetaNode):
364390
name = relative_to(node.path, ancestor).as_posix()
365391
else:

src/_pytask/path.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""This module contains code to handle paths."""
2+
import functools
23
import os
34
from pathlib import Path
45
from typing import List
@@ -86,3 +87,31 @@ def find_common_ancestor(*paths: Union[str, Path]) -> Path:
8687
path = os.path.commonpath(paths)
8788
path = Path(path)
8889
return path
90+
91+
92+
@functools.lru_cache()
93+
def find_case_sensitive_path(path: Path, platform: str) -> Path:
94+
"""Find the case-sensitive path.
95+
96+
On case-insensitive file systems (mostly Windows and Mac), paths like ``text.txt``
97+
and ``TeXt.TxT`` point to the same file, but they do not on case-sensitive file
98+
systems.
99+
100+
On Windows, we can use :meth:`pathlib.Path.resolve` to find the real path.
101+
102+
This does not work on POSIX systems since Python implements them as if they are
103+
always case-sensitive. Some observations:
104+
105+
- On case-sensitive POSIX systems, :meth:`pathlib.Path.exists` fails with a
106+
case-insensitive path.
107+
- On case-insensitive POSIX systems, :meth:`pathlib.Path.exists` succeeds with a
108+
case-insensitive path.
109+
- On case-insensitive POSIX systems, :meth:`pathlib.Path.resolve` does not return
110+
a case-sensitive path which it does on Windows.
111+
112+
"""
113+
if platform == "win32":
114+
out = path.resolve()
115+
else:
116+
out = path
117+
return out

src/_pytask/resolve_dependencies.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import networkx as nx
88
from _pytask.config import hookimpl
9+
from _pytask.config import IS_FILE_SYSTEM_CASE_SENSITIVE
910
from _pytask.console import ARROW_DOWN_ICON
1011
from _pytask.console import console
1112
from _pytask.console import FILE_ICON
@@ -156,6 +157,14 @@ def _format_cycles(cycles: List[Tuple[str]]) -> str:
156157
return text
157158

158159

160+
_TEMPLATE_ERROR = (
161+
"Some dependencies do not exist or are not produced by any task. See the following "
162+
"tree which shows which dependencies are missing for which tasks.\n\n{}"
163+
)
164+
if IS_FILE_SYSTEM_CASE_SENSITIVE:
165+
_TEMPLATE_ERROR += "\n\n(Hint: Sometimes case sensitivity is at fault.)"
166+
167+
159168
def _check_if_root_nodes_are_available(dag):
160169
missing_root_nodes = []
161170

@@ -187,11 +196,7 @@ def _check_if_root_nodes_are_available(dag):
187196
dictionary[short_node_name] = short_successors
188197

189198
text = _format_dictionary_to_tree(dictionary, "Missing dependencies:")
190-
raise ResolvingDependenciesError(
191-
"Some dependencies do not exist or are not produced by any task. See the "
192-
"following tree which shows which dependencies are missing for which tasks."
193-
f"\n\n{text}"
194-
)
199+
raise ResolvingDependenciesError(_TEMPLATE_ERROR.format(text))
195200

196201

197202
def _format_dictionary_to_tree(dict_: Dict[str, List[str]], title: str) -> str:

0 commit comments

Comments
 (0)