From f130f754fdb1db3de7208ca39ec43a228033dd45 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 10 Jan 2024 03:00:48 +0000 Subject: [PATCH 1/5] Add `pathlib._abc.PathModuleBase` Path modules provide a subset of the `os.path` API, specifically those functions needed to provide `PurePathBase` functionality. Each `PurePathBase` subclass references its path module via a `pathmod` class attribute. This commit adds a new `PathModuleBase` class, which provides abstract methods that unconditionally raise `UnsupportedOperation`. An instance of this class is assigned to `PurePathBase.pathmod`, replacing `posixpath`. As a result, `PurePathBase` is no longer POSIX-y by default, and almost[^1] all its methods raise `UnsupportedOperation` courtesy of `pathmod`. Users who subclass `PurePathBase` or `PathBase` should choose the path syntax by setting `pathmod` to `posixpath`, `ntpath`, `os.path`, or their own subclass of `PathModuleBase`, as circumstances demand. [^1] Except `joinpath()`, `__truediv__()`, `__rtruediv__()`. See GH-113888. --- Lib/pathlib/_abc.py | 64 ++++++++++++++++++++++- Lib/test/test_pathlib/test_pathlib_abc.py | 57 +++++++++++++++++++- 2 files changed, 118 insertions(+), 3 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 2fc087d13aee85..bf1bf71f01e9fa 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -132,6 +132,68 @@ class UnsupportedOperation(NotImplementedError): pass +class PathModuleBase: + """Base class for path modules, which do low-level path manipulation. + + Path modules provide a subset of the os.path API, specifically those + functions needed to provide PurePathBase functionality. Each PurePathBase + subclass references its path module via a 'pathmod' class attribute. + + Every method in this base class raises an UnsupportedOperation exception. 
+ """ + + @classmethod + def _unsupported(cls, attr): + raise UnsupportedOperation(f"{cls.__name__}.{attr} is unsupported") + + @property + def sep(self): + """The character used to separate path components.""" + self._unsupported('sep') + + def join(self, *paths): + """Join path segments.""" + self._unsupported('join()') + + def split(self, path): + """Split the path into a pair (head, tail), where *head* is everything + before the final path separator, and *tail* is everything after. + Either part may be empty. + """ + self._unsupported('split()') + + def dirname(self, path): + """Return everything before the final path separator.""" + return self.split(path)[0] + + def basename(self, path): + """Return everything after the final path separator.""" + return self.split(path)[1] + + def splitroot(self, path): + """Split the pathname path into a 3-item tuple (drive, root, tail), + where *drive* is a device name or mount point, *root* is a string of + separators after the drive, and *tail* is everything after the root. + Any part may be empty.""" + self._unsupported('splitroot()') + + def splitdrive(self, path): + """Split the pathname path into a 2-item tuple (drive, tail), where + *drive* is a device name or mount point, and *tail* is everything + after the drive. Either part may be empty.""" + drive, root, rel = self.splitroot(path) + return drive, root + rel + + def normcase(self, path): + """Normalize the case of the path.""" + self._unsupported('normcase()') + + def isabs(self, path): + """Returns whether the path is absolute, i.e. unaffected by the + current directory or drive.""" + self._unsupported('isabs()') + + class PurePathBase: """Base class for pure path objects. @@ -151,7 +213,7 @@ class PurePathBase: # work from occurring when `resolve()` calls `stat()` or `readlink()`. 
'_resolving', ) - pathmod = posixpath + pathmod = PathModuleBase() def __init__(self, *paths): self._raw_paths = paths diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 14df1e69db1f96..76e4c399394e10 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -5,7 +5,7 @@ import stat import unittest -from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase +from pathlib._abc import UnsupportedOperation, PathModuleBase, PurePathBase, PathBase import posixpath from test.support.os_helper import TESTFN @@ -17,6 +17,23 @@ def test_is_notimplemented(self): self.assertTrue(isinstance(UnsupportedOperation(), NotImplementedError)) +class PathModuleBaseTest(unittest.TestCase): + cls = PathModuleBase + + def test_unsupported_operation(self): + m = self.cls() + e = UnsupportedOperation + with self.assertRaises(e): + m.sep + self.assertRaises(e, m.join, 'foo') + self.assertRaises(e, m.split, 'foo') + self.assertRaises(e, m.dirname, 'foo') + self.assertRaises(e, m.basename, 'foo') + self.assertRaises(e, m.splitroot, 'foo') + self.assertRaises(e, m.splitdrive, 'foo') + self.assertRaises(e, m.normcase, 'foo') + self.assertRaises(e, m.isabs, 'foo') + # # Tests for the pure classes. 
# @@ -25,6 +42,40 @@ def test_is_notimplemented(self): class PurePathBaseTest(unittest.TestCase): cls = PurePathBase + def test_unsupported_operation_pure(self): + p = self.cls('foo') + e = UnsupportedOperation + self.assertRaises(e, str, p) + self.assertRaises(e, p.as_posix) + with self.assertRaises(e): + p.drive + with self.assertRaises(e): + p.root + with self.assertRaises(e): + p.anchor + with self.assertRaises(e): + p.parts + with self.assertRaises(e): + p.parent + with self.assertRaises(e): + p.parents + with self.assertRaises(e): + p.name + with self.assertRaises(e): + p.stem + with self.assertRaises(e): + p.suffix + with self.assertRaises(e): + p.suffixes + self.assertRaises(e, p.with_name, 'bar') + self.assertRaises(e, p.with_stem, 'bar') + self.assertRaises(e, p.with_suffix, '.txt') + self.assertRaises(e, p.relative_to, '') + self.assertRaises(e, p.is_relative_to, '') + self.assertRaises(e, p.is_absolute) + self.assertRaises(e, p.is_reserved) + self.assertRaises(e, p.match, '*') + def test_magic_methods(self): P = self.cls self.assertFalse(hasattr(P, '__fspath__')) @@ -39,11 +90,12 @@ def test_magic_methods(self): self.assertIs(P.__ge__, object.__ge__) def test_pathmod(self): - self.assertIs(self.cls.pathmod, posixpath) + self.assertIsInstance(self.cls.pathmod, PathModuleBase) class DummyPurePath(PurePathBase): __slots__ = () + pathmod = posixpath def __eq__(self, other): if not isinstance(other, DummyPurePath): @@ -669,6 +721,7 @@ class DummyPath(PathBase): memory. 
""" __slots__ = () + pathmod = posixpath _files = {} _directories = {} From ba23f5deecd8a12b2045b8be628fbbd1709a6bc8 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 13 Jan 2024 11:24:32 +0000 Subject: [PATCH 2/5] Remove all references to posixpath from ABCs --- Lib/pathlib/__init__.py | 36 +++++++++++++++++++++++ Lib/pathlib/_abc.py | 34 +-------------------- Lib/test/test_pathlib/test_pathlib.py | 1 + Lib/test/test_pathlib/test_pathlib_abc.py | 1 - 4 files changed, 38 insertions(+), 34 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index e70cfe91d322bc..defe64946a3428 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -33,6 +33,15 @@ ] +# Reference for Windows paths can be found at +# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file . +_WIN_RESERVED_NAMES = frozenset( + {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} | + {f'COM{c}' for c in '123456789\xb9\xb2\xb3'} | + {f'LPT{c}' for c in '123456789\xb9\xb2\xb3'} +) + + class _PathParents(Sequence): """This object provides sequence-like access to the logical ancestors of a path. Don't try to construct it yourself.""" @@ -386,6 +395,33 @@ def is_relative_to(self, other, /, *_deprecated): other = self.with_segments(other) return other == self or other in self.parents + def is_absolute(self): + """True if the path is absolute (has both a root and, if applicable, + a drive).""" + if self.pathmod is posixpath: + # Optimization: work with raw paths on POSIX. + for path in self._raw_paths: + if path.startswith('/'): + return True + return False + return self.pathmod.isabs(self) + + def is_reserved(self): + """Return True if the path contains one of the special names reserved + by the system, if any.""" + if self.pathmod is not ntpath or not self.name: + return False + + # NOTE: the rules for reserved names seem somewhat complicated + # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not + # exist). 
We err on the side of caution and return True for paths + # which are not considered reserved by Windows. + if self.drive.startswith('\\\\'): + # UNC paths are never reserved. + return False + name = self.name.partition('.')[0].partition(':')[0].rstrip(' ') + return name.upper() in _WIN_RESERVED_NAMES + def as_uri(self): """Return the path as a URI.""" if not self.is_absolute(): diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index ca7bad631f4bee..754c38b476777c 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -12,7 +12,6 @@ """ import functools -import posixpath from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO @@ -20,14 +19,6 @@ # Internals # -# Reference for Windows paths can be found at -# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file . -_WIN_RESERVED_NAMES = frozenset( - {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} | - {f'COM{c}' for c in '123456789\xb9\xb2\xb3'} | - {f'LPT{c}' for c in '123456789\xb9\xb2\xb3'} -) - _WINERROR_NOT_READY = 21 # drive exists but is not accessible _WINERROR_INVALID_NAME = 123 # fix for bpo-35306 _WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself @@ -456,30 +447,7 @@ def parents(self): def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, a drive).""" - if self.pathmod is posixpath: - # Optimization: work with raw paths on POSIX. - for path in self._raw_paths: - if path.startswith('/'): - return True - return False - else: - return self.pathmod.isabs(self._raw_path) - - def is_reserved(self): - """Return True if the path contains one of the special names reserved - by the system, if any.""" - if self.pathmod is posixpath or not self.name: - return False - - # NOTE: the rules for reserved names seem somewhat complicated - # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not - # exist). 
We err on the side of caution and return True for paths - # which are not considered reserved by Windows. - if self.drive.startswith('\\\\'): - # UNC paths are never reserved. - return False - name = self.name.partition('.')[0].partition(':')[0].rstrip(' ') - return name.upper() in _WIN_RESERVED_NAMES + return self.pathmod.isabs(self._raw_path) def match(self, path_pattern, *, case_sensitive=None): """ diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 1b560adfc3b57a..61d7939ad140b2 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1151,6 +1151,7 @@ def tempdir(self): def test_matches_pathbase_api(self): our_names = {name for name in dir(self.cls) if name[0] != '_'} + our_names.remove('is_reserved') # only present in PurePath path_names = {name for name in dir(pathlib._abc.PathBase) if name[0] != '_'} self.assertEqual(our_names, path_names) for attr_name in our_names: diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 76e4c399394e10..3ec7e06a0452a5 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -73,7 +73,6 @@ def test_unsupported_operation_pure(self): self.assertRaises(e, p.relative_to, '') self.assertRaises(e, p.is_relative_to, '') self.assertRaises(e, p.is_absolute) - self.assertRaises(e, p.is_reserved) self.assertRaises(e, p.match, '*') def test_magic_methods(self): From 2e505ad4c9d21101d07bc7b2862e07dbb1246300 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 13 Jan 2024 11:30:52 +0000 Subject: [PATCH 3/5] Remove unnecessary methods --- Lib/pathlib/_abc.py | 35 +++++++---------------- Lib/test/test_pathlib/test_pathlib_abc.py | 3 -- 2 files changed, 10 insertions(+), 28 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 754c38b476777c..ababeddf2a058b 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -165,14 +165,6 @@ 
def split(self, path): """ self._unsupported('split()') - def dirname(self, path): - """Return everything before the final path separator.""" - return self.split(path)[0] - - def basename(self, path): - """Return everything after the final path separator.""" - return self.split(path)[1] - def splitroot(self, path): """Split the pathname path into a 3-item tuple (drive, root, tail), where *drive* is a device name or mount point, *root* is a string of @@ -180,13 +172,6 @@ def splitroot(self, path): Any part may be empty.""" self._unsupported('splitroot()') - def splitdrive(self, path): - """Split the pathname path into a 2-item tuple (drive, tail), where - *drive* is a device name or mount point, and *tail* is everything - after the drive. Either part may be empty.""" - drive, root, rel = self.splitroot(path) - return drive, root + rel - def normcase(self, path): """Normalize the case of the path.""" self._unsupported('normcase()') @@ -247,7 +232,7 @@ def as_posix(self): @property def drive(self): """The drive prefix (letter or UNC path), if any.""" - return self.pathmod.splitdrive(self._raw_path)[0] + return self.pathmod.splitroot(self._raw_path)[0] @property def root(self): @@ -263,7 +248,7 @@ def anchor(self): @property def name(self): """The final path component, if any.""" - return self.pathmod.basename(self._raw_path) + return self.pathmod.split(self._raw_path)[1] @property def suffix(self): @@ -304,10 +289,10 @@ def stem(self): def with_name(self, name): """Return a new path with the file name changed.""" - dirname = self.pathmod.dirname - if dirname(name): + split = self.pathmod.split + if split(name)[0]: raise ValueError(f"Invalid name {name!r}") - return self.with_segments(dirname(self._raw_path), name) + return self.with_segments(split(self._raw_path)[0], name) def with_stem(self, stem): """Return a new path with the stem changed.""" @@ -424,7 +409,7 @@ def _stack(self): def parent(self): """The logical parent of the path.""" path = self._raw_path - parent 
= self.pathmod.dirname(path) + parent = self.pathmod.split(path)[0] if path != parent: parent = self.with_segments(parent) parent._resolving = self._resolving @@ -434,14 +419,14 @@ def parent(self): @property def parents(self): """A sequence of this path's logical parents.""" - dirname = self.pathmod.dirname + split = self.pathmod.split path = self._raw_path - parent = dirname(path) + parent = split(path)[0] parents = [] while path != parent: parents.append(self.with_segments(parent)) path = parent - parent = dirname(path) + parent = split(path)[0] return tuple(parents) def is_absolute(self): @@ -756,7 +741,7 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): raise ValueError("Unacceptable pattern: {!r}".format(pattern)) pattern_parts = list(path_pattern.parts) - if not self.pathmod.basename(pattern): + if not self.pathmod.split(pattern)[1]: # GH-65238: pathlib doesn't preserve trailing slash. Add it back. pattern_parts.append('') diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 3ec7e06a0452a5..7a73d599c19b0f 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -27,10 +27,7 @@ def test_unsupported_operation(self): m.sep self.assertRaises(e, m.join, 'foo') self.assertRaises(e, m.split, 'foo') - self.assertRaises(e, m.dirname, 'foo') - self.assertRaises(e, m.basename, 'foo') self.assertRaises(e, m.splitroot, 'foo') - self.assertRaises(e, m.splitdrive, 'foo') self.assertRaises(e, m.normcase, 'foo') self.assertRaises(e, m.isabs, 'foo') From 52adec57b4f3ecd0f783e4152328d27dd79837f0 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 13 Jan 2024 12:39:48 +0000 Subject: [PATCH 4/5] Make joining methods abstract. 
--- Lib/pathlib/__init__.py | 24 +++++++++++++++++++++++ Lib/pathlib/_abc.py | 21 ++++++++------------ Lib/test/test_pathlib/test_pathlib_abc.py | 7 +++++-- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index defe64946a3428..f14d35bb0038d0 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -85,6 +85,10 @@ class PurePath(_abc.PurePathBase): """ __slots__ = ( + # The `_raw_paths` slot stores unnormalized string paths. This is set + # in the `__init__()` method. + '_raw_paths', + # The `_drv`, `_root` and `_tail_cached` slots store parsed and # normalized parts of the path. They are set when any of the `drive`, # `root` or `_tail` properties are accessed for the first time. The @@ -150,6 +154,26 @@ def __init__(self, *args): # Avoid calling super().__init__, as an optimisation self._raw_paths = paths + def joinpath(self, *pathsegments): + """Combine this path with one or several arguments, and return a + new path representing either a subpath (if all arguments are relative + paths) or a totally different path (if one of the arguments is + anchored). + """ + return self.with_segments(self, *pathsegments) + + def __truediv__(self, key): + try: + return self.with_segments(self, key) + except TypeError: + return NotImplemented + + def __rtruediv__(self, key): + try: + return self.with_segments(key, self) + except TypeError: + return NotImplemented + def __reduce__(self): # Using the parts tuple helps share interned path parts # when pickling related paths. diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index ababeddf2a058b..ee227a4f7125f9 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -192,9 +192,9 @@ class PurePathBase: """ __slots__ = ( - # The `_raw_paths` slot stores unnormalized string paths. This is set - # in the `__init__()` method. - '_raw_paths', + # The `_raw_path` slot stores a joined string path. This is set in the + # `__init__()` method. 
+ '_raw_path', # The '_resolving' slot stores a boolean indicating whether the path # is being processed by `PathBase.resolve()`. This prevents duplicate @@ -203,8 +203,8 @@ class PurePathBase: ) pathmod = PathModuleBase() - def __init__(self, *paths): - self._raw_paths = paths + def __init__(self, path, *paths): + self._raw_path = self.pathmod.join(path, *paths) if paths else path self._resolving = False def with_segments(self, *pathsegments): @@ -214,11 +214,6 @@ def with_segments(self, *pathsegments): """ return type(self)(*pathsegments) - @property - def _raw_path(self): - """The joined but unnormalized path.""" - return self.pathmod.join(*self._raw_paths) - def __str__(self): """Return the string representation of the path, suitable for passing to system calls.""" @@ -374,17 +369,17 @@ def joinpath(self, *pathsegments): paths) or a totally different path (if one of the arguments is anchored). """ - return self.with_segments(*self._raw_paths, *pathsegments) + return self.with_segments(self._raw_path, *pathsegments) def __truediv__(self, key): try: - return self.joinpath(key) + return self.with_segments(self._raw_path, key) except TypeError: return NotImplemented def __rtruediv__(self, key): try: - return self.with_segments(key, *self._raw_paths) + return self.with_segments(key, self._raw_path) except TypeError: return NotImplemented diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 7a73d599c19b0f..c3c568c296e25c 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -42,8 +42,6 @@ class PurePathBaseTest(unittest.TestCase): def test_unsupported_operation_pure(self): p = self.cls('foo') e = UnsupportedOperation - self.assertRaises(e, str, p) - self.assertRaises(e, p.as_posix) with self.assertRaises(e): p.drive with self.assertRaises(e): @@ -64,6 +62,11 @@ def test_unsupported_operation_pure(self): p.suffix with self.assertRaises(e): p.suffixes + with 
self.assertRaises(e): + p / 'bar' + with self.assertRaises(e): + 'bar' / p + self.assertRaises(e, p.joinpath, 'bar') self.assertRaises(e, p.with_name, 'bar') self.assertRaises(e, p.with_stem, 'bar') self.assertRaises(e, p.with_suffix, '.txt') From 3fe8593b71ee1d187cebf557ee9211bb4ae57282 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 13 Jan 2024 16:17:56 +0000 Subject: [PATCH 5/5] Fix join() signature. --- Lib/pathlib/_abc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index ee227a4f7125f9..1fdca004d6b31f 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -154,7 +154,7 @@ def sep(self): """The character used to separate path components.""" self._unsupported('sep') - def join(self, *paths): + def join(self, path, *paths): """Join path segments.""" self._unsupported('join()')