Skip to content

GH-110109: Add pathlib._PurePathBase #110670

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Dec 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 47 additions & 36 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,14 +198,13 @@ def __repr__(self):
return "<{}.parents>".format(type(self._path).__name__)


class PurePath:
"""Base class for manipulating paths without I/O.
class _PurePathBase:
"""Base class for pure path objects.

PurePath represents a filesystem path and offers operations which
don't imply any actual filesystem I/O. Depending on your system,
instantiating a PurePath will return either a PurePosixPath or a
PureWindowsPath object. You can also instantiate either of these classes
directly, regardless of your system.
This class *does not* provide several magic methods that are defined in
its subclass PurePath. They are: __fspath__, __bytes__, __reduce__,
__hash__, __eq__, __lt__, __le__, __gt__, __ge__. Its initializer and path
joining methods accept only strings, not os.PathLike objects more broadly.
"""

__slots__ = (
Expand All @@ -227,29 +226,17 @@ class PurePath:
# for the first time. It's used to implement `_str_normcase`
'_str',

# The `_str_normcase_cached` slot stores the string path with
# normalized case. It is set when the `_str_normcase` property is
# accessed for the first time. It's used to implement `__eq__()`
# `__hash__()`, and `_parts_normcase`
'_str_normcase_cached',

# The `_parts_normcase_cached` slot stores the case-normalized
# string path after splitting on path separators. It's set when the
# `_parts_normcase` property is accessed for the first time. It's used
# to implement comparison methods like `__lt__()`.
'_parts_normcase_cached',

# The `_hash` slot stores the hash of the case-normalized string
# path. It's set when `__hash__()` is called for the first time.
'_hash',

# The '_resolving' slot stores a boolean indicating whether the path
# is being processed by `_PathBase.resolve()`. This prevents duplicate
# work from occurring when `resolve()` calls `stat()` or `readlink()`.
'_resolving',
)
pathmod = os.path

def __init__(self, *paths):
    """Record the path segments exactly as they were passed in.

    The segments are kept, unprocessed, as a tuple in ``_raw_paths``.
    """
    # A freshly created path is not in the middle of a resolve() call;
    # see the comment on the `_resolving` slot.
    self._resolving = False
    self._raw_paths = paths

def with_segments(self, *pathsegments):
"""Construct a new path object from any number of path-like objects.
Subclasses may override this method to customize how new path objects
Expand Down Expand Up @@ -444,7 +431,7 @@ def relative_to(self, other, /, *_deprecated, walk_up=False):
warnings._deprecated("pathlib.PurePath.relative_to(*args)", msg,
remove=(3, 14))
other = self.with_segments(other, *_deprecated)
elif not isinstance(other, PurePath):
elif not isinstance(other, _PurePathBase):
other = self.with_segments(other)
for step, path in enumerate(chain([other], other.parents)):
if path == self or path in self.parents:
Expand All @@ -468,7 +455,7 @@ def is_relative_to(self, other, /, *_deprecated):
warnings._deprecated("pathlib.PurePath.is_relative_to(*args)",
msg, remove=(3, 14))
other = self.with_segments(other, *_deprecated)
elif not isinstance(other, PurePath):
elif not isinstance(other, _PurePathBase):
other = self.with_segments(other)
return other == self or other in self.parents

Expand All @@ -487,7 +474,7 @@ def joinpath(self, *pathsegments):
paths) or a totally different path (if one of the arguments is
anchored).
"""
return self.with_segments(self, *pathsegments)
return self.with_segments(*self._raw_paths, *pathsegments)

def __truediv__(self, key):
try:
Expand All @@ -497,7 +484,7 @@ def __truediv__(self, key):

def __rtruediv__(self, key):
try:
return self.with_segments(key, self)
return self.with_segments(key, *self._raw_paths)
except TypeError:
return NotImplemented

Expand Down Expand Up @@ -555,7 +542,7 @@ def match(self, path_pattern, *, case_sensitive=None):
"""
Return True if this path matches the given pattern.
"""
if not isinstance(path_pattern, PurePath):
if not isinstance(path_pattern, _PurePathBase):
path_pattern = self.with_segments(path_pattern)
if case_sensitive is None:
case_sensitive = _is_case_sensitive(self.pathmod)
Expand All @@ -570,6 +557,35 @@ def match(self, path_pattern, *, case_sensitive=None):
match = _compile_pattern(pattern_str, sep, case_sensitive)
return match(str(self)) is not None


class PurePath(_PurePathBase):
"""Base class for manipulating paths without I/O.

PurePath represents a filesystem path and offers operations which
don't imply any actual filesystem I/O. Depending on your system,
instantiating a PurePath will return either a PurePosixPath or a
PureWindowsPath object. You can also instantiate either of these classes
directly, regardless of your system.
"""

__slots__ = (
# The `_str_normcase_cached` slot stores the string path with
# normalized case. It is set when the `_str_normcase` property is
# accessed for the first time. It's used to implement `__eq__()`,
# `__hash__()`, and `_parts_normcase`.
'_str_normcase_cached',

# The `_parts_normcase_cached` slot stores the case-normalized
# string path after splitting on path separators. It's set when the
# `_parts_normcase` property is accessed for the first time. It's used
# to implement comparison methods like `__lt__()`.
'_parts_normcase_cached',

# The `_hash` slot stores the hash of the case-normalized string
# path. It's set when `__hash__()` is called for the first time.
'_hash',
)

def __new__(cls, *args, **kwargs):
"""Construct a PurePath from one or several strings and or existing
PurePath objects. The strings and path objects are combined so as
Expand Down Expand Up @@ -600,8 +616,7 @@ def __init__(self, *args):
"object where __fspath__ returns a str, "
f"not {type(path).__name__!r}")
paths.append(path)
self._raw_paths = paths
self._resolving = False
super().__init__(*paths)

def __reduce__(self):
# Using the parts tuple helps share interned path parts
Expand Down Expand Up @@ -719,7 +734,7 @@ class PureWindowsPath(PurePath):
# Filesystem-accessing classes


class _PathBase(PurePath):
class _PathBase(_PurePathBase):
"""Base class for concrete path objects.

This class provides dummy implementations for many methods that derived
Expand All @@ -733,8 +748,6 @@ class _PathBase(PurePath):
such as paths in archive files or on remote storage systems.
"""
__slots__ = ()
__bytes__ = None
__fspath__ = None # virtual paths have no local file system representation

@classmethod
def _unsupported(cls, method_name):
Expand Down Expand Up @@ -1341,7 +1354,7 @@ def as_uri(self):
self._unsupported("as_uri")


class Path(_PathBase):
class Path(_PathBase, PurePath):
"""PurePath subclass that can make system calls.

Path represents a filesystem path but unlike PurePath, also offers
Expand All @@ -1351,8 +1364,6 @@ class Path(_PathBase):
but cannot instantiate a WindowsPath on a POSIX system or vice versa.
"""
__slots__ = ()
__bytes__ = PurePath.__bytes__
__fspath__ = PurePath.__fspath__
as_uri = PurePath.as_uri

def __init__(self, *args, **kwargs):
Expand Down
82 changes: 65 additions & 17 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,35 @@ def test_is_notimplemented(self):
# Tests for the pure classes.
#

class PurePathTest(unittest.TestCase):
cls = pathlib.PurePath

class PurePathBaseTest(unittest.TestCase):
    """Checks made directly against the ``pathlib._PurePathBase`` class."""

    cls = pathlib._PurePathBase

    def test_magic_methods(self):
        """The class under test must not customize these magic methods."""
        base = self.cls
        # These two must not exist on the class at all.
        for absent in ('__fspath__', '__bytes__'):
            self.assertFalse(hasattr(base, absent))
        # These must be the very implementations inherited from object.
        for inherited in ('__reduce__', '__hash__', '__eq__', '__lt__',
                          '__le__', '__gt__', '__ge__'):
            self.assertIs(getattr(base, inherited),
                          getattr(object, inherited))


class DummyPurePath(pathlib._PurePathBase):
    """``_PurePathBase`` subclass that compares and hashes by its string form."""

    def __hash__(self):
        # Hash the same string that __eq__ compares, so equal paths
        # always produce equal hashes.
        return hash(str(self))

    def __eq__(self, other):
        # Only other DummyPurePath instances are comparable; anything else
        # is handed back to Python's default comparison machinery.
        if isinstance(other, DummyPurePath):
            return str(self) == str(other)
        return NotImplemented


class DummyPurePathTest(unittest.TestCase):
cls = DummyPurePath

# Keys are canonical paths, values are list of tuples of arguments
# supposed to produce equal paths.
Expand Down Expand Up @@ -82,12 +109,6 @@ def test_constructor_common(self):
P('/a', 'b', 'c')
P('a/b/c')
P('/a/b/c')
P(FakePath("a/b/c"))
self.assertEqual(P(P('a')), P('a'))
self.assertEqual(P(P('a'), 'b'), P('a/b'))
self.assertEqual(P(P('a'), P('b')), P('a/b'))
self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c")))
self.assertEqual(P(P('./a:b')), P('./a:b'))

def test_concrete_class(self):
if self.cls is pathlib.PurePath:
Expand Down Expand Up @@ -193,8 +214,6 @@ def test_join_common(self):
self.assertIs(type(pp), type(p))
pp = p.joinpath('c', 'd')
self.assertEqual(pp, P('a/b/c/d'))
pp = p.joinpath(P('c'))
self.assertEqual(pp, P('a/b/c'))
pp = p.joinpath('/c')
self.assertEqual(pp, P('/c'))

Expand All @@ -211,8 +230,6 @@ def test_div_common(self):
self.assertEqual(pp, P('a/b/c/d'))
pp = 'c' / p / 'd'
self.assertEqual(pp, P('c/a/b/d'))
pp = p / P('c')
self.assertEqual(pp, P('a/b/c'))
pp = p/ '/c'
self.assertEqual(pp, P('/c'))

Expand Down Expand Up @@ -678,6 +695,29 @@ def test_is_relative_to_common(self):
self.assertFalse(p.is_relative_to(''))
self.assertFalse(p.is_relative_to(P('a')))


class PurePathTest(DummyPurePathTest):
cls = pathlib.PurePath

def test_constructor_nested(self):
    """Constructing from path objects and FakePath instances works."""
    P = self.cls
    # Construction from a FakePath alone only has to succeed.
    P(FakePath("a/b/c"))
    # Each pair is (path built from nested path objects, expected path).
    cases = [
        (P(P('a')), P('a')),
        (P(P('a'), 'b'), P('a/b')),
        (P(P('a'), P('b')), P('a/b')),
        (P(P('a'), P('b'), P('c')), P(FakePath("a/b/c"))),
        (P(P('./a:b')), P('./a:b')),
    ]
    for actual, expected in cases:
        self.assertEqual(actual, expected)

def test_join_nested(self):
    """joinpath() accepts another path object as a segment."""
    P = self.cls
    base = P('a/b')
    joined = base.joinpath(P('c'))
    self.assertEqual(joined, P('a/b/c'))

def test_div_nested(self):
    """The ``/`` operator accepts another path object on its right side."""
    P = self.cls
    base = P('a/b')
    combined = base / P('c')
    self.assertEqual(combined, P('a/b/c'))

def test_pickling_common(self):
P = self.cls
p = P('/a/b')
Expand Down Expand Up @@ -1545,7 +1585,7 @@ class cls(pathlib.PurePath):
# Tests for the virtual classes.
#

class PathBaseTest(PurePathTest):
class PathBaseTest(PurePathBaseTest):
cls = pathlib._PathBase

def test_unsupported_operation(self):
Expand Down Expand Up @@ -1636,6 +1676,14 @@ class DummyPath(pathlib._PathBase):
_directories = {}
_symlinks = {}

def __eq__(self, other):
    """Compare two DummyPath objects by their string form."""
    if isinstance(other, DummyPath):
        return str(self) == str(other)
    # Not a DummyPath: let Python try the reflected comparison.
    return NotImplemented

def __hash__(self):
    """Return the hash of this path's string form."""
    as_text = str(self)
    return hash(as_text)

def stat(self, *, follow_symlinks=True):
if follow_symlinks:
path = str(self.resolve())
Expand Down Expand Up @@ -1707,7 +1755,7 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False):
self.mkdir(mode, parents=False, exist_ok=exist_ok)


class DummyPathTest(unittest.TestCase):
class DummyPathTest(DummyPurePathTest):
"""Tests for PathBase methods that use stat(), open() and iterdir()."""

cls = DummyPath
Expand Down Expand Up @@ -2014,7 +2062,7 @@ def _check(path, glob, expected):

def test_rglob_common(self):
def _check(glob, expected):
self.assertEqual(sorted(glob), sorted(P(BASE, q) for q in expected))
self.assertEqual(set(glob), {P(BASE, q) for q in expected})
P = self.cls
p = P(BASE)
it = p.rglob("fileA")
Expand Down Expand Up @@ -2198,7 +2246,7 @@ def test_glob_above_recursion_limit(self):
# directory_depth > recursion_limit
directory_depth = recursion_limit + 10
base = self.cls(BASE, 'deep')
path = self.cls(base, *(['d'] * directory_depth))
path = base.joinpath(*(['d'] * directory_depth))
path.mkdir(parents=True)

with set_recursion_limit(recursion_limit):
Expand Down Expand Up @@ -2741,7 +2789,7 @@ def test_walk_above_recursion_limit(self):
# directory_depth > recursion_limit
directory_depth = recursion_limit + 10
base = self.cls(BASE, 'deep')
path = self.cls(base, *(['d'] * directory_depth))
path = base.joinpath(*(['d'] * directory_depth))
path.mkdir(parents=True)

with set_recursion_limit(recursion_limit):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add private ``pathlib._PurePathBase`` class: a base class for
:class:`pathlib.PurePath` that omits certain magic methods. It may be made
public (along with ``_PathBase``) in the future.