Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pathlib._abc.PathModuleBase #113893

Merged
merged 6 commits into from
Jan 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions Lib/pathlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@
]


# Reference for Windows paths can be found at
# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file .
_WIN_RESERVED_NAMES = frozenset(
{'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} |
{f'COM{c}' for c in '123456789\xb9\xb2\xb3'} |
{f'LPT{c}' for c in '123456789\xb9\xb2\xb3'}
)


class _PathParents(Sequence):
"""This object provides sequence-like access to the logical ancestors
of a path. Don't try to construct it yourself."""
Expand Down Expand Up @@ -76,6 +85,10 @@ class PurePath(_abc.PurePathBase):
"""

__slots__ = (
# The `_raw_paths` slot stores unnormalized string paths. This is set
# in the `__init__()` method.
'_raw_paths',

# The `_drv`, `_root` and `_tail_cached` slots store parsed and
# normalized parts of the path. They are set when any of the `drive`,
# `root` or `_tail` properties are accessed for the first time. The
Expand Down Expand Up @@ -141,6 +154,26 @@ def __init__(self, *args):
# Avoid calling super().__init__, as an optimisation
self._raw_paths = paths

def joinpath(self, *pathsegments):
"""Combine this path with one or several arguments, and return a
new path representing either a subpath (if all arguments are relative
paths) or a totally different path (if one of the arguments is
anchored).
"""
return self.with_segments(self, *pathsegments)

def __truediv__(self, key):
try:
return self.with_segments(self, key)
except TypeError:
return NotImplemented

def __rtruediv__(self, key):
try:
return self.with_segments(key, self)
except TypeError:
return NotImplemented

def __reduce__(self):
# Using the parts tuple helps share interned path parts
# when pickling related paths.
Expand Down Expand Up @@ -386,6 +419,33 @@ def is_relative_to(self, other, /, *_deprecated):
other = self.with_segments(other)
return other == self or other in self.parents

def is_absolute(self):
"""True if the path is absolute (has both a root and, if applicable,
a drive)."""
if self.pathmod is posixpath:
# Optimization: work with raw paths on POSIX.
for path in self._raw_paths:
if path.startswith('/'):
return True
return False
return self.pathmod.isabs(self)

def is_reserved(self):
"""Return True if the path contains one of the special names reserved
by the system, if any."""
if self.pathmod is not ntpath or not self.name:
return False

# NOTE: the rules for reserved names seem somewhat complicated
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
# exist). We err on the side of caution and return True for paths
# which are not considered reserved by Windows.
if self.drive.startswith('\\\\'):
# UNC paths are never reserved.
return False
name = self.name.partition('.')[0].partition(':')[0].rstrip(' ')
return name.upper() in _WIN_RESERVED_NAMES

def as_uri(self):
"""Return the path as a URI."""
if not self.is_absolute():
Expand Down
124 changes: 67 additions & 57 deletions Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,13 @@
"""

import functools
import posixpath
from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO

#
# Internals
#

# Reference for Windows paths can be found at
# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file .
_WIN_RESERVED_NAMES = frozenset(
{'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} |
{f'COM{c}' for c in '123456789\xb9\xb2\xb3'} |
{f'LPT{c}' for c in '123456789\xb9\xb2\xb3'}
)

_WINERROR_NOT_READY = 21 # drive exists but is not accessible
_WINERROR_INVALID_NAME = 123 # fix for bpo-35306
_WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself
Expand Down Expand Up @@ -144,6 +135,53 @@ class UnsupportedOperation(NotImplementedError):
pass


class PathModuleBase:
"""Base class for path modules, which do low-level path manipulation.

Path modules provide a subset of the os.path API, specifically those
functions needed to provide PurePathBase functionality. Each PurePathBase
subclass references its path module via a 'pathmod' class attribute.

Every method in this base class raises an UnsupportedOperation exception.
"""

@classmethod
def _unsupported(cls, attr):
raise UnsupportedOperation(f"{cls.__name__}.{attr} is unsupported")

@property
def sep(self):
"""The character used to separate path components."""
self._unsupported('sep')

def join(self, path, *paths):
"""Join path segments."""
self._unsupported('join()')

def split(self, path):
"""Split the path into a pair (head, tail), where *head* is everything
before the final path separator, and *tail* is everything after.
Either part may be empty.
"""
self._unsupported('split()')

def splitroot(self, path):
"""Split the pathname path into a 3-item tuple (drive, root, tail),
where *drive* is a device name or mount point, *root* is a string of
separators after the drive, and *tail* is everything after the root.
Any part may be empty."""
self._unsupported('splitroot()')

def normcase(self, path):
"""Normalize the case of the path."""
self._unsupported('normcase()')

def isabs(self, path):
"""Returns whether the path is absolute, i.e. unaffected by the
current directory or drive."""
self._unsupported('isabs()')


class PurePathBase:
"""Base class for pure path objects.

Expand All @@ -154,19 +192,19 @@ class PurePathBase:
"""

__slots__ = (
# The `_raw_paths` slot stores unnormalized string paths. This is set
# in the `__init__()` method.
'_raw_paths',
# The `_raw_path` slot store a joined string path. This is set in the
# `__init__()` method.
'_raw_path',

# The '_resolving' slot stores a boolean indicating whether the path
# is being processed by `PathBase.resolve()`. This prevents duplicate
# work from occurring when `resolve()` calls `stat()` or `readlink()`.
'_resolving',
)
pathmod = posixpath
pathmod = PathModuleBase()

def __init__(self, *paths):
self._raw_paths = paths
def __init__(self, path, *paths):
self._raw_path = self.pathmod.join(path, *paths) if paths else path
self._resolving = False

def with_segments(self, *pathsegments):
Expand All @@ -176,11 +214,6 @@ def with_segments(self, *pathsegments):
"""
return type(self)(*pathsegments)

@property
def _raw_path(self):
"""The joined but unnormalized path."""
return self.pathmod.join(*self._raw_paths)

def __str__(self):
"""Return the string representation of the path, suitable for
passing to system calls."""
Expand All @@ -194,7 +227,7 @@ def as_posix(self):
@property
def drive(self):
"""The drive prefix (letter or UNC path), if any."""
return self.pathmod.splitdrive(self._raw_path)[0]
return self.pathmod.splitroot(self._raw_path)[0]

@property
def root(self):
Expand All @@ -210,7 +243,7 @@ def anchor(self):
@property
def name(self):
"""The final path component, if any."""
return self.pathmod.basename(self._raw_path)
return self.pathmod.split(self._raw_path)[1]

@property
def suffix(self):
Expand Down Expand Up @@ -251,10 +284,10 @@ def stem(self):

def with_name(self, name):
"""Return a new path with the file name changed."""
dirname = self.pathmod.dirname
if dirname(name):
split = self.pathmod.split
if split(name)[0]:
raise ValueError(f"Invalid name {name!r}")
return self.with_segments(dirname(self._raw_path), name)
return self.with_segments(split(self._raw_path)[0], name)

def with_stem(self, stem):
"""Return a new path with the stem changed."""
Expand Down Expand Up @@ -336,17 +369,17 @@ def joinpath(self, *pathsegments):
paths) or a totally different path (if one of the arguments is
anchored).
"""
return self.with_segments(*self._raw_paths, *pathsegments)
return self.with_segments(self._raw_path, *pathsegments)

def __truediv__(self, key):
try:
return self.joinpath(key)
return self.with_segments(self._raw_path, key)
except TypeError:
return NotImplemented

def __rtruediv__(self, key):
try:
return self.with_segments(key, *self._raw_paths)
return self.with_segments(key, self._raw_path)
except TypeError:
return NotImplemented

Expand All @@ -371,7 +404,7 @@ def _stack(self):
def parent(self):
"""The logical parent of the path."""
path = self._raw_path
parent = self.pathmod.dirname(path)
parent = self.pathmod.split(path)[0]
if path != parent:
parent = self.with_segments(parent)
parent._resolving = self._resolving
Expand All @@ -381,43 +414,20 @@ def parent(self):
@property
def parents(self):
"""A sequence of this path's logical parents."""
dirname = self.pathmod.dirname
split = self.pathmod.split
path = self._raw_path
parent = dirname(path)
parent = split(path)[0]
parents = []
while path != parent:
parents.append(self.with_segments(parent))
path = parent
parent = dirname(path)
parent = split(path)[0]
return tuple(parents)

def is_absolute(self):
"""True if the path is absolute (has both a root and, if applicable,
a drive)."""
if self.pathmod is posixpath:
# Optimization: work with raw paths on POSIX.
for path in self._raw_paths:
if path.startswith('/'):
return True
return False
else:
return self.pathmod.isabs(self._raw_path)

def is_reserved(self):
"""Return True if the path contains one of the special names reserved
by the system, if any."""
if self.pathmod is posixpath or not self.name:
return False

# NOTE: the rules for reserved names seem somewhat complicated
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
# exist). We err on the side of caution and return True for paths
# which are not considered reserved by Windows.
if self.drive.startswith('\\\\'):
# UNC paths are never reserved.
return False
name = self.name.partition('.')[0].partition(':')[0].rstrip(' ')
return name.upper() in _WIN_RESERVED_NAMES
return self.pathmod.isabs(self._raw_path)

def match(self, path_pattern, *, case_sensitive=None):
"""
Expand Down Expand Up @@ -726,7 +736,7 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
raise ValueError("Unacceptable pattern: {!r}".format(pattern))

pattern_parts = list(path_pattern.parts)
if not self.pathmod.basename(pattern):
if not self.pathmod.split(pattern)[1]:
# GH-65238: pathlib doesn't preserve trailing slash. Add it back.
pattern_parts.append('')

Expand Down
1 change: 1 addition & 0 deletions Lib/test/test_pathlib/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1151,6 +1151,7 @@ def tempdir(self):

def test_matches_pathbase_api(self):
our_names = {name for name in dir(self.cls) if name[0] != '_'}
our_names.remove('is_reserved') # only present in PurePath
path_names = {name for name in dir(pathlib._abc.PathBase) if name[0] != '_'}
self.assertEqual(our_names, path_names)
for attr_name in our_names:
Expand Down
Loading
Loading