Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-101362: Optimise pathlib by deferring path normalisation #101560

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 61 additions & 37 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import warnings
from _collections_abc import Sequence
from errno import ENOENT, ENOTDIR, EBADF, ELOOP
from operator import attrgetter
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
from urllib.parse import quote_from_bytes as urlquote_from_bytes

Expand Down Expand Up @@ -216,8 +215,8 @@ class _PathParents(Sequence):
def __init__(self, path):
# We don't store the instance to avoid reference cycles
self._pathcls = type(path)
self._drv = path._drv
self._root = path._root
self._drv = path.drive
self._root = path.root
self._parts = path._parts

def __len__(self):
Expand Down Expand Up @@ -251,7 +250,7 @@ class PurePath(object):
directly, regardless of your system.
"""
__slots__ = (
'_drv', '_root', '_parts',
'_fspath', '_drv', '_root', '_parts_cached',
'_str', '_hash', '_parts_tuple', '_parts_normcase_cached',
)
_flavour = os.path
Expand All @@ -269,18 +268,16 @@ def __new__(cls, *args):
def __reduce__(self):
# Using the parts tuple helps share interned path parts
# when pickling related paths.
return (self.__class__, tuple(self._parts))
return (self.__class__, self.parts)

@classmethod
def _parse_parts(cls, parts):
def _from_parts(cls, parts):
if not parts:
return '', '', []
path = ''
elif len(parts) == 1:
path = os.fspath(parts[0])
else:
path = cls._flavour.join(*parts)
sep = cls._flavour.sep
altsep = cls._flavour.altsep
if isinstance(path, str):
# Force-cast str subclasses to str (issue #21127)
path = str(path)
Expand All @@ -289,6 +286,16 @@ def _parse_parts(cls, parts):
"argument should be a str or an os.PathLike "
"object where __fspath__ returns a str, "
f"not {type(path).__name__!r}")
self = object.__new__(cls)
self._fspath = path
return self

@classmethod
def _parse_path(cls, path):
if not path:
return '', '', []
sep = cls._flavour.sep
altsep = cls._flavour.altsep
if altsep:
path = path.replace(altsep, sep)
drv, root, rel = cls._flavour.splitroot(path)
Expand All @@ -299,21 +306,18 @@ def _parse_parts(cls, parts):
parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
return drv, root, parsed

@classmethod
def _from_parts(cls, args):
self = object.__new__(cls)
drv, root, parts = self._parse_parts(args)
def _load_parts(self):
drv, root, parts = self._parse_path(self._fspath)
self._drv = drv
self._root = root
self._parts = parts
return self
self._parts_cached = parts

@classmethod
def _from_parsed_parts(cls, drv, root, parts):
self = object.__new__(cls)
self._drv = drv
self._root = root
self._parts = parts
self._parts_cached = parts
return self

@classmethod
Expand All @@ -330,7 +334,7 @@ def __str__(self):
try:
return self._str
except AttributeError:
self._str = self._format_parsed_parts(self._drv, self._root,
self._str = self._format_parsed_parts(self.drive, self.root,
self._parts) or '.'
return self._str

Expand All @@ -356,7 +360,7 @@ def as_uri(self):
if not self.is_absolute():
raise ValueError("relative path can't be expressed as a file URI")

drive = self._drv
drive = self.drive
if len(drive) == 2 and drive[1] == ':':
# It's a path on a local drive => 'file:///c:/a/b'
prefix = 'file:///' + drive
Expand Down Expand Up @@ -412,23 +416,43 @@ def __ge__(self, other):
return NotImplemented
return self._parts_normcase >= other._parts_normcase

drive = property(attrgetter('_drv'),
doc="""The drive prefix (letter or UNC path), if any.""")
@property
def drive(self):
"""The drive prefix (letter or UNC path), if any."""
try:
return self._drv
except AttributeError:
self._load_parts()
return self._drv

root = property(attrgetter('_root'),
doc="""The root of the path, if any.""")
@property
def root(self):
"""The root of the path, if any."""
try:
return self._root
except AttributeError:
self._load_parts()
return self._root

@property
def _parts(self):
try:
return self._parts_cached
except AttributeError:
self._load_parts()
return self._parts_cached

@property
def anchor(self):
"""The concatenation of the drive and root, or ''."""
anchor = self._drv + self._root
anchor = self.drive + self.root
return anchor

@property
def name(self):
"""The final path component, if any."""
parts = self._parts
if len(parts) == (1 if (self._drv or self._root) else 0):
if len(parts) == (1 if (self.drive or self.root) else 0):
return ''
return parts[-1]

Expand Down Expand Up @@ -477,7 +501,7 @@ def with_name(self, name):
drv, root, tail = f.splitroot(name)
if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail):
raise ValueError("Invalid name %r" % (name))
return self._from_parsed_parts(self._drv, self._root,
return self._from_parsed_parts(self.drive, self.root,
self._parts[:-1] + [name])

def with_stem(self, stem):
Expand All @@ -502,7 +526,7 @@ def with_suffix(self, suffix):
name = name + suffix
else:
name = name[:-len(old_suffix)] + suffix
return self._from_parsed_parts(self._drv, self._root,
return self._from_parsed_parts(self.drive, self.root,
self._parts[:-1] + [name])

def relative_to(self, other, /, *_deprecated, walk_up=False):
Expand Down Expand Up @@ -593,8 +617,8 @@ def __rtruediv__(self, key):
@property
def parent(self):
"""The logical parent of the path."""
drv = self._drv
root = self._root
drv = self.drive
root = self.root
parts = self._parts
if len(parts) == 1 and (drv or root):
return self
Expand All @@ -610,7 +634,7 @@ def is_absolute(self):
a drive)."""
# ntpath.isabs() is defective - see GH-44626 .
if self._flavour is ntpath:
return bool(self._drv and self._root)
return bool(self.drive and self.root)
return self._flavour.isabs(self)

def is_reserved(self):
Expand All @@ -634,7 +658,7 @@ def match(self, path_pattern):
Return True if this path matches the given pattern.
"""
path_pattern = self._flavour.normcase(path_pattern)
drv, root, pat_parts = self._parse_parts((path_pattern,))
drv, root, pat_parts = self._parse_path(path_pattern)
if not pat_parts:
raise ValueError("empty pattern")
parts = self._parts_normcase
Expand Down Expand Up @@ -700,7 +724,7 @@ def _make_child_relpath(self, part):
# This is an optimization used for dir walking. `part` must be
# a single part relative to this path.
parts = self._parts + [part]
return self._from_parsed_parts(self._drv, self._root, parts)
return self._from_parsed_parts(self.drive, self.root, parts)

def __enter__(self):
# In previous versions of pathlib, __exit__() marked this path as
Expand Down Expand Up @@ -770,7 +794,7 @@ def glob(self, pattern):
sys.audit("pathlib.Path.glob", self, pattern)
if not pattern:
raise ValueError("Unacceptable pattern: {!r}".format(pattern))
drv, root, pattern_parts = self._parse_parts((pattern,))
drv, root, pattern_parts = self._parse_path(pattern)
if drv or root:
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
Expand All @@ -785,7 +809,7 @@ def rglob(self, pattern):
this subtree.
"""
sys.audit("pathlib.Path.rglob", self, pattern)
drv, root, pattern_parts = self._parse_parts((pattern,))
drv, root, pattern_parts = self._parse_path(pattern)
if drv or root:
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
Expand All @@ -802,9 +826,9 @@ def absolute(self):
"""
if self.is_absolute():
return self
elif self._drv:
elif self.drive:
# There is a CWD on each drive-letter drive.
cwd = self._flavour.abspath(self._drv)
cwd = self._flavour.abspath(self.drive)
else:
cwd = os.getcwd()
return self._from_parts([cwd] + self._parts)
Expand Down Expand Up @@ -1184,12 +1208,12 @@ def expanduser(self):
""" Return a new path with expanded ~ and ~user constructs
(as returned by os.path.expanduser)
"""
if (not (self._drv or self._root) and
if (not (self.drive or self.root) and
self._parts and self._parts[0][:1] == '~'):
homedir = self._flavour.expanduser(self._parts[0])
if homedir[:1] == "~":
raise RuntimeError("Could not determine home directory.")
drv, root, parts = self._parse_parts((homedir,))
drv, root, parts = self._parse_path(homedir)
return self._from_parsed_parts(drv, root, parts + self._parts[1:])

return self
Expand Down
4 changes: 3 additions & 1 deletion Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
class _BaseFlavourTest(object):

def _check_parse_parts(self, arg, expected):
f = self.cls._parse_parts
def f(parts):
path = self.flavour.join(*parts) if parts else ''
return self.cls._parse_path(path)
sep = self.flavour.sep
altsep = self.flavour.altsep
actual = f([x.replace('/', sep) for x in arg])
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Speed up construction of :class:`pathlib.PurePath` objects by deferring
path parsing and normalization.