Skip to content

Commit

Permalink
pythonGH-102783: Speed up pathlib.PurePath.__fspath__() by returnin…
Browse files Browse the repository at this point in the history
…g raw path

Return an unnormalized path from `pathlib.PurePath.__fspath__()`. This is
equivalent to a normalized path (because pathlib's normalization doesn't
change the meaning of paths), but considerably cheaper to generate.
  • Loading branch information
barneygale committed Nov 25, 2023
1 parent 1619f43 commit 2085494
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 18 deletions.
42 changes: 24 additions & 18 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,12 @@ class PurePath:
# in the `__init__()` method.
'_raw_paths',

# The `_raw_path_cached` slot stores a joined but unnormalized string
# path. This is set in the `__init__()` method or `_raw_path`
# property. It's returned from `__fspath__()`, and used as the basis
# for the normalized path parts (see following slots).
'_raw_path_cached',

# The `_drv`, `_root` and `_tail_cached` slots store parsed and
# normalized parts of the path. They are set when any of the `drive`,
# `root` or `_tail` properties are accessed for the first time. The
Expand Down Expand Up @@ -273,19 +279,6 @@ def _parse_path(cls, path):
parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.']
return drv, root, parsed

def _load_parts(self):
paths = self._raw_paths
if len(paths) == 0:
path = ''
elif len(paths) == 1:
path = paths[0]
else:
path = self.pathmod.join(*paths)
drv, root, tail = self._parse_path(path)
self._drv = drv
self._root = root
self._tail_cached = tail

def _from_parsed_parts(self, drv, root, tail):
path_str = self._format_parsed_parts(drv, root, tail)
path = self.with_segments(path_str)
Expand Down Expand Up @@ -321,13 +314,22 @@ def as_posix(self):
def __repr__(self):
return "{}({!r})".format(self.__class__.__name__, self.as_posix())

@property
def _raw_path(self):
"""The joined (but unnormalized) string path."""
try:
return self._raw_path_cached
except AttributeError:
self._raw_path_cached = self.pathmod.join(*self._raw_paths)
return self._raw_path_cached

@property
def drive(self):
"""The drive prefix (letter or UNC path), if any."""
try:
return self._drv
except AttributeError:
self._load_parts()
self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path)
return self._drv

@property
Expand All @@ -336,15 +338,15 @@ def root(self):
try:
return self._root
except AttributeError:
self._load_parts()
self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path)
return self._root

@property
def _tail(self):
try:
return self._tail_cached
except AttributeError:
self._load_parts()
self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path)
return self._tail_cached

@property
Expand Down Expand Up @@ -605,6 +607,10 @@ def __init__(self, *args):
f"not {type(path).__name__!r}")
paths.append(path)
self._raw_paths = paths
if len(paths) == 1:
self._raw_path_cached = paths[0]
elif len(paths) == 0:
self._raw_path_cached = ''
self._resolving = False

def __reduce__(self):
Expand All @@ -613,12 +619,12 @@ def __reduce__(self):
return (self.__class__, self.parts)

def __fspath__(self):
return str(self)
return self._raw_path

def __bytes__(self):
"""Return the bytes representation of the path. This is only
recommended to use under Unix."""
return os.fsencode(self)
return os.fsencode(str(self))

@property
def _str_normcase(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Speed up ``pathlib.PurePath.__fspath__()`` by returning an unnormalized
path.

0 comments on commit 2085494

Please sign in to comment.