Skip to content

Commit e5b08dd

Browse files
barneygale, eryksun, and AlexWaygood
authored
gh-101000: Add os.path.splitroot() (#101002)
Co-authored-by: Eryk Sun <eryksun@gmail.com> Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
1 parent 37f15a5 commit e5b08dd

9 files changed

+279
-165
lines changed

Doc/library/os.path.rst

+33
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,39 @@ the :mod:`glob` module.)
488488
Accepts a :term:`path-like object`.
489489

490490

491+
.. function:: splitroot(path)
492+
493+
Split the pathname *path* into a 3-item tuple ``(drive, root, tail)`` where
494+
*drive* is a device name or mount point, *root* is a string of separators
495+
after the drive, and *tail* is everything after the root. Any of these
496+
items may be the empty string. In all cases, ``drive + root + tail`` will
497+
be the same as *path*.
498+
499+
On POSIX systems, *drive* is always empty. The *root* may be empty (if *path* is
500+
relative), a single forward slash (if *path* is absolute), or two forward slashes
501+
(implementation-defined per `IEEE Std 1003.1-2017; 4.13 Pathname Resolution
502+
<https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13>`_).
503+
For example::
504+
505+
>>> splitroot('/home/sam')
506+
('', '/', 'home/sam')
507+
>>> splitroot('//home/sam')
508+
('', '//', 'home/sam')
509+
>>> splitroot('///home/sam')
510+
('', '/', '//home/sam')
511+
512+
On Windows, *drive* may be empty, a drive-letter name, a UNC share, or a device
513+
name. The *root* may be empty, a forward slash, or a backslash. For
514+
example::
515+
516+
>>> splitroot('C:/Users/Sam')
517+
('C:', '/', 'Users/Sam')
518+
>>> splitroot('//Server/Share/Users/Sam')
519+
('//Server/Share', '/', 'Users/Sam')
520+
521+
.. versionadded:: 3.12
522+
523+
491524
.. function:: splitext(path)
492525

493526
Split the pathname *path* into a pair ``(root, ext)`` such that ``root + ext ==

Doc/whatsnew/3.12.rst

+8-3
Original file line numberDiff line numberDiff line change
@@ -288,13 +288,18 @@ os
288288
for a process with :func:`os.pidfd_open` in non-blocking mode.
289289
(Contributed by Kumar Aditya in :gh:`93312`.)
290290

291-
* Add :func:`os.path.isjunction` to check if a given path is a junction.
292-
(Contributed by Charles Machalow in :gh:`99547`.)
293-
294291
* :class:`os.DirEntry` now includes an :meth:`os.DirEntry.is_junction`
295292
method to check if the entry is a junction.
296293
(Contributed by Charles Machalow in :gh:`99547`.)
297294

295+
os.path
296+
-------
297+
298+
* Add :func:`os.path.isjunction` to check if a given path is a junction.
299+
(Contributed by Charles Machalow in :gh:`99547`.)
300+
301+
* Add :func:`os.path.splitroot` to split a path into a triad
302+
``(drive, root, tail)``. (Contributed by Barney Gale in :gh:`101000`.)
298303

299304
shutil
300305
------

Lib/ntpath.py

+72-54
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from genericpath import *
2525

2626

27-
__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
27+
__all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext",
2828
"basename","dirname","commonprefix","getsize","getmtime",
2929
"getatime","getctime", "islink","exists","lexists","isdir","isfile",
3030
"ismount", "expanduser","expandvars","normpath","abspath",
@@ -117,19 +117,21 @@ def join(path, *paths):
117117
try:
118118
if not paths:
119119
path[:0] + sep #23780: Ensure compatible data type even if path is empty.
120-
result_drive, result_path = splitdrive(path)
120+
result_drive, result_root, result_path = splitroot(path)
121121
for p in map(os.fspath, paths):
122-
p_drive, p_path = splitdrive(p)
123-
if p_path and p_path[0] in seps:
122+
p_drive, p_root, p_path = splitroot(p)
123+
if p_root:
124124
# Second path is absolute
125125
if p_drive or not result_drive:
126126
result_drive = p_drive
127+
result_root = p_root
127128
result_path = p_path
128129
continue
129130
elif p_drive and p_drive != result_drive:
130131
if p_drive.lower() != result_drive.lower():
131132
# Different drives => ignore the first path entirely
132133
result_drive = p_drive
134+
result_root = p_root
133135
result_path = p_path
134136
continue
135137
# Same drive in different case
@@ -139,10 +141,10 @@ def join(path, *paths):
139141
result_path = result_path + sep
140142
result_path = result_path + p_path
141143
## add separator between UNC and non-absolute path
142-
if (result_path and result_path[0] not in seps and
144+
if (result_path and not result_root and
143145
result_drive and result_drive[-1:] != colon):
144146
return result_drive + sep + result_path
145-
return result_drive + result_path
147+
return result_drive + result_root + result_path
146148
except (TypeError, AttributeError, BytesWarning):
147149
genericpath._check_arg_types('join', path, *paths)
148150
raise
@@ -169,35 +171,61 @@ def splitdrive(p):
169171
170172
Paths cannot contain both a drive letter and a UNC path.
171173
174+
"""
175+
drive, root, tail = splitroot(p)
176+
return drive, root + tail
177+
178+
179+
def splitroot(p):
180+
"""Split a pathname into drive, root and tail. The drive is defined
181+
exactly as in splitdrive(). On Windows, the root may be a single path
182+
separator or an empty string. The tail contains anything after the root.
183+
For example:
184+
185+
splitroot('//server/share/') == ('//server/share', '/', '')
186+
splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
187+
splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
188+
splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
172189
"""
173190
p = os.fspath(p)
174-
if len(p) >= 2:
175-
if isinstance(p, bytes):
176-
sep = b'\\'
177-
altsep = b'/'
178-
colon = b':'
179-
unc_prefix = b'\\\\?\\UNC\\'
180-
else:
181-
sep = '\\'
182-
altsep = '/'
183-
colon = ':'
184-
unc_prefix = '\\\\?\\UNC\\'
185-
normp = p.replace(altsep, sep)
186-
if normp[0:2] == sep * 2:
191+
if isinstance(p, bytes):
192+
sep = b'\\'
193+
altsep = b'/'
194+
colon = b':'
195+
unc_prefix = b'\\\\?\\UNC\\'
196+
empty = b''
197+
else:
198+
sep = '\\'
199+
altsep = '/'
200+
colon = ':'
201+
unc_prefix = '\\\\?\\UNC\\'
202+
empty = ''
203+
normp = p.replace(altsep, sep)
204+
if normp[:1] == sep:
205+
if normp[1:2] == sep:
187206
# UNC drives, e.g. \\server\share or \\?\UNC\server\share
188207
# Device drives, e.g. \\.\device or \\?\device
189208
start = 8 if normp[:8].upper() == unc_prefix else 2
190209
index = normp.find(sep, start)
191210
if index == -1:
192-
return p, p[:0]
211+
return p, empty, empty
193212
index2 = normp.find(sep, index + 1)
194213
if index2 == -1:
195-
return p, p[:0]
196-
return p[:index2], p[index2:]
197-
if normp[1:2] == colon:
198-
# Drive-letter drives, e.g. X:
199-
return p[:2], p[2:]
200-
return p[:0], p
214+
return p, empty, empty
215+
return p[:index2], p[index2:index2 + 1], p[index2 + 1:]
216+
else:
217+
# Relative path with root, e.g. \Windows
218+
return empty, p[:1], p[1:]
219+
elif normp[1:2] == colon:
220+
if normp[2:3] == sep:
221+
# Absolute drive-letter path, e.g. X:\Windows
222+
return p[:2], p[2:3], p[3:]
223+
else:
224+
# Relative path with drive, e.g. X:Windows
225+
return p[:2], empty, p[2:]
226+
else:
227+
# Relative path, e.g. Windows
228+
return empty, empty, p
201229

202230

203231
# Split a path in head (everything up to the last '/') and tail (the
@@ -212,15 +240,13 @@ def split(p):
212240
Either part may be empty."""
213241
p = os.fspath(p)
214242
seps = _get_bothseps(p)
215-
d, p = splitdrive(p)
243+
d, r, p = splitroot(p)
216244
# set i to index beyond p's last slash
217245
i = len(p)
218246
while i and p[i-1] not in seps:
219247
i -= 1
220248
head, tail = p[:i], p[i:] # now tail has no slashes
221-
# remove trailing slashes from head, unless it's all slashes
222-
head = head.rstrip(seps) or head
223-
return d + head, tail
249+
return d + r + head.rstrip(seps), tail
224250

225251

226252
# Split a path in root and extension.
@@ -311,10 +337,10 @@ def ismount(path):
311337
path = os.fspath(path)
312338
seps = _get_bothseps(path)
313339
path = abspath(path)
314-
root, rest = splitdrive(path)
315-
if root and root[0] in seps:
316-
return (not rest) or (rest in seps)
317-
if rest and rest in seps:
340+
drive, root, rest = splitroot(path)
341+
if drive and drive[0] in seps:
342+
return not rest
343+
if root and not rest:
318344
return True
319345

320346
if _getvolumepathname:
@@ -525,13 +551,8 @@ def normpath(path):
525551
curdir = '.'
526552
pardir = '..'
527553
path = path.replace(altsep, sep)
528-
prefix, path = splitdrive(path)
529-
530-
# collapse initial backslashes
531-
if path.startswith(sep):
532-
prefix += sep
533-
path = path.lstrip(sep)
534-
554+
drive, root, path = splitroot(path)
555+
prefix = drive + root
535556
comps = path.split(sep)
536557
i = 0
537558
while i < len(comps):
@@ -541,7 +562,7 @@ def normpath(path):
541562
if i > 0 and comps[i-1] != pardir:
542563
del comps[i-1:i+1]
543564
i -= 1
544-
elif i == 0 and prefix.endswith(sep):
565+
elif i == 0 and root:
545566
del comps[i]
546567
else:
547568
i += 1
@@ -765,8 +786,8 @@ def relpath(path, start=None):
765786
try:
766787
start_abs = abspath(normpath(start))
767788
path_abs = abspath(normpath(path))
768-
start_drive, start_rest = splitdrive(start_abs)
769-
path_drive, path_rest = splitdrive(path_abs)
789+
start_drive, _, start_rest = splitroot(start_abs)
790+
path_drive, _, path_rest = splitroot(path_abs)
770791
if normcase(start_drive) != normcase(path_drive):
771792
raise ValueError("path is on mount %r, start on mount %r" % (
772793
path_drive, start_drive))
@@ -816,21 +837,19 @@ def commonpath(paths):
816837
curdir = '.'
817838

818839
try:
819-
drivesplits = [splitdrive(p.replace(altsep, sep).lower()) for p in paths]
820-
split_paths = [p.split(sep) for d, p in drivesplits]
840+
drivesplits = [splitroot(p.replace(altsep, sep).lower()) for p in paths]
841+
split_paths = [p.split(sep) for d, r, p in drivesplits]
821842

822-
try:
823-
isabs, = set(p[:1] == sep for d, p in drivesplits)
824-
except ValueError:
825-
raise ValueError("Can't mix absolute and relative paths") from None
843+
if len({r for d, r, p in drivesplits}) != 1:
844+
raise ValueError("Can't mix absolute and relative paths")
826845

827846
# Check that all drive letters or UNC paths match. The check is made only
828847
# now otherwise type errors for mixing strings and bytes would not be
829848
# caught.
830-
if len(set(d for d, p in drivesplits)) != 1:
849+
if len({d for d, r, p in drivesplits}) != 1:
831850
raise ValueError("Paths don't have the same drive")
832851

833-
drive, path = splitdrive(paths[0].replace(altsep, sep))
852+
drive, root, path = splitroot(paths[0].replace(altsep, sep))
834853
common = path.split(sep)
835854
common = [c for c in common if c and c != curdir]
836855

@@ -844,8 +863,7 @@ def commonpath(paths):
844863
else:
845864
common = common[:len(s1)]
846865

847-
prefix = drive + sep if isabs else drive
848-
return prefix + sep.join(common)
866+
return drive + root + sep.join(common)
849867
except (TypeError, AttributeError):
850868
genericpath._check_arg_types('commonpath', *paths)
851869
raise

Lib/pathlib.py

+7-17
Original file line numberDiff line numberDiff line change
@@ -271,19 +271,6 @@ def __reduce__(self):
271271
# when pickling related paths.
272272
return (self.__class__, tuple(self._parts))
273273

274-
@classmethod
275-
def _split_root(cls, part):
276-
sep = cls._flavour.sep
277-
rel = cls._flavour.splitdrive(part)[1].lstrip(sep)
278-
anchor = part.removesuffix(rel)
279-
if anchor:
280-
anchor = cls._flavour.normpath(anchor)
281-
drv, root = cls._flavour.splitdrive(anchor)
282-
if drv.startswith(sep):
283-
# UNC paths always have a root.
284-
root = sep
285-
return drv, root, rel
286-
287274
@classmethod
288275
def _parse_parts(cls, parts):
289276
if not parts:
@@ -293,7 +280,10 @@ def _parse_parts(cls, parts):
293280
path = cls._flavour.join(*parts)
294281
if altsep:
295282
path = path.replace(altsep, sep)
296-
drv, root, rel = cls._split_root(path)
283+
drv, root, rel = cls._flavour.splitroot(path)
284+
if drv.startswith(sep):
285+
# pathlib assumes that UNC paths always have a root.
286+
root = sep
297287
unfiltered_parsed = [drv + root] + rel.split(sep)
298288
parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
299289
return drv, root, parsed
@@ -493,9 +483,9 @@ def with_name(self, name):
493483
"""Return a new path with the file name changed."""
494484
if not self.name:
495485
raise ValueError("%r has an empty name" % (self,))
496-
drv, root, parts = self._parse_parts((name,))
497-
if (not name or name[-1] in [self._flavour.sep, self._flavour.altsep]
498-
or drv or root or len(parts) != 1):
486+
f = self._flavour
487+
drv, root, tail = f.splitroot(name)
488+
if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail):
499489
raise ValueError("Invalid name %r" % (name))
500490
return self._from_parsed_parts(self._drv, self._root,
501491
self._parts[:-1] + [name])

0 commit comments

Comments
 (0)