diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst index 684466d354aef8..849316553e408a 100644 --- a/Doc/library/glob.rst +++ b/Doc/library/glob.rst @@ -75,10 +75,6 @@ The :mod:`glob` module defines the following functions: Using the "``**``" pattern in large directory trees may consume an inordinate amount of time. - .. note:: - This function may return duplicate path names if *pathname* - contains multiple "``**``" patterns and *recursive* is true. - .. versionchanged:: 3.5 Support for recursive globs using "``**``". @@ -88,6 +84,11 @@ The :mod:`glob` module defines the following functions: .. versionchanged:: 3.11 Added the *include_hidden* parameter. + .. versionchanged:: 3.14 + Matching path names are returned only once. In previous versions, this + function may return duplicate path names if *pathname* contains multiple + "``**``" patterns and *recursive* is true. + .. function:: iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False, \ include_hidden=False) @@ -98,10 +99,6 @@ The :mod:`glob` module defines the following functions: .. audit-event:: glob.glob pathname,recursive glob.iglob .. audit-event:: glob.glob/2 pathname,recursive,root_dir,dir_fd glob.iglob - .. note:: - This function may return duplicate path names if *pathname* - contains multiple "``**``" patterns and *recursive* is true. - .. versionchanged:: 3.5 Support for recursive globs using "``**``". @@ -111,6 +108,11 @@ The :mod:`glob` module defines the following functions: .. versionchanged:: 3.11 Added the *include_hidden* parameter. + .. versionchanged:: 3.14 + Matching path names are yielded only once. In previous versions, this + function may yield duplicate path names if *pathname* contains multiple + "``**``" patterns and *recursive* is true. + .. function:: escape(pathname) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index dbd59a9d7be150..3c876a193fad32 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -968,6 +968,14 @@ base64 (Contributed by Bénédikt Tran, Chris Markiewicz, and Adam Turner in :gh:`118761`.) +glob +---- + +* Reduce the number of system calls in :func:`glob.glob` and :func:`~glob.iglob`, + thereby improving the speed of globbing operations by 20-80%. + (Contributed by Barney Gale in :gh:`116380`.) + + io --- * :mod:`io` which provides the built-in :func:`open` makes less system calls diff --git a/Lib/glob.py b/Lib/glob.py index d1a6dddeeb1610..7ac497d4f0075a 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -1,13 +1,10 @@ """Filename globbing utility.""" -import contextlib import os import re import fnmatch import functools -import itertools import operator -import stat import sys @@ -45,82 +42,35 @@ def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False, """ sys.audit("glob.glob", pathname, recursive) sys.audit("glob.glob/2", pathname, recursive, root_dir, dir_fd) - if root_dir is not None: - root_dir = os.fspath(root_dir) - else: - root_dir = pathname[:0] - it = _iglob(pathname, root_dir, dir_fd, recursive, False, - include_hidden=include_hidden) - if not pathname or recursive and _isrecursive(pathname[:2]): - try: - s = next(it) # skip empty string - if s: - it = itertools.chain((s,), it) - except StopIteration: - pass - return it - -def _iglob(pathname, root_dir, dir_fd, recursive, dironly, - include_hidden=False): - dirname, basename = os.path.split(pathname) - if not has_magic(pathname): - assert not dironly - if basename: - if _lexists(_join(root_dir, pathname), dir_fd): - yield pathname - else: - # Patterns ending with a slash should match only directories - if _isdir(_join(root_dir, dirname), dir_fd): - yield pathname - return - if not dirname: - if recursive and _isrecursive(basename): - yield from _glob2(root_dir, basename, dir_fd, dironly, - include_hidden=include_hidden) - else: - yield from _glob1(root_dir, basename, dir_fd, dironly, - include_hidden=include_hidden) - return - # `os.path.split()` returns the argument itself as a dirname if it is a - # drive or UNC path. Prevent an infinite recursion if a drive or UNC path - # contains magic characters (i.e. r'\\?\C:'). - if dirname != pathname and has_magic(dirname): - dirs = _iglob(dirname, root_dir, dir_fd, recursive, True, - include_hidden=include_hidden) - else: - dirs = [dirname] - if has_magic(basename): - if recursive and _isrecursive(basename): - glob_in_dir = _glob2 - else: - glob_in_dir = _glob1 + pathname = os.fspath(pathname) + if isinstance(pathname, bytes): + pathname = os.fsdecode(pathname) + if root_dir is not None: + root_dir = os.fsdecode(root_dir) + paths = _iglob(pathname, root_dir, dir_fd, recursive, include_hidden) + return map(os.fsencode, paths) else: - glob_in_dir = _glob0 - for dirname in dirs: - for name in glob_in_dir(_join(root_dir, dirname), basename, dir_fd, dironly, - include_hidden=include_hidden): - yield os.path.join(dirname, name) - -# These 2 helper functions non-recursively glob inside a literal directory. -# They return a list of basenames. _glob1 accepts a pattern while _glob0 -# takes a literal basename (so it only has to check for its existence). - -def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False): - names = _listdir(dirname, dir_fd, dironly) - if not (include_hidden or _ishidden(pattern)): - names = (x for x in names if not _ishidden(x)) - return fnmatch.filter(names, pattern) - -def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False): - if basename: - if _lexists(_join(dirname, basename), dir_fd): - return [basename] + return _iglob(pathname, root_dir, dir_fd, recursive, include_hidden) + +def _iglob(pathname, root_dir, dir_fd, recursive, include_hidden): + if os.path.altsep: + pathname = pathname.replace(os.path.altsep, os.path.sep) + drive, root, tail = os.path.splitroot(pathname) + parts = tail.split(os.path.sep)[::-1] if tail else [] + globber = _StringGlobber(recursive=recursive, include_hidden=include_hidden) + select = globber.selector(parts) + if drive: + root = drive + root + return select(root, dir_fd, root) + elif root: + return select(root, dir_fd, root, exists=True) + elif not root_dir: + return select(root, dir_fd, root, empty=True) else: - # `os.path.split()` returns an empty basename for paths ending with a - # directory separator. 'q*x/' should match only directories. - if _isdir(dirname, dir_fd): - return [basename] - return [] + root = os.path.join(root_dir, '') + root_len = len(root) + paths = select(root, dir_fd, root, empty=True) + return (path[root_len:] for path in paths) _deprecated_function_message = ( "{name} is deprecated and will be removed in Python {remove}. Use " @@ -130,102 +80,16 @@ def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False): def glob0(dirname, pattern): import warnings warnings._deprecated("glob.glob0", _deprecated_function_message, remove=(3, 15)) - return _glob0(dirname, pattern, None, False) + dirname = os.path.join(dirname, '') + select = _StringGlobber().literal_selector(pattern, []) + return [path[len(dirname):] for path in select(dirname)] def glob1(dirname, pattern): import warnings warnings._deprecated("glob.glob1", _deprecated_function_message, remove=(3, 15)) - return _glob1(dirname, pattern, None, False) - -# This helper function recursively yields relative pathnames inside a literal -# directory. - -def _glob2(dirname, pattern, dir_fd, dironly, include_hidden=False): - assert _isrecursive(pattern) - if not dirname or _isdir(dirname, dir_fd): - yield pattern[:0] - yield from _rlistdir(dirname, dir_fd, dironly, - include_hidden=include_hidden) - -# If dironly is false, yields all file names inside a directory. -# If dironly is true, yields only directory names. -def _iterdir(dirname, dir_fd, dironly): - try: - fd = None - fsencode = None - if dir_fd is not None: - if dirname: - fd = arg = os.open(dirname, _dir_open_flags, dir_fd=dir_fd) - else: - arg = dir_fd - if isinstance(dirname, bytes): - fsencode = os.fsencode - elif dirname: - arg = dirname - elif isinstance(dirname, bytes): - arg = bytes(os.curdir, 'ASCII') - else: - arg = os.curdir - try: - with os.scandir(arg) as it: - for entry in it: - try: - if not dironly or entry.is_dir(): - if fsencode is not None: - yield fsencode(entry.name) - else: - yield entry.name - except OSError: - pass - finally: - if fd is not None: - os.close(fd) - except OSError: - return - -def _listdir(dirname, dir_fd, dironly): - with contextlib.closing(_iterdir(dirname, dir_fd, dironly)) as it: - return list(it) - -# Recursively yields relative pathnames inside a literal directory. -def _rlistdir(dirname, dir_fd, dironly, include_hidden=False): - names = _listdir(dirname, dir_fd, dironly) - for x in names: - if include_hidden or not _ishidden(x): - yield x - path = _join(dirname, x) if dirname else x - for y in _rlistdir(path, dir_fd, dironly, - include_hidden=include_hidden): - yield _join(x, y) - - -def _lexists(pathname, dir_fd): - # Same as os.path.lexists(), but with dir_fd - if dir_fd is None: - return os.path.lexists(pathname) - try: - os.lstat(pathname, dir_fd=dir_fd) - except (OSError, ValueError): - return False - else: - return True - -def _isdir(pathname, dir_fd): - # Same as os.path.isdir(), but with dir_fd - if dir_fd is None: - return os.path.isdir(pathname) - try: - st = os.stat(pathname, dir_fd=dir_fd) - except (OSError, ValueError): - return False - else: - return stat.S_ISDIR(st.st_mode) - -def _join(dirname, basename): - # It is common if dirname or basename is empty - if not dirname or not basename: - return dirname or basename - return os.path.join(dirname, basename) + dirname = os.path.join(dirname, '') + select = _StringGlobber().wildcard_selector(pattern, []) + return [path[len(dirname):] for path in select(dirname)] magic_check = re.compile('([*?[])') magic_check_bytes = re.compile(b'([*?[])') @@ -237,15 +101,6 @@ def has_magic(s): match = magic_check.search(s) return match is not None -def _ishidden(path): - return path[0] in ('.', b'.'[0]) - -def _isrecursive(pattern): - if isinstance(pattern, bytes): - return pattern == b'**' - else: - return pattern == '**' - def escape(pathname): """Escape all special characters. """ @@ -319,12 +174,13 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None): return fr'(?s:{res})\Z' -@functools.lru_cache(maxsize=512) -def _compile_pattern(pat, sep, case_sensitive, recursive=True): +@functools.lru_cache(maxsize=1024) +def _compile_pattern(pat, sep, case_sensitive, recursive, include_hidden): """Compile given glob pattern to a re.Pattern object (observing case sensitivity).""" flags = re.NOFLAG if case_sensitive else re.IGNORECASE - regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep) + regex = translate(pat, recursive=recursive, + include_hidden=include_hidden, seps=sep) return re.compile(regex, flags=flags).match @@ -332,11 +188,13 @@ class _GlobberBase: """Abstract class providing shell-style pattern matching and globbing. """ - def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False): + def __init__(self, sep=os.path.sep, case_sensitive=os.name != 'nt', + case_pedantic=False, recursive=False, include_hidden=False): self.sep = sep self.case_sensitive = case_sensitive self.case_pedantic = case_pedantic self.recursive = recursive + self.include_hidden = include_hidden # Abstract methods @@ -346,12 +204,38 @@ def lexists(path): """ raise NotImplementedError + @staticmethod + def lstat(path, dir_fd=None): + """Implements os.lstat() + """ + raise NotImplementedError + + @staticmethod + def open(path, flags, dir_fd=None): + """Implements os.open() + """ + raise NotImplementedError + @staticmethod def scandir(path): """Like os.scandir(), but generates (entry, name, path) tuples. """ raise NotImplementedError + @staticmethod + def scandir_cwd(): + raise NotImplementedError + + @staticmethod + def scandir_fd(fd, prefix): + raise NotImplementedError + + @staticmethod + def close(fd): + """Implements os.close(). + """ + raise NotImplementedError + @staticmethod def concat_path(path, text): """Implements path concatenation. @@ -361,7 +245,8 @@ def concat_path(path, text): # High-level methods def compile(self, pat): - return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive) + return _compile_pattern(pat, self.sep, self.case_sensitive, + self.recursive, self.include_hidden) def selector(self, parts): """Returns a function that selects from a given path, walking and @@ -386,10 +271,14 @@ def special_selector(self, part, parts): if parts: part += self.sep select_next = self.selector(parts) + if not part: + return select_next - def select_special(path, exists=False): + def select_special(path, dir_fd=None, rel_path=None, exists=False, empty=False): path = self.concat_path(path, part) - return select_next(path, exists) + if dir_fd is not None: + rel_path = self.concat_path(rel_path, part) + return select_next(path, dir_fd, rel_path, exists) return select_special def literal_selector(self, part, parts): @@ -406,9 +295,11 @@ def literal_selector(self, part, parts): select_next = self.selector(parts) - def select_literal(path, exists=False): + def select_literal(path, dir_fd=None, rel_path=None, exists=False, empty=False): path = self.concat_path(path, part) - return select_next(path, exists=False) + if dir_fd is not None: + rel_path = self.concat_path(rel_path, part) + return select_next(path, dir_fd, rel_path) return select_literal def wildcard_selector(self, part, parts): @@ -416,14 +307,24 @@ def wildcard_selector(self, part, parts): filtering by pattern. """ - match = None if part == '*' else self.compile(part) + match = None if self.include_hidden and part == '*' else self.compile(part) dir_only = bool(parts) if dir_only: select_next = self.selector(parts) - def select_wildcard(path, exists=False): + def select_wildcard(path, dir_fd=None, rel_path=None, exists=False, empty=False): + close_fd = False try: - entries = self.scandir(path) + if dir_fd is None: + fd = None + entries = self.scandir(path) if path else self.scandir_cwd() + elif not rel_path: + fd = dir_fd + entries = self.scandir_fd(fd, path) + else: + fd = self.open(rel_path, _dir_open_flags, dir_fd=dir_fd) + close_fd = True + entries = self.scandir_fd(fd, path) except OSError: pass else: @@ -436,9 +337,17 @@ def select_wildcard(path, exists=False): except OSError: continue entry_path = self.concat_path(entry_path, self.sep) - yield from select_next(entry_path, exists=True) + if fd is not None: + entry_name = entry_name + self.sep + yield from select_next( + entry_path, fd, entry_name, exists=True) else: + # Optimization: directly yield the path if this is + # last pattern part. yield entry_path + finally: + if close_fd: + self.close(fd) return select_wildcard def recursive_selector(self, part, parts): @@ -460,26 +369,49 @@ def recursive_selector(self, part, parts): while parts and parts[-1] not in _special_parts: part += self.sep + parts.pop() - match = None if part == '**' else self.compile(part) + match = None if self.include_hidden and part == '**' else self.compile(part) dir_only = bool(parts) select_next = self.selector(parts) - def select_recursive(path, exists=False): + def select_recursive(path, dir_fd=None, rel_path=None, exists=False, empty=False): match_pos = len(str(path)) if match is None or match(str(path), match_pos): - yield from select_next(path, exists) - stack = [path] - while stack: - yield from select_recursive_step(stack, match_pos) + yield from select_next(path, dir_fd, rel_path, exists, empty) + stack = [(path, dir_fd, rel_path)] + try: + while stack: + yield from select_recursive_step(stack, match_pos) + finally: + # Close any file descriptors still on the stack. + while stack: + path, dir_fd, _rel_path = stack.pop() + if path is None: + try: + self.close(dir_fd) + except OSError: + pass def select_recursive_step(stack, match_pos): - path = stack.pop() + path, dir_fd, rel_path = stack.pop() try: - entries = self.scandir(path) + if path is None: + self.close(dir_fd) + return + elif dir_fd is None: + fd = None + entries = self.scandir(path) if path else self.scandir_cwd() + elif not rel_path: + fd = dir_fd + entries = self.scandir_fd(fd, path) + else: + fd = self.open(rel_path, _dir_open_flags, dir_fd=dir_fd) + # Schedule the file descriptor to be closed next step. + stack.append((None, fd, None)) + entries = self.scandir_fd(fd, path) except OSError: pass else: - for entry, _entry_name, entry_path in entries: + for entry, entry_name, entry_path in entries: is_dir = False try: if entry.is_dir(follow_symlinks=follow_symlinks): @@ -491,25 +423,38 @@ def select_recursive_step(stack, match_pos): entry_path_str = str(entry_path) if dir_only: entry_path = self.concat_path(entry_path, self.sep) + if fd is not None: + entry_name = entry_name + self.sep if match is None or match(entry_path_str, match_pos): if dir_only: - yield from select_next(entry_path, exists=True) + yield from select_next( + entry_path, fd, entry_name, exists=True) else: # Optimization: directly yield the path if this is # last pattern part. yield entry_path if is_dir: - stack.append(entry_path) + stack.append((entry_path, fd, entry_name)) return select_recursive - def select_exists(self, path, exists=False): - """Yields the given path, if it exists. + def select_exists(self, path, dir_fd=None, rel_path=None, exists=False, empty=False): + """Yields the given path, if it exists. If *dir_fd* is given, we check + whether *rel_path* exists relative to the fd. """ - if exists: + if empty: + # Suppress initial path so iglob() doesn't yield the empty string. + pass + elif exists: # Optimization: this path is already known to exist, e.g. because # it was returned from os.scandir(), so we skip calling lstat(). yield path + elif dir_fd is not None: + try: + self.lstat(rel_path, dir_fd=dir_fd) + yield path + except OSError: + pass elif self.lexists(path): yield path @@ -518,6 +463,9 @@ class _StringGlobber(_GlobberBase): """Provides shell-style pattern matching and globbing for string paths. """ lexists = staticmethod(os.path.lexists) + lstat = staticmethod(os.lstat) + open = staticmethod(os.open) + close = staticmethod(os.close) concat_path = operator.add @staticmethod @@ -528,6 +476,20 @@ def scandir(path): entries = list(scandir_it) return ((entry, entry.name, entry.path) for entry in entries) + @staticmethod + def scandir_cwd(): + with os.scandir() as scandir_it: + entries = list(scandir_it) + # Suppress leading dot when scanning current directory. + return ((entry, entry.name, entry.name) for entry in entries) + + @staticmethod + def scandir_fd(fd, prefix): + prefix = os.path.join(prefix, prefix[:0]) + with os.scandir(fd) as scandir_it: + entries = list(scandir_it) + return ((entry, entry.name, prefix + entry.name) for entry in entries) + class _PathGlobber(_GlobberBase): """Provides shell-style pattern matching and globbing for pathlib paths. diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 930701d4789f5c..bd2252ef77dcf6 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -196,7 +196,8 @@ def full_match(self, pattern, *, case_sensitive=None): pattern = self.with_segments(pattern) if case_sensitive is None: case_sensitive = self.parser.normcase('Aa') == 'Aa' - globber = _PathGlobber(pattern.parser.sep, case_sensitive, recursive=True) + globber = _PathGlobber(pattern.parser.sep, case_sensitive, + recursive=True, include_hidden=True) match = globber.compile(str(pattern)) return match(str(self)) is not None diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 83da6960f00489..492bae0b7f7cd8 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -558,7 +558,8 @@ def full_match(self, pattern, *, case_sensitive=None): # paths shouldn't match wildcards, so we change it to the empty string. path = str(self) if self.parts else '' pattern = str(pattern) if pattern.parts else '' - globber = _StringGlobber(self.parser.sep, case_sensitive, recursive=True) + globber = _StringGlobber(self.parser.sep, case_sensitive, + recursive=True, include_hidden=True) return globber.compile(pattern)(path) is not None def match(self, path_pattern, *, case_sensitive=None): @@ -580,7 +581,8 @@ def match(self, path_pattern, *, case_sensitive=None): return False if len(path_parts) > len(pattern_parts) and path_pattern.anchor: return False - globber = _StringGlobber(self.parser.sep, case_sensitive) + globber = _StringGlobber(self.parser.sep, case_sensitive, + include_hidden=True) for path_part, pattern_part in zip(path_parts, pattern_parts): match = globber.compile(pattern_part) if match(path_part) is None: @@ -855,7 +857,8 @@ def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): case_pedantic = True parts = self._parse_pattern(pattern) recursive = True if recurse_symlinks else _no_recurse_symlinks - globber = _StringGlobber(self.parser.sep, case_sensitive, case_pedantic, recursive) + globber = _StringGlobber(self.parser.sep, case_sensitive, case_pedantic, + recursive, include_hidden=True) select = globber.selector(parts[::-1]) root = str(self) paths = select(self.parser.join(root, '')) diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index da73769c16e9af..2290e06e60e377 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -4,14 +4,18 @@ import shutil import sys import unittest +import unittest.mock import warnings from test import support -from test.support import is_wasi, Py_DEBUG +from test.support import is_wasi, Py_DEBUG, infinite_recursion from test.support.os_helper import (TESTFN, skip_unless_symlink, can_symlink, create_empty_file, change_cwd) +_supports_dir_fd = {os.open, os.stat} <= os.supports_dir_fd and os.scandir in os.supports_fd + + class GlobTests(unittest.TestCase): dir_fd = None @@ -49,7 +53,7 @@ def setUp(self): def open_dirfd(self): if self.dir_fd is not None: os.close(self.dir_fd) - if {os.open, os.stat} <= os.supports_dir_fd and os.scandir in os.supports_fd: + if _supports_dir_fd: self.dir_fd = os.open(self.tempdir, os.O_RDONLY | os.O_DIRECTORY) else: self.dir_fd = None @@ -177,20 +181,18 @@ def test_glob_directory_with_trailing_slash(self): self.assertEqual(glob.glob(self.norm('Z*Z') + sep), []) self.assertEqual(glob.glob(self.norm('ZZZ') + sep), []) self.assertEqual(glob.glob(self.norm('aaa') + sep), - [self.norm('aaa') + sep]) - # Preserving the redundant separators is an implementation detail. + [self.norm('aaa') + os.sep]) + # Redundant separators are preserved and normalized self.assertEqual(glob.glob(self.norm('aaa') + sep*2), - [self.norm('aaa') + sep*2]) + [self.norm('aaa') + os.sep*2]) # When there is a wildcard pattern which ends with a pathname # separator, glob() doesn't blow. # The result should end with the pathname separator. - # Normalizing the trailing separator is an implementation detail. eq = self.assertSequencesEqual_noorder eq(glob.glob(self.norm('aa*') + sep), [self.norm('aaa') + os.sep, self.norm('aab') + os.sep]) - # Stripping the redundant separators is an implementation detail. eq(glob.glob(self.norm('aa*') + sep*2), - [self.norm('aaa') + os.sep, self.norm('aab') + os.sep]) + [self.norm('aaa') + os.sep*2, self.norm('aab') + os.sep*2]) def test_glob_bytes_directory_with_trailing_slash(self): # Same as test_glob_directory_with_trailing_slash, but with a @@ -200,16 +202,16 @@ def test_glob_bytes_directory_with_trailing_slash(self): self.assertEqual(glob.glob(os.fsencode(self.norm('Z*Z') + sep)), []) self.assertEqual(glob.glob(os.fsencode(self.norm('ZZZ') + sep)), []) self.assertEqual(glob.glob(os.fsencode(self.norm('aaa') + sep)), - [os.fsencode(self.norm('aaa') + sep)]) + [os.fsencode(self.norm('aaa') + os.sep)]) self.assertEqual(glob.glob(os.fsencode(self.norm('aaa') + sep*2)), - [os.fsencode(self.norm('aaa') + sep*2)]) + [os.fsencode(self.norm('aaa') + os.sep*2)]) eq = self.assertSequencesEqual_noorder eq(glob.glob(os.fsencode(self.norm('aa*') + sep)), [os.fsencode(self.norm('aaa') + os.sep), os.fsencode(self.norm('aab') + os.sep)]) eq(glob.glob(os.fsencode(self.norm('aa*') + sep*2)), - [os.fsencode(self.norm('aaa') + os.sep), - os.fsencode(self.norm('aab') + os.sep)]) + [os.fsencode(self.norm('aaa') + os.sep*2), + os.fsencode(self.norm('aab') + os.sep*2)]) @skip_unless_symlink def test_glob_symlinks(self): @@ -326,8 +328,12 @@ def test_recursive_glob(self): with change_cwd(self.tempdir): join = os.path.join eq(glob.glob('**', recursive=True), [join(*i) for i in full]) + eq(glob.glob(join('**', '**'), recursive=True), + [join(*i) for i in full]) eq(glob.glob(join('**', ''), recursive=True), [join(*i) for i in dirs]) + eq(glob.glob(join('**', '**', ''), recursive=True), + [join(*i) for i in dirs]) eq(glob.glob(join('**', '*'), recursive=True), [join(*i) for i in full]) eq(glob.glob(join(os.curdir, '**'), recursive=True), @@ -394,6 +400,33 @@ def test_glob_many_open_files(self): for it in iters: self.assertEqual(next(it), p) + def test_glob_above_recursion_limit(self): + depth = 30 + base = os.path.join(self.tempdir, 'deep') + p = os.path.join(base, *(['d']*depth)) + os.makedirs(p) + pattern = os.path.join(base, '**', 'd') + with infinite_recursion(depth - 5): + glob.glob(pattern, recursive=True) + + @unittest.skipUnless(_supports_dir_fd, "Needs support for iglob(dir_fd=...)") + def test_iglob_iter_close(self): + base = os.path.join(self.tempdir, 'deep') + p = os.path.join(base, *(['d'] * 10)) + os.makedirs(p) + with ( + unittest.mock.patch("glob._StringGlobber.open", wraps=os.open) as os_open, + unittest.mock.patch("glob._StringGlobber.close", wraps=os.close) as os_close + ): + self.assertEqual(os_open.call_count, os_close.call_count) + iter = glob.iglob('**/*/d', dir_fd=self.dir_fd, recursive=True) + self.assertEqual(os_open.call_count, os_close.call_count) + self.assertEqual(next(iter), 'deep/d') + self.assertEqual(next(iter), 'deep/d/d') + self.assertGreater(os_open.call_count, os_close.call_count) + iter.close() + self.assertEqual(os_open.call_count, os_close.call_count) + def test_glob0(self): with self.assertWarns(DeprecationWarning): glob.glob0(self.tempdir, 'a') diff --git a/Misc/NEWS.d/next/Library/2024-03-05-23-08-11.gh-issue-116380.56HU7I.rst b/Misc/NEWS.d/next/Library/2024-03-05-23-08-11.gh-issue-116380.56HU7I.rst new file mode 100644 index 00000000000000..b7f27ab7191a96 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-03-05-23-08-11.gh-issue-116380.56HU7I.rst @@ -0,0 +1,2 @@ +Speed up :func:`glob.glob` and :func:`glob.iglob` by making use of +:func:`glob.translate` and tracking path existence more precisely.