From f268157234eb4833b0781ad56677c35f7af8383f Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 2 May 2023 20:43:07 +0100 Subject: [PATCH 1/3] Optimize `pathlib.Path.glob()` handling of `../` pattern segments These segments do not require a `stat()` call, as the selector's `_select_from()` method is called after we've established that the parent is a directory. --- Lib/pathlib.py | 12 ++++++++++++ .../2023-05-02-20-43-03.gh-issue-104102.vgSdEJ.rst | 2 ++ 2 files changed, 14 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-05-02-20-43-03.gh-issue-104102.vgSdEJ.rst diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 8eb08949fa9b43..abb1fc49472516 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -71,6 +71,8 @@ def _make_selector(pattern_parts, flavour): return _TerminatingSelector() if pat == '**': cls = _RecursiveWildcardSelector + elif pat == '..': + cls = _ParentSelector elif '**' in pat: raise ValueError("Invalid pattern: '**' can only be an entire path component") elif _is_wildcard_pattern(pat): @@ -112,6 +114,16 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase): yield parent_path +class _ParentSelector(_Selector): + def __init__(self, name, child_parts, case_sensitive): + _Selector.__init__(self, child_parts, case_sensitive) + + def _select_from(self, parent_path, is_dir, exists, scandir, normcase): + path = parent_path._make_child_relpath('..') + for p in self.successor._select_from(path, is_dir, exists, scandir, normcase): + yield p + + class _PreciseSelector(_Selector): def __init__(self, name, child_parts, flavour): diff --git a/Misc/NEWS.d/next/Library/2023-05-02-20-43-03.gh-issue-104102.vgSdEJ.rst b/Misc/NEWS.d/next/Library/2023-05-02-20-43-03.gh-issue-104102.vgSdEJ.rst new file mode 100644 index 00000000000000..7101de908a5004 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-05-02-20-43-03.gh-issue-104102.vgSdEJ.rst @@ -0,0 +1,2 @@ +Improve performance of :meth:`pathlib.Path.glob` when evaluating patterns +that contain ``'../'`` segments. From e35153fc93460bd77d35b0a506bf53a80a9ffbe6 Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 2 May 2023 22:55:03 +0100 Subject: [PATCH 2/3] Fix argument name --- Lib/pathlib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index a0dc4a221d7763..c69089f4e1bc5d 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -117,8 +117,8 @@ def _select_from(self, parent_path, is_dir, exists, scandir): class _ParentSelector(_Selector): - def __init__(self, name, child_parts, case_sensitive): - _Selector.__init__(self, child_parts, case_sensitive) + def __init__(self, name, child_parts, flavour): + _Selector.__init__(self, child_parts, flavour) def _select_from(self, parent_path, is_dir, exists, scandir): path = parent_path._make_child_relpath('..') From 6ab6f0f34bc81ef8d720f627d980e9920f5d2cb4 Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 2 May 2023 23:53:55 +0100 Subject: [PATCH 3/3] Expand tests --- Lib/test/test_pathlib.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 8b5b61a818bbbc..9902b7242205f3 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1892,8 +1892,13 @@ def test_glob_dotdot(self): P = self.cls p = P(BASE) self.assertEqual(set(p.glob("..")), { P(BASE, "..") }) + self.assertEqual(set(p.glob("../..")), { P(BASE, "..", "..") }) + self.assertEqual(set(p.glob("dirA/..")), { P(BASE, "dirA", "..") }) self.assertEqual(set(p.glob("dirA/../file*")), { P(BASE, "dirA/../fileA") }) + self.assertEqual(set(p.glob("dirA/../file*/..")), set()) self.assertEqual(set(p.glob("../xyzzy")), set()) + self.assertEqual(set(p.glob("xyzzy/..")), set()) + self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(BASE, *[".."] * 50)}) @os_helper.skip_unless_symlink def test_glob_permissions(self):