|
4 | 4 | import os
|
5 | 5 | import re
|
6 | 6 | import fnmatch
|
| 7 | +import functools |
7 | 8 | import itertools
|
| 9 | +import operator |
8 | 10 | import stat
|
9 | 11 | import sys
|
10 | 12 |
|
@@ -256,7 +258,9 @@ def escape(pathname):
|
256 | 258 | return drive + pathname
|
257 | 259 |
|
258 | 260 |
|
# Pattern parts that name a path directly instead of being matched against
# directory entries; _Globber.selector() routes them to special_selector(),
# and recursive_selector() stops joining pattern parts when it meets one.
_special_parts = ('', '.', '..')
# O_RDONLY combined with O_DIRECTORY on platforms whose os module defines it
# (the getattr() default of 0 leaves the flags unchanged elsewhere).
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
# Unique sentinel compared by identity against _Globber.recursive: when that
# attribute is this object, recursive '**' walks do not follow symlinks.
_no_recurse_symlinks = object()
260 | 264 |
|
261 | 265 |
|
262 | 266 | def translate(pat, *, recursive=False, include_hidden=False, seps=None):
|
@@ -312,3 +316,185 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None):
|
312 | 316 | results.append(any_sep)
|
313 | 317 | res = ''.join(results)
|
314 | 318 | return fr'(?s:{res})\Z'
|
| 319 | + |
| 320 | + |
@functools.lru_cache(maxsize=512)
def _compile_pattern(pat, sep, case_sensitive, recursive=True):
    """Return a cached matcher for the glob pattern *pat*.

    The pattern is turned into a regular expression by translate(), always
    with include_hidden=True and with *sep* as the separators, then compiled
    with re.IGNORECASE unless *case_sensitive* is true.  The value returned
    is the bound ``match`` method of the compiled pattern rather than the
    pattern object itself.
    """
    regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep)
    if case_sensitive:
        flags = re.NOFLAG
    else:
        flags = re.IGNORECASE
    return re.compile(regex, flags=flags).match
| 328 | + |
| 329 | + |
| 330 | +class _Globber: |
| 331 | + """Class providing shell-style pattern matching and globbing. |
| 332 | + """ |
| 333 | + |
| 334 | + def __init__(self, sep, case_sensitive, recursive=False): |
| 335 | + self.sep = sep |
| 336 | + self.case_sensitive = case_sensitive |
| 337 | + self.recursive = recursive |
| 338 | + |
| 339 | + # Low-level methods |
| 340 | + |
| 341 | + lstat = staticmethod(os.lstat) |
| 342 | + scandir = staticmethod(os.scandir) |
| 343 | + parse_entry = operator.attrgetter('path') |
| 344 | + concat_path = operator.add |
| 345 | + |
| 346 | + if os.name == 'nt': |
| 347 | + @staticmethod |
| 348 | + def add_slash(pathname): |
| 349 | + tail = os.path.splitroot(pathname)[2] |
| 350 | + if not tail or tail[-1] in '\\/': |
| 351 | + return pathname |
| 352 | + return f'{pathname}\\' |
| 353 | + else: |
| 354 | + @staticmethod |
| 355 | + def add_slash(pathname): |
| 356 | + if not pathname or pathname[-1] == '/': |
| 357 | + return pathname |
| 358 | + return f'{pathname}/' |
| 359 | + |
| 360 | + # High-level methods |
| 361 | + |
| 362 | + def compile(self, pat): |
| 363 | + return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive) |
| 364 | + |
| 365 | + def selector(self, parts): |
| 366 | + """Returns a function that selects from a given path, walking and |
| 367 | + filtering according to the glob-style pattern parts in *parts*. |
| 368 | + """ |
| 369 | + if not parts: |
| 370 | + return self.select_exists |
| 371 | + part = parts.pop() |
| 372 | + if self.recursive and part == '**': |
| 373 | + selector = self.recursive_selector |
| 374 | + elif part in _special_parts: |
| 375 | + selector = self.special_selector |
| 376 | + else: |
| 377 | + selector = self.wildcard_selector |
| 378 | + return selector(part, parts) |
| 379 | + |
| 380 | + def special_selector(self, part, parts): |
| 381 | + """Returns a function that selects special children of the given path. |
| 382 | + """ |
| 383 | + select_next = self.selector(parts) |
| 384 | + |
| 385 | + def select_special(path, exists=False): |
| 386 | + path = self.concat_path(self.add_slash(path), part) |
| 387 | + return select_next(path, exists) |
| 388 | + return select_special |
| 389 | + |
| 390 | + def wildcard_selector(self, part, parts): |
| 391 | + """Returns a function that selects direct children of a given path, |
| 392 | + filtering by pattern. |
| 393 | + """ |
| 394 | + |
| 395 | + match = None if part == '*' else self.compile(part) |
| 396 | + dir_only = bool(parts) |
| 397 | + if dir_only: |
| 398 | + select_next = self.selector(parts) |
| 399 | + |
| 400 | + def select_wildcard(path, exists=False): |
| 401 | + try: |
| 402 | + # We must close the scandir() object before proceeding to |
| 403 | + # avoid exhausting file descriptors when globbing deep trees. |
| 404 | + with self.scandir(path) as scandir_it: |
| 405 | + entries = list(scandir_it) |
| 406 | + except OSError: |
| 407 | + pass |
| 408 | + else: |
| 409 | + for entry in entries: |
| 410 | + if match is None or match(entry.name): |
| 411 | + if dir_only: |
| 412 | + try: |
| 413 | + if not entry.is_dir(): |
| 414 | + continue |
| 415 | + except OSError: |
| 416 | + continue |
| 417 | + entry_path = self.parse_entry(entry) |
| 418 | + if dir_only: |
| 419 | + yield from select_next(entry_path, exists=True) |
| 420 | + else: |
| 421 | + yield entry_path |
| 422 | + return select_wildcard |
| 423 | + |
| 424 | + def recursive_selector(self, part, parts): |
| 425 | + """Returns a function that selects a given path and all its children, |
| 426 | + recursively, filtering by pattern. |
| 427 | + """ |
| 428 | + # Optimization: consume following '**' parts, which have no effect. |
| 429 | + while parts and parts[-1] == '**': |
| 430 | + parts.pop() |
| 431 | + |
| 432 | + # Optimization: consume and join any following non-special parts here, |
| 433 | + # rather than leaving them for the next selector. They're used to |
| 434 | + # build a regular expression, which we use to filter the results of |
| 435 | + # the recursive walk. As a result, non-special pattern segments |
| 436 | + # following a '**' wildcard don't require additional filesystem access |
| 437 | + # to expand. |
| 438 | + follow_symlinks = self.recursive is not _no_recurse_symlinks |
| 439 | + if follow_symlinks: |
| 440 | + while parts and parts[-1] not in _special_parts: |
| 441 | + part += self.sep + parts.pop() |
| 442 | + |
| 443 | + match = None if part == '**' else self.compile(part) |
| 444 | + dir_only = bool(parts) |
| 445 | + select_next = self.selector(parts) |
| 446 | + |
| 447 | + def select_recursive(path, exists=False): |
| 448 | + path = self.add_slash(path) |
| 449 | + match_pos = len(str(path)) |
| 450 | + if match is None or match(str(path), match_pos): |
| 451 | + yield from select_next(path, exists) |
| 452 | + stack = [path] |
| 453 | + while stack: |
| 454 | + yield from select_recursive_step(stack, match_pos) |
| 455 | + |
| 456 | + def select_recursive_step(stack, match_pos): |
| 457 | + path = stack.pop() |
| 458 | + try: |
| 459 | + # We must close the scandir() object before proceeding to |
| 460 | + # avoid exhausting file descriptors when globbing deep trees. |
| 461 | + with self.scandir(path) as scandir_it: |
| 462 | + entries = list(scandir_it) |
| 463 | + except OSError: |
| 464 | + pass |
| 465 | + else: |
| 466 | + for entry in entries: |
| 467 | + is_dir = False |
| 468 | + try: |
| 469 | + if entry.is_dir(follow_symlinks=follow_symlinks): |
| 470 | + is_dir = True |
| 471 | + except OSError: |
| 472 | + pass |
| 473 | + |
| 474 | + if is_dir or not dir_only: |
| 475 | + entry_path = self.parse_entry(entry) |
| 476 | + if match is None or match(str(entry_path), match_pos): |
| 477 | + if dir_only: |
| 478 | + yield from select_next(entry_path, exists=True) |
| 479 | + else: |
| 480 | + # Optimization: directly yield the path if this is |
| 481 | + # last pattern part. |
| 482 | + yield entry_path |
| 483 | + if is_dir: |
| 484 | + stack.append(entry_path) |
| 485 | + |
| 486 | + return select_recursive |
| 487 | + |
| 488 | + def select_exists(self, path, exists=False): |
| 489 | + """Yields the given path, if it exists. |
| 490 | + """ |
| 491 | + if exists: |
| 492 | + # Optimization: this path is already known to exist, e.g. because |
| 493 | + # it was returned from os.scandir(), so we skip calling lstat(). |
| 494 | + yield path |
| 495 | + else: |
| 496 | + try: |
| 497 | + self.lstat(path) |
| 498 | + yield path |
| 499 | + except OSError: |
| 500 | + pass |
0 commit comments