|
2 | 2 |
|
3 | 3 | import contextlib
|
4 | 4 | import os
|
| 5 | +import re |
5 | 6 | import fnmatch
|
| 7 | +import functools |
6 | 8 | import itertools
|
| 9 | +import operator |
7 | 10 | import stat
|
8 | 11 | import sys
|
9 | 12 |
|
10 |
| -from pathlib._glob import translate, magic_check, magic_check_bytes |
11 | 13 |
|
12 | 14 | __all__ = ["glob", "iglob", "escape", "translate"]
|
13 | 15 |
|
@@ -225,6 +227,9 @@ def _join(dirname, basename):
|
225 | 227 | return dirname or basename
|
226 | 228 | return os.path.join(dirname, basename)
|
227 | 229 |
|
# Regular expressions that find the first glob metacharacter ('*', '?' or
# '[') in a pattern.  The character is captured in group 1.  There is one
# variant for str patterns and one for bytes patterns.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')
| 232 | + |
228 | 233 | def has_magic(s):
|
229 | 234 | if isinstance(s, bytes):
|
230 | 235 | match = magic_check_bytes.search(s)
|
@@ -254,4 +259,324 @@ def escape(pathname):
|
254 | 259 | return drive + pathname
|
255 | 260 |
|
256 | 261 |
|
# Pattern parts that _Globber.selector() routes to special_selector(), and
# that recursive_selector() refuses to fold into its regular expression.
_special_parts = ('', '.', '..')
# Read-only open flags; O_DIRECTORY is OR-ed in only where the os module
# defines it (getattr() falls back to 0 otherwise).
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
# Unique sentinel compared by identity in _Globber.recursive_selector(): when
# a globber's `recursive` attribute is this object, '**' is still expanded
# but symlinks are not followed during the recursive walk.
_no_recurse_symlinks = object()
| 265 | + |
| 266 | + |
def translate(pat, *, recursive=False, include_hidden=False, seps=None):
    """Convert a shell-style pathname pattern into a regular expression string.

    The pattern is split on the separator characters and each segment is
    translated on its own:

    * a lone '*' segment matches exactly one path segment;
    * when `recursive` is true, a lone '**' segment matches any number of
      path segments (consecutive '**' segments collapse into one);
    * any other non-empty segment is handed to fnmatch._translate(), with
      wildcards restricted so that they never match a separator.

    Unless `include_hidden` is true, wildcards do not match a path segment
    that begins with a dot ('.').

    `seps` is a sequence of separator characters.  When it is empty or not
    given, os.path.sep and os.path.altsep (where available) are used.

    The returned string has the form '(?s:...)\\Z'.
    """
    if not seps:
        seps = (os.path.sep, os.path.altsep) if os.path.altsep else os.path.sep
    sep_chars = ''.join(re.escape(sep) for sep in seps)
    # A character class is only needed when there are several separators.
    any_sep = f'[{sep_chars}]' if len(seps) > 1 else sep_chars
    not_sep = f'[^{sep_chars}]'

    # Building blocks for '*' and '**' segments.  The "last" variants are
    # used for the final segment of the pattern, which is not followed by a
    # separator.
    if include_hidden:
        last_segment = f'{not_sep}+'
        segment = f'{last_segment}{any_sep}'
        segments = f'(?:.+{any_sep})?'
        last_segments = '.*'
    else:
        # Same as above, but no segment may begin with a dot.
        last_segment = f'[^{sep_chars}.]{not_sep}*'
        segment = f'{last_segment}{any_sep}'
        segments = f'(?:{segment})*'
        last_segments = f'{segments}(?:{last_segment})?'

    pattern_parts = re.split(any_sep, pat)
    final_idx = len(pattern_parts) - 1
    pieces = []
    for idx, part in enumerate(pattern_parts):
        is_last = idx == final_idx

        if part == '*':
            # The building block already carries its trailing separator.
            pieces.append(last_segment if is_last else segment)
            continue

        if recursive and part == '**':
            if is_last:
                pieces.append(last_segments)
            elif pattern_parts[idx + 1] != '**':
                # Only the last of a run of '**' segments emits anything.
                pieces.append(segments)
            continue

        if part:
            if not include_hidden and part[0] in '*?':
                # A leading wildcard must not match a leading dot.
                pieces.append(r'(?!\.)')
            pieces.extend(fnmatch._translate(part, f'{not_sep}*', not_sep))
        if not is_last:
            pieces.append(any_sep)

    body = ''.join(pieces)
    return fr'(?s:{body})\Z'
| 320 | + |
| 321 | + |
@functools.lru_cache(maxsize=512)
def _compile_pattern(pat, sep, case_sensitive, recursive=True):
    """Return the bound ``match`` method of the regex compiled from *pat*.

    The glob pattern is translated with hidden files included and *sep* as
    the separator set.  Matching ignores case unless *case_sensitive* is
    true.  Results are memoized, so all arguments must be hashable.
    """
    regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep)
    if case_sensitive:
        flags = re.NOFLAG
    else:
        flags = re.IGNORECASE
    compiled = re.compile(regex, flags=flags)
    return compiled.match
| 329 | + |
| 330 | + |
class _Globber:
    """Class providing shell-style pattern matching and globbing.

    A pattern is supplied to selector() as a list of parts.  selector()
    returns a "select" function taking ``(path, exists=False)`` that yields
    the paths under *path* matching those parts.

    The low-level methods (lstat, add_slash, scandir, concat_path and
    parse_entry) define how paths are manipulated and listed.  The versions
    here call methods on the path object itself (lstat(), joinpath(),
    iterdir(), with_segments()); subclasses may replace them.
    """

    def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False):
        # sep: separator used to join pattern parts and passed to translate()
        #   as `seps` when a part is compiled to a regular expression.
        # case_sensitive: if false, compiled patterns use re.IGNORECASE.
        # case_pedantic: if true, parts without wildcards are still handled
        #   by wildcard_selector() rather than literal_selector().
        # recursive: if true, a '**' part is expanded by recursive_selector().
        #   The module-level _no_recurse_symlinks sentinel also enables this
        #   but stops the recursive walk from following symlinks.
        self.sep = sep
        self.case_sensitive = case_sensitive
        self.case_pedantic = case_pedantic
        self.recursive = recursive

    # Low-level methods

    # lstat(path) calls path.lstat(); add_slash(path) calls path.joinpath('').
    # methodcaller objects do not bind when looked up on an instance, so
    # self.lstat(path) and cls.add_slash(path) both receive only the path.
    lstat = operator.methodcaller('lstat')
    add_slash = operator.methodcaller('joinpath', '')

    @staticmethod
    def scandir(path):
        """Emulates os.scandir(), which returns an object that can be used as
        a context manager. This method is called by walk() and glob().

        The iterator from path.iterdir() is wrapped in a null context
        manager, so leaving the ``with`` block performs no cleanup.
        """
        return contextlib.nullcontext(path.iterdir())

    @staticmethod
    def concat_path(path, text):
        """Appends text to the given path.

        Returns a new path built with path.with_segments() from the path's
        raw string with *text* added directly to the end.
        """
        return path.with_segments(path._raw_path + text)

    @staticmethod
    def parse_entry(entry):
        """Returns the path of an entry yielded from scandir().

        Entries produced by this class's scandir() are returned unchanged.
        """
        return entry

    # High-level methods

    def compile(self, pat):
        """Returns a match function for the glob pattern *pat*, compiled (and
        cached) by _compile_pattern() using this globber's separator, case
        sensitivity and recursion settings.
        """
        return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive)

    def selector(self, parts):
        """Returns a function that selects from a given path, walking and
        filtering according to the glob-style pattern parts in *parts*.

        *parts* is used as a stack and is modified in place: the next part to
        handle is popped from the end of the list, and the chosen selector
        may pop further parts.
        """
        if not parts:
            # Nothing left to match: just confirm that the path exists.
            return self.select_exists
        part = parts.pop()
        if self.recursive and part == '**':
            selector = self.recursive_selector
        elif part in _special_parts:
            selector = self.special_selector
        elif not self.case_pedantic and magic_check.search(part) is None:
            # No wildcard characters, so the part can be appended verbatim.
            selector = self.literal_selector
        else:
            selector = self.wildcard_selector
        return selector(part, parts)

    def special_selector(self, part, parts):
        """Returns a function that selects special children of the given path.

        *part* is appended to the path (after add_slash()) without touching
        the filesystem, and the caller's *exists* flag is passed on as-is.
        """
        select_next = self.selector(parts)

        def select_special(path, exists=False):
            path = self.concat_path(self.add_slash(path), part)
            return select_next(path, exists)
        return select_special

    def literal_selector(self, part, parts):
        """Returns a function that selects a literal descendant of a path.
        """

        # Optimization: consume and join any subsequent literal parts here,
        # rather than leaving them for the next selector. This reduces the
        # number of string concatenation operations and calls to add_slash().
        while parts and magic_check.search(parts[-1]) is None:
            part += self.sep + parts.pop()

        select_next = self.selector(parts)

        def select_literal(path, exists=False):
            path = self.concat_path(self.add_slash(path), part)
            # The joined path was built without consulting the filesystem, so
            # it is never reported as known to exist, whatever the caller
            # passed in.
            return select_next(path, exists=False)
        return select_literal

    def wildcard_selector(self, part, parts):
        """Returns a function that selects direct children of a given path,
        filtering by pattern.

        Entry names are tested against *part*.  If more parts follow, only
        directories are kept and each is handed to the next selector.
        """

        # A lone '*' accepts every entry, so no regular expression is needed.
        match = None if part == '*' else self.compile(part)
        dir_only = bool(parts)
        if dir_only:
            select_next = self.selector(parts)

        def select_wildcard(path, exists=False):
            try:
                # We must close the scandir() object before proceeding to
                # avoid exhausting file descriptors when globbing deep trees.
                with self.scandir(path) as scandir_it:
                    entries = list(scandir_it)
            except OSError:
                # A path that cannot be listed yields no results.
                pass
            else:
                for entry in entries:
                    if match is None or match(entry.name):
                        if dir_only:
                            try:
                                if not entry.is_dir():
                                    continue
                            except OSError:
                                # Treat entries that cannot be examined as
                                # non-directories.
                                continue
                        entry_path = self.parse_entry(entry)
                        if dir_only:
                            # The entry came from scandir(), so it is known
                            # to exist.
                            yield from select_next(entry_path, exists=True)
                        else:
                            yield entry_path
        return select_wildcard

    def recursive_selector(self, part, parts):
        """Returns a function that selects a given path and all its children,
        recursively, filtering by pattern.
        """
        # Optimization: consume following '**' parts, which have no effect.
        while parts and parts[-1] == '**':
            parts.pop()

        # Optimization: consume and join any following non-special parts here,
        # rather than leaving them for the next selector. They're used to
        # build a regular expression, which we use to filter the results of
        # the recursive walk. As a result, non-special pattern segments
        # following a '**' wildcard don't require additional filesystem access
        # to expand.
        # This joining is only done when symlinks are followed.
        follow_symlinks = self.recursive is not _no_recurse_symlinks
        if follow_symlinks:
            while parts and parts[-1] not in _special_parts:
                part += self.sep + parts.pop()

        # A bare '**' accepts every walked path, so no regular expression is
        # needed.
        match = None if part == '**' else self.compile(part)
        dir_only = bool(parts)
        select_next = self.selector(parts)

        def select_recursive(path, exists=False):
            path = self.add_slash(path)
            # The pattern is matched only against the text that follows the
            # starting path: match_pos is passed as the `pos` argument of the
            # compiled pattern's match() method.
            match_pos = len(str(path))
            if match is None or match(str(path), match_pos):
                yield from select_next(path, exists)
            # Directories still to be listed; the most recently added one is
            # processed first.
            stack = [path]
            while stack:
                yield from select_recursive_step(stack, match_pos)

        def select_recursive_step(stack, match_pos):
            # Lists one directory taken from the stack, yields its matching
            # entries and pushes its subdirectories for later steps.
            path = stack.pop()
            try:
                # We must close the scandir() object before proceeding to
                # avoid exhausting file descriptors when globbing deep trees.
                with self.scandir(path) as scandir_it:
                    entries = list(scandir_it)
            except OSError:
                # A directory that cannot be listed contributes nothing.
                pass
            else:
                for entry in entries:
                    is_dir = False
                    try:
                        if entry.is_dir(follow_symlinks=follow_symlinks):
                            is_dir = True
                    except OSError:
                        # Entries that cannot be examined are treated as
                        # non-directories.
                        pass

                    if is_dir or not dir_only:
                        entry_path = self.parse_entry(entry)
                        if match is None or match(str(entry_path), match_pos):
                            if dir_only:
                                yield from select_next(entry_path, exists=True)
                            else:
                                # Optimization: directly yield the path if this is
                                # last pattern part.
                                yield entry_path
                        # Descend into directories whether or not they
                        # matched themselves.
                        if is_dir:
                            stack.append(entry_path)

        return select_recursive

    def select_exists(self, path, exists=False):
        """Yields the given path, if it exists.
        """
        if exists:
            # Optimization: this path is already known to exist, e.g. because
            # it was returned from os.scandir(), so we skip calling lstat().
            yield path
        else:
            try:
                self.lstat(path)
                yield path
            except OSError:
                # lstat() failed, so the path is not yielded.
                pass

    @classmethod
    def walk(cls, root, top_down, on_error, follow_symlinks):
        """Walk the directory tree from the given root, similar to os.walk().

        Yields ``(path, dirnames, filenames)`` tuples.  If *top_down* is
        true, a directory is yielded before its subdirectories; otherwise it
        is yielded after them.  If listing a directory raises OSError, that
        directory is not yielded and the error is passed to *on_error* when
        it is not None.  *follow_symlinks* is forwarded to each entry's
        is_dir().
        """
        # Work stack holding both paths still to be listed and, for bottom-up
        # walks, finished result tuples waiting to be yielded.
        paths = [root]
        while paths:
            path = paths.pop()
            if isinstance(path, tuple):
                # A deferred bottom-up result: everything pushed after it
                # (its subdirectories) has already been processed.
                yield path
                continue
            try:
                with cls.scandir(path) as scandir_it:
                    dirnames = []
                    filenames = []
                    if not top_down:
                        # Push the result before the children so that it is
                        # popped after them.  The lists are filled in below.
                        paths.append((path, dirnames, filenames))
                    for entry in scandir_it:
                        name = entry.name
                        try:
                            if entry.is_dir(follow_symlinks=follow_symlinks):
                                if not top_down:
                                    paths.append(cls.parse_entry(entry))
                                dirnames.append(name)
                            else:
                                filenames.append(name)
                        except OSError:
                            # Entries that cannot be examined are reported as
                            # files.
                            filenames.append(name)
            except OSError as error:
                if on_error is not None:
                    on_error(error)
            else:
                if top_down:
                    yield path, dirnames, filenames
                    # dirnames is read after the yield, so in-place changes
                    # made by the consumer affect which directories are
                    # visited.  Pushing in reverse order means they are
                    # popped in their listed order.
                    if dirnames:
                        prefix = cls.add_slash(path)
                        paths += [cls.concat_path(prefix, d) for d in reversed(dirnames)]
| 562 | + |
| 563 | + |
class _StringGlobber(_Globber):
    """Globber that operates on plain string paths using the os module.

    Directories are listed with os.scandir(), existence is checked with
    os.lstat(), each entry's path is read from its `path` attribute, and text
    is appended to a path with ordinary string concatenation.
    """

    lstat = staticmethod(os.lstat)
    scandir = staticmethod(os.scandir)
    parse_entry = operator.attrgetter('path')
    concat_path = operator.add

    # The platform-specific add_slash() is chosen once, when the class body
    # is executed.
    if os.name == 'nt':
        @staticmethod
        def add_slash(pathname):
            """Return *pathname* with a trailing backslash, unless the part
            after its root is empty or already ends with a separator.
            """
            _, _, tail = os.path.splitroot(pathname)
            if tail and tail[-1] not in '\\/':
                return f'{pathname}\\'
            return pathname
    else:
        @staticmethod
        def add_slash(pathname):
            """Return *pathname* with a trailing slash, unless it is empty or
            already ends with one.
            """
            if pathname and pathname[-1] != '/':
                return f'{pathname}/'
            return pathname
0 commit comments