|
2 | 2 |
|
3 | 3 | import contextlib
|
4 | 4 | import os
|
5 |
| -import re |
6 | 5 | import fnmatch
|
7 |
| -import functools |
8 | 6 | import itertools
|
9 |
| -import operator |
10 | 7 | import stat
|
11 | 8 | import sys
|
12 | 9 |
|
13 |
| -__all__ = ["glob", "iglob", "escape"] |
| 10 | +from pathlib._glob import translate, magic_check, magic_check_bytes |
| 11 | + |
| 12 | +__all__ = ["glob", "iglob", "escape", "translate"] |
14 | 13 |
|
15 | 14 | def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
|
16 | 15 | include_hidden=False):
|
@@ -226,9 +225,6 @@ def _join(dirname, basename):
|
226 | 225 | return dirname or basename
|
227 | 226 | return os.path.join(dirname, basename)
|
228 | 227 |
|
229 |
# Compiled patterns that find the first glob wildcard character ('*', '?'
# or '[') in a pathname; one for str input and one for bytes input.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')
231 |
| - |
232 | 228 | def has_magic(s):
|
233 | 229 | if isinstance(s, bytes):
|
234 | 230 | match = magic_check_bytes.search(s)
|
@@ -258,300 +254,4 @@ def escape(pathname):
|
258 | 254 | return drive + pathname
|
259 | 255 |
|
260 | 256 |
|
261 |
# Pattern segments that the globber appends to the path as-is instead of
# matching them against directory entries: the empty segment, '.' and '..'.
_special_parts = ('', '.', '..')
# Read-only open flags, plus O_DIRECTORY on platforms where os defines it.
# NOTE(review): presumably passed to os.open() for dir_fd-based globbing;
# the call site is not visible in this part of the file -- confirm.
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
# Unique sentinel compared by identity against a globber's `recursive`
# setting; when it matches, the recursive walk does not follow symlinks.
_no_recurse_symlinks = object()
264 |
| - |
265 |
| - |
266 |
def translate(pat, *, recursive=False, include_hidden=False, seps=None):
    """Convert a shell-style pathname pattern into a regular expression.

    The pattern is split on path separators and every segment is translated
    on its own, so wildcards never match across a separator.

    When `recursive` is true, a segment consisting solely of '**' matches
    any number of path segments (including none).

    When `include_hidden` is true, wildcards are allowed to match segments
    whose name starts with a dot ('.'); otherwise they are not.

    `seps` may be a sequence of separator characters used both to split the
    pattern and to match separators in paths. When it is empty or omitted,
    os.path.sep and, where defined, os.path.altsep are used.

    Returns the regular expression as a string.
    """
    if not seps:
        seps = (os.path.sep, os.path.altsep) if os.path.altsep else os.path.sep
    sep_chars = ''.join(re.escape(sep) for sep in seps)
    not_sep = f'[^{sep_chars}]'
    # A character class is only needed when there are several separators.
    any_sep = sep_chars if len(seps) <= 1 else f'[{sep_chars}]'

    if include_hidden:
        one_last_segment = f'{not_sep}+'
    else:
        # The first character of a segment must not be a dot.
        one_last_segment = f'[^{sep_chars}.]{not_sep}*'
    one_segment = f'{one_last_segment}{any_sep}'
    if include_hidden:
        any_segments = f'(?:.+{any_sep})?'
        any_last_segments = '.*'
    else:
        any_segments = f'(?:{one_segment})*'
        any_last_segments = f'{any_segments}(?:{one_last_segment})?'

    segments = re.split(any_sep, pat)
    final_idx = len(segments) - 1
    pieces = []
    for idx, segment in enumerate(segments):
        is_last = idx == final_idx
        if segment == '*':
            pieces.append(one_last_segment if is_last else one_segment)
            continue
        if recursive and segment == '**':
            if is_last:
                pieces.append(any_last_segments)
            elif segments[idx + 1] != '**':
                # Runs of consecutive '**' collapse: only the last one of
                # the run contributes to the expression.
                pieces.append(any_segments)
            continue
        if segment:
            if not include_hidden and segment[0] in '*?':
                # A leading wildcard must not swallow a leading dot.
                pieces.append(r'(?!\.)')
            pieces.extend(fnmatch._translate(segment, f'{not_sep}*', not_sep))
        if not is_last:
            pieces.append(any_sep)
    body = ''.join(pieces)
    return fr'(?s:{body})\Z'
319 |
| - |
320 |
| - |
321 |
@functools.lru_cache(maxsize=512)
def _compile_pattern(pat, sep, case_sensitive, recursive=True):
    """Return the bound ``match`` method of the regex compiled from *pat*.

    The glob pattern is translated with hidden files included and *sep* as
    the separator set. Matching ignores case unless *case_sensitive* is
    true. Results are memoized (up to 512 distinct argument combinations).
    """
    regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep)
    if case_sensitive:
        flags = re.NOFLAG
    else:
        flags = re.IGNORECASE
    compiled = re.compile(regex, flags=flags)
    return compiled.match
328 |
| - |
329 |
| - |
330 |
class _Globber:
    """Shell-style pattern matching and globbing engine.

    A pattern is supplied as a list of segments in reverse order (segments
    are consumed with ``list.pop()``). ``selector()`` turns that list into a
    chain of "select" functions; calling the first one with a starting path
    yields every matching path.
    """

    def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False):
        self.sep = sep
        self.case_sensitive = case_sensitive
        self.case_pedantic = case_pedantic
        self.recursive = recursive

    # Low-level methods

    lstat = staticmethod(os.lstat)
    scandir = staticmethod(os.scandir)
    parse_entry = operator.attrgetter('path')
    concat_path = operator.add

    if os.name == 'nt':
        @staticmethod
        def add_slash(pathname):
            """Return *pathname* with a trailing backslash, unless the part
            after the drive/root is empty or already ends with a slash."""
            tail = os.path.splitroot(pathname)[2]
            if tail and tail[-1] not in '\\/':
                return f'{pathname}\\'
            return pathname
    else:
        @staticmethod
        def add_slash(pathname):
            """Return *pathname* with a trailing '/', unless it is empty or
            already ends with one."""
            if pathname and pathname[-1] != '/':
                return f'{pathname}/'
            return pathname

    # High-level methods

    def compile(self, pat):
        """Return a match function for the glob pattern *pat*, honouring
        this globber's separator, case sensitivity and recursion mode."""
        return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive)

    def selector(self, parts):
        """Build a select function for the (reversed) pattern *parts*.

        The next segment is popped off *parts* and dispatched to the
        selector factory suited to it; an exhausted list yields the
        existence check.
        """
        if not parts:
            return self.select_exists
        head = parts.pop()
        if self.recursive and head == '**':
            build = self.recursive_selector
        elif head in _special_parts:
            build = self.special_selector
        elif self.case_pedantic or magic_check.search(head) is not None:
            build = self.wildcard_selector
        else:
            build = self.literal_selector
        return build(head, parts)

    def special_selector(self, part, parts):
        """Build a select function that appends the special segment *part*
        to the given path and delegates to the rest of the pattern.
        """
        select_next = self.selector(parts)

        def select_special(path, exists=False):
            extended = self.concat_path(self.add_slash(path), part)
            return select_next(extended, exists)
        return select_special

    def literal_selector(self, part, parts):
        """Build a select function that appends a wildcard-free segment.
        """
        # Optimization: swallow the following literal segments as well and
        # join them once, saving concatenations and add_slash() calls later.
        pieces = [part]
        while parts and magic_check.search(parts[-1]) is None:
            pieces.append(parts.pop())
        part = self.sep.join(pieces)

        select_next = self.selector(parts)

        def select_literal(path, exists=False):
            extended = self.concat_path(self.add_slash(path), part)
            # The extended path has not been checked, whatever the caller
            # knew about *path*, so existence is always reported as unknown.
            return select_next(extended, exists=False)
        return select_literal

    def wildcard_selector(self, part, parts):
        """Build a select function that scans a directory and keeps the
        direct children whose names match *part*.
        """
        # A bare '*' accepts every name, so no regex is needed for it.
        match = None if part == '*' else self.compile(part)
        dir_only = bool(parts)
        if dir_only:
            select_next = self.selector(parts)

        def select_wildcard(path, exists=False):
            try:
                # Read all entries and close the scandir() object before
                # going deeper, so deep trees don't exhaust file descriptors.
                with self.scandir(path) as scandir_it:
                    entries = list(scandir_it)
            except OSError:
                return
            for entry in entries:
                if match is not None and not match(entry.name):
                    continue
                if not dir_only:
                    yield self.parse_entry(entry)
                    continue
                # More pattern segments follow: only directories qualify.
                try:
                    is_dir = entry.is_dir()
                except OSError:
                    continue
                if is_dir:
                    yield from select_next(self.parse_entry(entry), exists=True)
        return select_wildcard

    def recursive_selector(self, part, parts):
        """Build a select function that yields the given path and all of
        its descendants, filtered by the pattern following the '**'.
        """
        # Optimization: further '**' segments right after this one add
        # nothing, so drop them.
        while parts and parts[-1] == '**':
            parts.pop()

        # Optimization: when symlinks are followed, fold the following
        # non-special segments into one regular expression used to filter
        # the walk results, so they need no extra filesystem access.
        follow_symlinks = self.recursive is not _no_recurse_symlinks
        if follow_symlinks:
            pieces = [part]
            while parts and parts[-1] not in _special_parts:
                pieces.append(parts.pop())
            part = self.sep.join(pieces)

        match = None if part == '**' else self.compile(part)
        dir_only = bool(parts)
        select_next = self.selector(parts)

        def select_recursive(path, exists=False):
            path = self.add_slash(path)
            # Matching starts after the root prefix of every candidate.
            match_pos = len(str(path))
            if match is None or match(str(path), match_pos):
                yield from select_next(path, exists)
            pending = [path]
            while pending:
                current = pending.pop()
                try:
                    # Read all entries and close the scandir() object before
                    # going deeper, so deep trees don't exhaust descriptors.
                    with self.scandir(current) as scandir_it:
                        entries = list(scandir_it)
                except OSError:
                    continue
                for entry in entries:
                    try:
                        is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
                    except OSError:
                        is_dir = False
                    if dir_only and not is_dir:
                        continue
                    entry_path = self.parse_entry(entry)
                    if match is None or match(str(entry_path), match_pos):
                        if dir_only:
                            yield from select_next(entry_path, exists=True)
                        else:
                            # Optimization: last pattern segment, so the
                            # path can be yielded directly.
                            yield entry_path
                    if is_dir:
                        pending.append(entry_path)

        return select_recursive

    def select_exists(self, path, exists=False):
        """Yield *path* if it exists.
        """
        if not exists:
            try:
                self.lstat(path)
                yield path
            except OSError:
                pass
            return
        # Optimization: the caller already knows the path exists (e.g. it
        # came from os.scandir()), so the lstat() call is skipped.
        yield path

    @classmethod
    def walk(cls, root, top_down, on_error, follow_symlinks):
        """Walk the tree below *root*, yielding (path, dirnames, filenames)
        tuples in the manner of os.walk().
        """
        pending = [root]
        while pending:
            current = pending.pop()
            if isinstance(current, tuple):
                # A deferred bottom-up result: its children are done.
                yield current
                continue
            try:
                with cls.scandir(current) as scandir_it:
                    dirnames, filenames = [], []
                    if not top_down:
                        # Push the result first so that it is popped only
                        # after every child pushed below has been handled.
                        pending.append((current, dirnames, filenames))
                    for entry in scandir_it:
                        name = entry.name
                        try:
                            if entry.is_dir(follow_symlinks=follow_symlinks):
                                if not top_down:
                                    pending.append(cls.parse_entry(entry))
                                dirnames.append(name)
                            else:
                                filenames.append(name)
                        except OSError:
                            # Entries that cannot be inspected count as files.
                            filenames.append(name)
            except OSError as error:
                if on_error is not None:
                    on_error(error)
                continue
            if not top_down:
                continue
            yield current, dirnames, filenames
            if dirnames:
                # Read after the yield, so changes the consumer made to
                # *dirnames* are seen here; pushed reversed so the first
                # name is popped first.
                prefix = cls.add_slash(current)
                pending.extend([cls.concat_path(prefix, d) for d in reversed(dirnames)])
0 commit comments