Skip to content

Commit

Permalink
Merge pull request #390 from reef-technologies/circular-links
Browse files Browse the repository at this point in the history
Fixing circular symlinks (issue 513)
  • Loading branch information
ppolewicz authored May 23, 2023
2 parents df4a1db + 09a8a3d commit 3fd20f1
Show file tree
Hide file tree
Showing 8 changed files with 593 additions and 46 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed
* Circular symlinks no longer cause infinite loops when syncing a folder

## [1.21.0] - 2023-04-17

### Added
Expand Down
86 changes: 59 additions & 27 deletions b2sdk/scan/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@

import logging
import os
from pathlib import Path
import platform
import re
import sys

from abc import ABCMeta, abstractmethod
from typing import Iterator, Optional
from typing import Iterator, Optional, Set

from ..utils import fix_windows_path_limit, get_file_mtime, is_file_readable
from .exception import EmptyDirectory, EnvironmentEncodingError, NotADirectory, UnableToCreateDirectory, UnsupportedFilename
Expand Down Expand Up @@ -132,7 +133,8 @@ def all_files(self, reporter: Optional[ProgressReport],
:param reporter: a place to report errors
:param policies_manager: a policy manager object, default is DEFAULT_SCAN_MANAGER
"""
yield from self._walk_relative_paths(self.root, '', reporter, policies_manager)
root_path = Path(self.root)
yield from self._walk_relative_paths(root_path, Path(''), reporter, policies_manager)

def make_full_path(self, file_name):
"""
Expand Down Expand Up @@ -178,17 +180,23 @@ def ensure_non_empty(self):
raise EmptyDirectory(self.root)

def _walk_relative_paths(
self, local_dir: str, relative_dir_path: str, reporter,
policies_manager: ScanPoliciesManager
self,
local_dir: Path,
relative_dir_path: Path,
reporter: ProgressReport,
policies_manager: ScanPoliciesManager,
visited_symlinks: Optional[Set[int]] = None,
):
"""
Yield a File object for each of the files anywhere under this folder, in the
order they would appear in B2, unless the path is excluded by policies manager.
:param relative_dir_path: the path of this dir relative to the scan point, or '' if at scan point
:param local_dir: the path to the local directory that we are currently inspecting
:param relative_dir_path: the path of this dir relative to the scan point, or Path('') if at scan point
:param reporter: a reporter object to report errors and warnings
:param policies_manager: a policies manager object
:param visited_symlinks: a set of paths to symlinks that have already been visited. Using inode numbers to reduce memory usage
"""
if not isinstance(local_dir, str):
raise ValueError('folder path should be unicode: %s' % repr(local_dir))

# Collect the names. We do this before returning any results, because
# directories need to sort as if their names end in '/'.
Expand All @@ -204,39 +212,59 @@ def _walk_relative_paths(
#
# This is because in Unicode '.' comes before '/', which comes before '0'.
names = [] # list of (name, local_path, relative_file_path)
for name in os.listdir(local_dir):
# We expect listdir() to return unicode if dir_path is unicode.
# If the file name is not valid, based on the file system
# encoding, then listdir() will return un-decoded str/bytes.
if not isinstance(name, str):
name = self._handle_non_unicode_file_name(name)

visited_symlinks = visited_symlinks or set()

if local_dir.is_symlink():
real_path = local_dir.resolve()
inode_number = real_path.stat().st_ino

visited_symlinks_count = len(visited_symlinks)

# Add symlink to visited_symlinks to prevent infinite symlink loops
visited_symlinks.add(inode_number)

# Check if set size has changed, if not, symlink has already been visited
if len(visited_symlinks) == visited_symlinks_count:
# Infinite symlink loop detected, report warning and skip symlink
if reporter is not None:
reporter.circular_symlink_skipped(str(local_dir))
return

visited_symlinks.add(inode_number)

for name in (x.name for x in local_dir.iterdir()):

if '/' in name:
raise UnsupportedFilename(
"scan does not support file names that include '/'",
"%s in dir %s" % (name, local_dir)
)

local_path = os.path.join(local_dir, name)
local_path = local_dir / name
relative_file_path = join_b2_path(
relative_dir_path, name
str(relative_dir_path), name
) # file path relative to the scan point

# Skip broken symlinks or other inaccessible files
if not is_file_readable(local_path, reporter):
if not is_file_readable(str(local_path), reporter):
continue

if policies_manager.exclude_all_symlinks and os.path.islink(local_path):
if policies_manager.exclude_all_symlinks and local_path.is_symlink():
if reporter is not None:
reporter.symlink_skipped(local_path)
reporter.symlink_skipped(str(local_path))
continue

if os.path.isdir(local_path):
if local_path.is_dir():
name += '/'
if policies_manager.should_exclude_local_directory(relative_file_path):
if policies_manager.should_exclude_local_directory(str(relative_file_path)):
continue

names.append((name, local_path, relative_file_path))
# remove the leading './' from the relative path to ensure backward compatibility
relative_file_path_str = str(relative_file_path)
if relative_file_path_str.startswith("./"):
relative_file_path_str = relative_file_path_str[2:]
names.append((name, local_path, relative_file_path_str))

# Yield all of the answers.
#
Expand All @@ -245,19 +273,23 @@ def _walk_relative_paths(
for (name, local_path, relative_file_path) in sorted(names):
if name.endswith('/'):
for subdir_file in self._walk_relative_paths(
local_path, relative_file_path, reporter, policies_manager
local_path,
relative_file_path,
reporter,
policies_manager,
visited_symlinks,
):
yield subdir_file
else:
# Check that the file still exists and is accessible, since it can take a long time
# to iterate through large folders
if is_file_readable(local_path, reporter):
file_mod_time = get_file_mtime(local_path)
file_size = os.path.getsize(local_path)
if is_file_readable(str(local_path), reporter):
file_mod_time = get_file_mtime(str(local_path))
file_size = local_path.stat().st_size

local_scan_path = LocalPath(
absolute_path=self.make_full_path(relative_file_path),
relative_path=relative_file_path,
absolute_path=self.make_full_path(str(relative_file_path)),
relative_path=str(relative_file_path),
mod_time=file_mod_time,
size=file_size,
)
Expand Down
2 changes: 1 addition & 1 deletion b2sdk/scan/folder_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,4 @@ def _parse_bucket_and_folder(bucket_and_path, api, b2_folder_class):
(bucket_name, folder_name) = bucket_and_path.split('/', 1)
if folder_name.endswith('/'):
folder_name = folder_name[:-1]
return b2_folder_class(bucket_name, folder_name, api)
return b2_folder_class(bucket_name, folder_name, api)
2 changes: 1 addition & 1 deletion b2sdk/scan/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,4 @@ def __eq__(self, other):
self.relative_path == other.relative_path and
self.selected_version == other.selected_version and
self.all_versions == other.all_versions
)
)
35 changes: 19 additions & 16 deletions b2sdk/scan/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,29 +66,27 @@ def __enter__(self):
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()

def error(self, message):
def error(self, message: str) -> None:
"""
Print an error, gracefully interleaving it with a progress bar.
:param message: an error message
:type message: str
"""
self.print_completion(message)

def print_completion(self, message):
def print_completion(self, message: str) -> None:
"""
Remove the progress bar, prints a message, and puts the progress
bar back.
:param message: an error message
:type message: str
"""
with self.lock:
self._print_line(message, True)
self._last_update_time = 0
self._update_progress()

def update_count(self, delta: int):
def update_count(self, delta: int) -> None:
"""
Report that items have been processed.
"""
Expand Down Expand Up @@ -117,14 +115,12 @@ def _update_progress(self):

self._print_line(message, False)

def _print_line(self, line, newline):
def _print_line(self, line: str, newline: bool) -> None:
"""
Print a line to stdout.
:param line: a string without a \r or \n in it.
:type line: str
:param newline: True if the output should move to a new line after this one.
:type newline: bool
"""
if len(line) < len(self.current_line):
line += ' ' * (len(self.current_line) - len(line))
Expand All @@ -150,48 +146,55 @@ def _print_line(self, line, newline):
self.current_line = line
self.stdout.flush()

def update_total(self, delta):
def update_total(self, delta: int) -> None:
"""
Report that more files have been found for comparison.
:param delta: number of files found since the last check
:type delta: int
"""
with self.lock:
self.total_count += delta
self._update_progress()

def end_total(self):
def end_total(self) -> None:
"""
Total files count is done. Can proceed to step 2.
"""
with self.lock:
self.total_done = True
self._update_progress()

def local_access_error(self, path):
def local_access_error(self, path: str) -> None:
"""
Add a file access error message to the list of warnings.
:param path: file path
:type path: str
"""
self.warnings.append('WARNING: %s could not be accessed (broken symlink?)' % (path,))

def local_permission_error(self, path):
def local_permission_error(self, path: str) -> None:
"""
Add a permission error message to the list of warnings.
:param path: file path
:type path: str
"""
self.warnings.append(
'WARNING: %s could not be accessed (no permissions to read?)' % (path,)
)

def symlink_skipped(self, path):
def symlink_skipped(self, path: str) -> None:
pass

def circular_symlink_skipped(self, path: str) -> None:
"""
Add a circular symlink error message to the list of warnings.
:param path: file path
"""
self.warnings.append(
'WARNING: %s is a circular symlink, which was already visited. Skipping.' % (path,)
)


def sample_report_run():
"""
Expand Down
1 change: 1 addition & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
'pytest-lazy-fixture==0.6.3',
'pyfakefs==4.5.6',
'pytest-xdist==2.5.0',
'pytest-timeout==2.1.0',
]
REQUIREMENTS_BUILD = ['setuptools>=20.2']

Expand Down
Loading

0 comments on commit 3fd20f1

Please sign in to comment.