Skip to content

Commit

Permalink
Merge pull request #508 from reef-technologies/fix_scan_perm_errors
Browse files Browse the repository at this point in the history
Fix folder scan erroring out when encountering inaccessible directory
  • Loading branch information
mjurbanski-reef authored Aug 15, 2024
2 parents 470de14 + b8e8e5d commit 76e3e96
Show file tree
Hide file tree
Showing 7 changed files with 252 additions and 28 deletions.
36 changes: 33 additions & 3 deletions b2sdk/_internal/scan/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,20 @@ def join_b2_path(relative_dir_path: str | Path, file_name: str):
return relative_dir_path + '/' + file_name


if sys.platform == 'win32':

def _file_read_access(path):
try:
with open(path, 'rb', buffering=0):
return True
except PermissionError:
return False
else:

def _file_read_access(path):
return os.access(path, os.R_OK)


class LocalFolder(AbstractFolder):
"""
Folder interface to a directory on the local machine.
Expand Down Expand Up @@ -236,7 +250,14 @@ def _walk_relative_paths(
return # Skip if symlink already visited
visited_symlinks.add(inode_number)

for local_path in sorted(local_dir.iterdir()):
try:
dir_children = sorted(local_dir.iterdir())
except PermissionError: # `chmod -r dir` can trigger this
if reporter is not None:
reporter.local_permission_error(str(local_dir))
return

for local_path in dir_children:
name = local_path.name
relative_file_path = join_b2_path(relative_dir_path, name)

Expand All @@ -251,7 +272,16 @@ def _walk_relative_paths(
reporter.invalid_name(str(local_path), str(e))
continue

if local_path.is_dir():
try:
is_dir = local_path.is_dir()
except PermissionError: # `chmod -x dir` can trigger this
if reporter is not None and not policies_manager.should_exclude_local_directory(
str(relative_file_path)
):
reporter.local_permission_error(str(local_path))
continue

if is_dir:
if policies_manager.should_exclude_local_directory(str(relative_file_path)):
continue # Skip excluded directories
# Recurse into directories
Expand All @@ -278,7 +308,7 @@ def _walk_relative_paths(
if policies_manager.should_exclude_local_path(local_scan_path):
continue # Skip excluded files

if not os.access(local_path, os.R_OK):
if not _file_read_access(local_path):
if reporter is not None:
reporter.local_permission_error(str(local_path))
continue
Expand Down
24 changes: 20 additions & 4 deletions b2sdk/_internal/scan/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from __future__ import annotations

import logging
import re
import threading
import time
from dataclasses import dataclass
Expand All @@ -20,6 +21,21 @@

logger = logging.getLogger(__name__)

_REMOVE_EXTENDED_PATH_PREFIX = re.compile(r'\\\\\?\\')


def _safe_path_print(path: str) -> str:
"""
Print a path, escaping control characters if necessary.
Windows extended path prefix is removed from the path before printing for better readability.
Since Windows 10 the prefix is not needed.
:param path: a path to print
:return: a path that can be printed
"""
return escape_control_chars(_REMOVE_EXTENDED_PATH_PREFIX.sub('', path))


@dataclass
class ProgressReport:
Expand Down Expand Up @@ -180,7 +196,7 @@ def local_access_error(self, path: str) -> None:
:param path: file path
"""
self.warnings.append(
f'WARNING: {escape_control_chars(path)} could not be accessed (broken symlink?)'
f'WARNING: {_safe_path_print(path)} could not be accessed (broken symlink?)'
)

def local_permission_error(self, path: str) -> None:
Expand All @@ -190,7 +206,7 @@ def local_permission_error(self, path: str) -> None:
:param path: file path
"""
self.warnings.append(
f'WARNING: {escape_control_chars(path)} could not be accessed (no permissions to read?)'
f'WARNING: {_safe_path_print(path)} could not be accessed (no permissions to read?)'
)

def symlink_skipped(self, path: str) -> None:
Expand All @@ -203,7 +219,7 @@ def circular_symlink_skipped(self, path: str) -> None:
:param path: file path
"""
self.warnings.append(
f'WARNING: {escape_control_chars(path)} is a circular symlink, which was already visited. Skipping.'
f'WARNING: {_safe_path_print(path)} is a circular symlink, which was already visited. Skipping.'
)

def invalid_name(self, path: str, error: str) -> None:
Expand All @@ -213,7 +229,7 @@ def invalid_name(self, path: str, error: str) -> None:
:param path: file path
"""
self.warnings.append(
f'WARNING: {escape_control_chars(path)} path contains invalid name ({error}). Skipping.'
f'WARNING: {_safe_path_print(path)} path contains invalid name ({error}). Skipping.'
)


Expand Down
2 changes: 2 additions & 0 deletions changelog.d/+scan_perm_errors.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix LocalFolder.all_files(..) erroring out if one of the non-excluded directories is not readable by the user running the scan.
Warning is added to ProgressReport instead as other file access errors are.
25 changes: 24 additions & 1 deletion pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ classifiers = [
"Programming Language :: Python :: 3.12",
]

[project.optional-dependencies]

[project.urls]
Homepage = "https://github.com/Backblaze/b2-sdk-python"

Expand Down Expand Up @@ -174,6 +172,7 @@ test = [
# remove `and platform_python_implementation!='PyPy'` after dropping Python 3.7 support as that
# will allow us to update pydantic to a version which installs properly under PyPy
"pydantic>=2.0.1,<3; python_version>='3.8' and platform_python_implementation!='PyPy'",
"pywin32>=306; sys_platform == \"win32\" and platform_python_implementation!='PyPy'",
]
release = [
"towncrier==23.11.0; python_version>='3.8'",
Expand Down
95 changes: 95 additions & 0 deletions test/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,18 @@
from __future__ import annotations

import os
import shutil
import sys
from glob import glob
from pathlib import Path

try:
import ntsecuritycon
import win32api
import win32security
except ImportError:
ntsecuritycon = win32api = win32security = None

import pytest

pytest.register_assert_rewrite('test.unit')
Expand Down Expand Up @@ -192,3 +200,90 @@ def bucket(b2api):
@pytest.fixture
def file_info():
return {'key': 'value'}


class PermTool:
def allow_access(self, path):
pass

def deny_access(self, path):
pass


class UnixPermTool(PermTool):
def allow_access(self, path):
path.chmod(0o700)

def deny_access(self, path):
path.chmod(0o000)


class WindowsPermTool(PermTool):
def __init__(self):
self.user_sid = win32security.GetTokenInformation(
win32security.OpenProcessToken(win32api.GetCurrentProcess(), win32security.TOKEN_QUERY),
win32security.TokenUser
)[0]

def allow_access(self, path):
dacl = win32security.ACL()
dacl.AddAccessAllowedAce(
win32security.ACL_REVISION, ntsecuritycon.FILE_ALL_ACCESS, self.user_sid
)

security_desc = win32security.GetFileSecurity(
str(path), win32security.DACL_SECURITY_INFORMATION
)
security_desc.SetSecurityDescriptorDacl(1, dacl, 0)
win32security.SetFileSecurity(
str(path), win32security.DACL_SECURITY_INFORMATION, security_desc
)

def deny_access(self, path):
dacl = win32security.ACL()
dacl.AddAccessDeniedAce(
win32security.ACL_REVISION, ntsecuritycon.FILE_ALL_ACCESS, self.user_sid
)

security_desc = win32security.GetFileSecurity(
str(path), win32security.DACL_SECURITY_INFORMATION
)
security_desc.SetSecurityDescriptorDacl(1, dacl, 0)
win32security.SetFileSecurity(
str(path), win32security.DACL_SECURITY_INFORMATION, security_desc
)


@pytest.fixture
def fs_perm_tool(tmp_path):
"""
Ensure tmp_path is delete-able after the test.
Important for the tests that mess with filesystem permissions.
"""
if os.name == 'nt':
if win32api is None:
pytest.skip('pywin32 is required to run this test')
perm_tool = WindowsPermTool()
else:
perm_tool = UnixPermTool()
yield perm_tool

try:
shutil.rmtree(tmp_path)
except OSError:
perm_tool.allow_access(tmp_path)

for root, dirs, files in os.walk(tmp_path, topdown=True):
for name in dirs:
perm_tool.allow_access(Path(root) / name)
for name in files:
file_path = Path(root) / name
perm_tool.allow_access(file_path)
file_path.unlink()

for root, dirs, files in os.walk(tmp_path, topdown=False):
for name in dirs:
(Path(root) / name).rmdir()

tmp_path.rmdir()
Loading

0 comments on commit 76e3e96

Please sign in to comment.