From d53f67db6d50c31d55a2d1177b1830476ec8ca68 Mon Sep 17 00:00:00 2001 From: Maciej Urbanski Date: Sun, 12 Nov 2023 16:18:52 +0100 Subject: [PATCH 1/3] refactor integration tests for better pytest & eager bucket cleanup --- CHANGELOG.md | 1 + test/integration/base.py | 65 ++++++++++-------------- test/integration/bucket_cleaner.py | 68 +++++++++++++++++-------- test/integration/cleanup_buckets.py | 5 +- test/integration/conftest.py | 73 ++++++++++++++++++++++++++- test/integration/fixtures/__init__.py | 23 --------- test/integration/helpers.py | 10 +++- test/integration/test_download.py | 3 +- test/integration/test_upload.py | 1 - 9 files changed, 160 insertions(+), 89 deletions(-) delete mode 100644 test/integration/fixtures/__init__.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 540598139..11eea1b4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Fix spellcheck erroring out on LICENSE file * Fix snyk reporting vulnerability due to tornado package use in docs generation * Deduplicate test_base files in test suite +* Refactor integration tests for better pytest compatibility & eager bucket cleanup ## [1.24.1] - 2023-09-27 diff --git a/test/integration/base.py b/test/integration/base.py index 5499b9ebb..99e7a7553 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -9,55 +9,42 @@ ###################################################################### from __future__ import annotations -import http.client -import os +from test.integration.bucket_cleaner import BucketCleaner +from test.integration.helpers import ( + BUCKET_CREATED_AT_MILLIS, + random_bucket_name, +) import pytest -from b2sdk.v2 import current_time_millis +from b2sdk.v2 import B2Api, current_time_millis from b2sdk.v2.exception import DuplicateBucketName -from .bucket_cleaner import BucketCleaner -from .helpers import ( - BUCKET_CREATED_AT_MILLIS, - BUCKET_NAME_LENGTH, - GENERAL_BUCKET_NAME_PREFIX, - authorize, - bucket_name_part, -) - +@pytest.mark.usefixtures("cls_setup") class IntegrationTestBase: - @pytest.fixture(autouse=True) - def set_http_debug(self): - if os.environ.get('B2_DEBUG_HTTP'): - http.client.HTTPConnection.debuglevel = 1 - - @pytest.fixture(autouse=True) - def save_settings(self, dont_cleanup_old_buckets, b2_auth_data): - type(self).dont_cleanup_old_buckets = dont_cleanup_old_buckets - type(self).b2_auth_data = b2_auth_data - - @classmethod - def setup_class(cls): - cls.this_run_bucket_name_prefix = GENERAL_BUCKET_NAME_PREFIX + bucket_name_part(8) - - @classmethod - def teardown_class(cls): - BucketCleaner( - cls.dont_cleanup_old_buckets, - *cls.b2_auth_data, - current_run_prefix=cls.this_run_bucket_name_prefix - ).cleanup_buckets() + b2_api: B2Api + this_run_bucket_name_prefix: str + bucket_cleaner: BucketCleaner + + @pytest.fixture(autouse=True, scope="class") + def cls_setup(self, request, b2_api, b2_auth_data, bucket_name_prefix, bucket_cleaner): + cls = request.cls + cls.b2_auth_data = b2_auth_data + cls.this_run_bucket_name_prefix = bucket_name_prefix + cls.bucket_cleaner = bucket_cleaner + cls.b2_api = b2_api + cls.info = b2_api.account_info @pytest.fixture(autouse=True) def setup_method(self): - self.b2_api, self.info = authorize(self.b2_auth_data) + self.buckets_created = [] + yield + for bucket in self.buckets_created: + self.bucket_cleaner.cleanup_bucket(bucket) def generate_bucket_name(self): - return self.this_run_bucket_name_prefix + bucket_name_part( - BUCKET_NAME_LENGTH - 
len(self.this_run_bucket_name_prefix) - ) + return random_bucket_name(self.this_run_bucket_name_prefix) def write_zeros(self, file, number): line = b'0' * 1000 + b'\n' @@ -70,7 +57,7 @@ def write_zeros(self, file, number): def create_bucket(self): bucket_name = self.generate_bucket_name() try: - return self.b2_api.create_bucket( + bucket = self.b2_api.create_bucket( bucket_name, 'allPublic', bucket_info={BUCKET_CREATED_AT_MILLIS: str(current_time_millis())} @@ -78,6 +65,8 @@ def create_bucket(self): except DuplicateBucketName: self._duplicated_bucket_name_debug_info(bucket_name) raise + self.buckets_created.append(bucket) + return bucket def _duplicated_bucket_name_debug_info(self, bucket_name: str) -> None: # Trying to obtain as much information as possible about this bucket. diff --git a/test/integration/bucket_cleaner.py b/test/integration/bucket_cleaner.py index 0c85689e4..522f9e720 100644 --- a/test/integration/bucket_cleaner.py +++ b/test/integration/bucket_cleaner.py @@ -9,25 +9,32 @@ ###################################################################### from __future__ import annotations -from b2sdk.v2 import * +import logging -from .helpers import BUCKET_CREATED_AT_MILLIS, GENERAL_BUCKET_NAME_PREFIX, authorize +from b2sdk.v2 import ( + NO_RETENTION_FILE_SETTING, + B2Api, + Bucket, + LegalHold, + RetentionMode, + current_time_millis, +) +from b2sdk.v2.exception import BadRequest + +from .helpers import BUCKET_CREATED_AT_MILLIS, GENERAL_BUCKET_NAME_PREFIX ONE_HOUR_MILLIS = 60 * 60 * 1000 +logger = logging.getLogger(__name__) + class BucketCleaner: def __init__( - self, - dont_cleanup_old_buckets: bool, - b2_application_key_id: str, - b2_application_key: str, - current_run_prefix: str | None = None + self, dont_cleanup_old_buckets: bool, b2_api: B2Api, current_run_prefix: str | None = None ): self.current_run_prefix = current_run_prefix self.dont_cleanup_old_buckets = dont_cleanup_old_buckets - self.b2_application_key_id = b2_application_key_id - self.b2_application_key = b2_application_key + self.b2_api = b2_api def _should_remove_bucket(self, bucket: Bucket): if self.current_run_prefix and bucket.name.startswith(self.current_run_prefix): @@ -42,27 +49,41 @@ def _should_remove_bucket(self, bucket: Bucket): return False def cleanup_buckets(self): - b2_api, _ = authorize((self.b2_application_key_id, self.b2_application_key)) - buckets = b2_api.list_buckets() + buckets = self.b2_api.list_buckets() for bucket in buckets: - if not self._should_remove_bucket(bucket): - print('Skipping bucket removal:', bucket.name) - else: - print('Trying to remove bucket:', bucket.name) + self.cleanup_bucket(bucket) + + def cleanup_bucket(self, bucket: Bucket): + b2_api = self.b2_api + if not self._should_remove_bucket(bucket): + logger.info('Skipping bucket removal:', bucket.name) + else: + logger.info('Trying to remove bucket:', bucket.name) + files_leftover = False + try: + b2_api.delete_bucket(bucket) + except BadRequest: + logger.info('Bucket is not empty, removing files') + files_leftover = True + + if files_leftover: files_leftover = False file_versions = bucket.ls(latest_only=False, recursive=True) for file_version_info, _ in file_versions: if file_version_info.file_retention: if file_version_info.file_retention.mode == RetentionMode.GOVERNANCE: - print('Removing retention from file version:', file_version_info.id_) + logger.info( + 'Removing retention from file version: %s', file_version_info.id_ + ) b2_api.update_file_retention( file_version_info.id_, file_version_info.file_name, 
NO_RETENTION_FILE_SETTING, True ) elif file_version_info.file_retention.mode == RetentionMode.COMPLIANCE: if file_version_info.file_retention.retain_until > current_time_millis(): # yapf: disable - print( - f'File version: {file_version_info.id_} cannot be removed due to compliance mode retention' + logger.info( + 'File version: %s cannot be removed due to compliance mode retention', + file_version_info.id_, ) files_leftover = True continue @@ -73,15 +94,18 @@ def cleanup_buckets(self): f'Unknown retention mode: {file_version_info.file_retention.mode}' ) if file_version_info.legal_hold.is_on(): - print('Removing legal hold from file version:', file_version_info.id_) + logger.info( + 'Removing legal hold from file version: %s', file_version_info.id_ + ) b2_api.update_file_legal_hold( file_version_info.id_, file_version_info.file_name, LegalHold.OFF ) - print('Removing file version:', file_version_info.id_) + logger.info('Removing file version:', file_version_info.id_) b2_api.delete_file_version(file_version_info.id_, file_version_info.file_name) if files_leftover: - print('Unable to remove bucket because some retained files remain') + logger.info('Unable to remove bucket because some retained files remain') + return else: - print('Removing bucket:', bucket.name) b2_api.delete_bucket(bucket) + logger.info('Removed bucket:', bucket.name) diff --git a/test/integration/cleanup_buckets.py b/test/integration/cleanup_buckets.py index 6ffdb6378..220aaf570 100755 --- a/test/integration/cleanup_buckets.py +++ b/test/integration/cleanup_buckets.py @@ -9,10 +9,13 @@ ###################################################################### from __future__ import annotations +from test.integration.helpers import authorize + from . import get_b2_auth_data from .bucket_cleaner import BucketCleaner from .test_raw_api import cleanup_old_buckets if __name__ == '__main__': cleanup_old_buckets() - BucketCleaner(False, *get_b2_auth_data()).cleanup_buckets() + BucketCleaner(dont_cleanup_old_buckets=False, + b2_api=authorize(get_b2_auth_data())[0]).cleanup_buckets() diff --git a/test/integration/conftest.py b/test/integration/conftest.py index 210cbaab4..c5e462044 100644 --- a/test/integration/conftest.py +++ b/test/integration/conftest.py @@ -9,18 +9,87 @@ ###################################################################### from __future__ import annotations +import http +import http.client +import os +from test.integration import get_b2_auth_data +from test.integration.bucket_cleaner import BucketCleaner +from test.integration.helpers import ( + BUCKET_CREATED_AT_MILLIS, + authorize, + get_bucket_name_prefix, + random_bucket_name, +) + import pytest +from b2sdk.utils import current_time_millis + def pytest_addoption(parser): """Add a flag for not cleaning up old buckets""" parser.addoption( - '--dont-cleanup-old-buckets', + "--dont-cleanup-old-buckets", action="store_true", default=False, ) -@pytest.fixture +@pytest.fixture(scope="session") def dont_cleanup_old_buckets(request): return request.config.getoption("--dont-cleanup-old-buckets") + + +@pytest.fixture(autouse=True, scope="session") +def set_http_debug(): + if os.environ.get("B2_DEBUG_HTTP"): + http.client.HTTPConnection.debuglevel = 1 + + +@pytest.fixture(scope="session") +def b2_auth_data(): + try: + return get_b2_auth_data() + except ValueError as ex: + pytest.fail(ex.args[0]) + + +@pytest.fixture(scope="session") +def bucket_name_prefix(): + return get_bucket_name_prefix(8) + + +@pytest.fixture(scope="session") +def _b2_api(b2_auth_data): + b2_api, _ 
= authorize(b2_auth_data) + return b2_api + + +@pytest.fixture(scope="session") +def bucket_cleaner(bucket_name_prefix, dont_cleanup_old_buckets, _b2_api): + cleaner = BucketCleaner( + dont_cleanup_old_buckets, + _b2_api, + current_run_prefix=bucket_name_prefix, + ) + yield cleaner + cleaner.cleanup_buckets() + + +@pytest.fixture(scope="session") +def b2_api(_b2_api): + return _b2_api + + +@pytest.fixture +def bucket(b2_api, bucket_name_prefix, bucket_cleaner): + bucket = b2_api.create_bucket( + random_bucket_name(bucket_name_prefix), + "allPrivate", + bucket_info={ + "created_by": "b2-sdk integration test", + BUCKET_CREATED_AT_MILLIS: str(current_time_millis()), + }, + ) + yield bucket + bucket_cleaner.cleanup_bucket(bucket) diff --git a/test/integration/fixtures/__init__.py b/test/integration/fixtures/__init__.py deleted file mode 100644 index eaa285700..000000000 --- a/test/integration/fixtures/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -###################################################################### -# -# File: test/integration/fixtures/__init__.py -# -# Copyright 2021 Backblaze Inc. All Rights Reserved. -# -# License https://www.backblaze.com/using_b2_code.html -# -###################################################################### -from __future__ import annotations - -import os - -import pytest -from .. import get_b2_auth_data - - -@pytest.fixture -def b2_auth_data(): - try: - return get_b2_auth_data() - except ValueError as ex: - pytest.fail(ex.args[0]) diff --git a/test/integration/helpers.py b/test/integration/helpers.py index d3c77bdc7..735653fb3 100644 --- a/test/integration/helpers.py +++ b/test/integration/helpers.py @@ -21,10 +21,18 @@ BUCKET_CREATED_AT_MILLIS = 'created_at_millis' -def bucket_name_part(length): +def _bucket_name_prefix_part(length: int) -> str: return ''.join(random.choice(BUCKET_NAME_CHARS) for _ in range(length)) +def get_bucket_name_prefix(rnd_len: int = 8) -> str: + return GENERAL_BUCKET_NAME_PREFIX + _bucket_name_prefix_part(rnd_len) + + +def random_bucket_name(prefix: str = GENERAL_BUCKET_NAME_PREFIX) -> str: + return prefix + _bucket_name_prefix_part(BUCKET_NAME_LENGTH - len(prefix)) + + def authorize(b2_auth_data, api_config=DEFAULT_HTTP_API_CONFIG): info = InMemoryAccountInfo() b2_api = B2Api(info, api_config=api_config) diff --git a/test/integration/test_download.py b/test/integration/test_download.py index 6383706ac..24a821552 100644 --- a/test/integration/test_download.py +++ b/test/integration/test_download.py @@ -16,11 +16,12 @@ from pprint import pprint from unittest import mock +import pytest + from b2sdk.utils import Sha1HexDigest from b2sdk.v2 import * from .base import IntegrationTestBase -from .fixtures import * # noqa: F401, F403 from .helpers import authorize diff --git a/test/integration/test_upload.py b/test/integration/test_upload.py index 3bd8621e4..b3d79939b 100644 --- a/test/integration/test_upload.py +++ b/test/integration/test_upload.py @@ -17,7 +17,6 @@ from b2sdk.v2 import B2RawHTTPApi from .base import IntegrationTestBase -from .fixtures import b2_auth_data # noqa from .test_raw_api import authorize_raw_api From b228d86ab99fdedd94831c785a5aab66c4c703c7 Mon Sep 17 00:00:00 2001 From: Maciej Urbanski Date: Sun, 12 Nov 2023 22:05:09 +0100 Subject: [PATCH 2/3] fix downloading to non-seekable file, such as /dev/stdout Co-authored-by: athakur-reef --- CHANGELOG.md | 4 +- b2sdk/_internal/__init__.py | 14 ++++ b2sdk/_internal/utils/__init__.py | 9 +++ b2sdk/_internal/utils/filesystem.py | 36 +++++++++ b2sdk/_v3/__init__.py 
| 5 ++ b2sdk/exception.py | 6 +- b2sdk/transfer/inbound/downloaded_file.py | 90 ++++++++++++++++++++--- b2sdk/utils/__init__.py | 28 ++++++- b2sdk/v1/download_dest.py | 6 +- test/conftest.py | 18 +++++ test/helpers.py | 11 +++ test/integration/conftest.py | 2 +- test/integration/test_download.py | 54 ++++++++++++++ test/unit/bucket/test_bucket.py | 27 ++++++- test/unit/utils/test_filesystem.py | 53 +++++++++++++ 15 files changed, 336 insertions(+), 27 deletions(-) create mode 100644 b2sdk/_internal/__init__.py create mode 100644 b2sdk/_internal/utils/__init__.py create mode 100644 b2sdk/_internal/utils/filesystem.py create mode 100644 test/conftest.py create mode 100644 test/unit/utils/test_filesystem.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 11eea1b4c..650c170bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,12 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -* Add `*_PART_SIZE` constants to public interface +* Add `*_PART_SIZE` constants +* Add `points_to_fifo`, `points_to_stdout`, `STDOUT_FILEPATH` to API ### Changed * Mark `TempDir` as deprecated in favor of `tempfile.TemporaryDirectory` ### Fixed +* Fix downloading to a non-seekable file, such as /dev/stdout * Fix ScanPoliciesManager support for compiled regexes ### Infrastructure diff --git a/b2sdk/_internal/__init__.py b/b2sdk/_internal/__init__.py new file mode 100644 index 000000000..3a8f1d109 --- /dev/null +++ b/b2sdk/_internal/__init__.py @@ -0,0 +1,14 @@ +###################################################################### +# +# File: b2sdk/_internal/__init__.py +# +# Copyright 2023 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +""" +b2sdk._internal package contains internal modules, and should not be used directly. + +Please use chosen apiver package instead, e.g. b2sdk.v2 +""" diff --git a/b2sdk/_internal/utils/__init__.py b/b2sdk/_internal/utils/__init__.py new file mode 100644 index 000000000..e87ebe6e6 --- /dev/null +++ b/b2sdk/_internal/utils/__init__.py @@ -0,0 +1,9 @@ +###################################################################### +# +# File: b2sdk/_internal/utils/__init__.py +# +# Copyright 2023 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### diff --git a/b2sdk/_internal/utils/filesystem.py b/b2sdk/_internal/utils/filesystem.py new file mode 100644 index 000000000..eaf543659 --- /dev/null +++ b/b2sdk/_internal/utils/filesystem.py @@ -0,0 +1,36 @@ +###################################################################### +# +# File: b2sdk/_internal/utils/filesystem.py +# +# Copyright 2023 Backblaze Inc. All Rights Reserved. 
+# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +import pathlib +import platform +import stat + +_IS_WINDOWS = platform.system() == "Windows" + + +def points_to_fifo(path: pathlib.Path) -> bool: + """Check if the path points to a fifo.""" + path = path.resolve() + try: + + return stat.S_ISFIFO(path.stat().st_mode) + except OSError: + return False + + +_STDOUT_FILENAME = "CON" if _IS_WINDOWS else "/dev/stdout" +STDOUT_FILEPATH = pathlib.Path(_STDOUT_FILENAME) + + +def points_to_stdout(path: pathlib.Path) -> bool: + """Check if the path points to stdout.""" + try: + return path == STDOUT_FILEPATH or path.resolve() == STDOUT_FILEPATH + except OSError: + return False diff --git a/b2sdk/_v3/__init__.py b/b2sdk/_v3/__init__.py index af7f73457..095204330 100644 --- a/b2sdk/_v3/__init__.py +++ b/b2sdk/_v3/__init__.py @@ -64,6 +64,11 @@ IncrementalHexDigester, ) +from b2sdk._internal.utils.filesystem import ( + points_to_fifo, + points_to_stdout, + STDOUT_FILEPATH, +) from b2sdk.utils import trace_call from b2sdk.utils.docs import get_b2sdk_doc_urls diff --git a/b2sdk/exception.py b/b2sdk/exception.py index c04faecf9..77e9468de 100644 --- a/b2sdk/exception.py +++ b/b2sdk/exception.py @@ -556,7 +556,11 @@ class PotentialS3EndpointPassedAsRealm(InvalidJsonResponse): pass -class DestinationDirectoryError(B2Error): +class DestinationError(B2Error): + pass + + +class DestinationDirectoryError(DestinationError): pass diff --git a/b2sdk/transfer/inbound/downloaded_file.py b/b2sdk/transfer/inbound/downloaded_file.py index c866fb695..14f252d81 100644 --- a/b2sdk/transfer/inbound/downloaded_file.py +++ b/b2sdk/transfer/inbound/downloaded_file.py @@ -9,23 +9,32 @@ ###################################################################### from __future__ import annotations +import contextlib import io import logging import pathlib -from typing import TYPE_CHECKING +import sys +from typing import TYPE_CHECKING, BinaryIO from requests.models import Response +from b2sdk._internal.utils.filesystem import _IS_WINDOWS, points_to_fifo, points_to_stdout from b2sdk.exception import ( ChecksumMismatch, DestinationDirectoryDoesntAllowOperation, DestinationDirectoryDoesntExist, + DestinationError, DestinationIsADirectory, DestinationParentIsNotADirectory, TruncatedOutput, ) from b2sdk.utils import set_file_mtime +try: + from typing_extensions import Literal +except ImportError: + from typing import Literal + from ...encryption.setting import EncryptionSetting from ...file_version import DownloadVersion from ...progress import AbstractProgressListener @@ -40,6 +49,9 @@ class MtimeUpdatedFile(io.IOBase): """ Helper class that facilitates updating a files mod_time after closing. + + Over the time this class has grown, and now it also adds better exception handling. + Usage: .. 
code-block: python @@ -50,13 +62,27 @@ class MtimeUpdatedFile(io.IOBase): # 'some_local_path' has the mod_time set according to metadata in B2 """ - def __init__(self, path_, mod_time_millis: int, mode='wb+', buffering=None): - self.path_ = path_ + def __init__( + self, + path_: str | pathlib.Path, + mod_time_millis: int, + mode: Literal['wb', 'wb+'] = 'wb+', + buffering: int | None = None, + ): + self.path = pathlib.Path(path_) if isinstance(path_, str) else path_ self.mode = mode self.buffering = buffering if buffering is not None else -1 self.mod_time_to_set = mod_time_millis self.file = None + @property + def path_(self) -> str: + return str(self.path) + + @path_.setter + def path_(self, value: str) -> None: + self.path = pathlib.Path(value) + def write(self, value): """ This method is overwritten (monkey-patched) in __enter__ for performance reasons @@ -69,6 +95,9 @@ def read(self, *a): """ raise NotImplementedError + def seekable(self) -> bool: + return self.file.seekable() + def seek(self, offset, whence=0): return self.file.seek(offset, whence) @@ -77,7 +106,7 @@ def tell(self): def __enter__(self): try: - path = pathlib.Path(self.path_) + path = self.path if not path.parent.exists(): raise DestinationDirectoryDoesntExist() @@ -91,14 +120,18 @@ def __enter__(self): except PermissionError as ex: raise DestinationDirectoryDoesntAllowOperation() from ex - # All remaining problems should be with permissions. try: - self.file = open(self.path_, self.mode, buffering=self.buffering) + self.file = open( + self.path, + self.mode, + buffering=self.buffering, + ) except PermissionError as ex: raise DestinationDirectoryDoesntAllowOperation() from ex self.write = self.file.write self.read = self.file.read + self.mode = self.file.mode return self def __exit__(self, exc_type, exc_val, exc_tb): @@ -106,7 +139,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): set_file_mtime(self.path_, self.mod_time_to_set) def __str__(self): - return str(self.path_) + return str(self.path) class DownloadedFile: @@ -157,7 +190,7 @@ def _validate_download(self, bytes_read, actual_sha1): if bytes_read != desired_length: raise TruncatedOutput(bytes_read, desired_length) - def save(self, file, allow_seeking=True): + def save(self, file: BinaryIO, allow_seeking: bool | None = None) -> None: """ Read data from B2 cloud and write it to a file-like object @@ -165,6 +198,12 @@ def save(self, file, allow_seeking=True): :param allow_seeking: if False, download strategies that rely on seeking to write data (parallel strategies) will be discarded. """ + if allow_seeking is None: + allow_seeking = file.seekable() + elif allow_seeking and not file.seekable(): + logger.warning('File is not seekable, disabling strategies that require seeking') + allow_seeking = False + if self.progress_listener: file = WritingStreamWithProgress(file, self.progress_listener) if self.range_ is not None: @@ -187,7 +226,12 @@ def save(self, file, allow_seeking=True): ) self._validate_download(bytes_read, actual_sha1) - def save_to(self, path_, mode='wb+', allow_seeking=True): + def save_to( + self, + path_: str | pathlib.Path, + mode: Literal['wb', 'wb+'] | None = None, + allow_seeking: bool | None = None, + ) -> None: """ Open a local file and write data from B2 cloud to it, also update the mod_time. @@ -196,10 +240,34 @@ def save_to(self, path_, mode='wb+', allow_seeking=True): :param allow_seeking: if False, download strategies that rely on seeking to write data (parallel strategies) will be discarded. 
""" + path_ = pathlib.Path(path_) + is_stdout = points_to_stdout(path_) + if is_stdout or points_to_fifo(path_): + if mode not in (None, 'wb'): + raise DestinationError(f'invalid mode requested {mode!r} for FIFO file {path_!r}') + + if is_stdout and _IS_WINDOWS: + if self.write_buffer_size and self.write_buffer_size not in ( + -1, io.DEFAULT_BUFFER_SIZE + ): + logger.warning( + 'Unable to set arbitrary write_buffer_size for stdout on Windows' + ) + context = contextlib.nullcontext(sys.stdout.buffer) + else: + context = open(path_, 'wb', buffering=self.write_buffer_size or -1) + + try: + with context as file: + return self.save(file, allow_seeking=allow_seeking) + finally: + if not is_stdout: + set_file_mtime(path_, self.download_version.mod_time_millis) + with MtimeUpdatedFile( path_, mod_time_millis=self.download_version.mod_time_millis, - mode=mode, + mode=mode or 'wb+', buffering=self.write_buffer_size, ) as file: - self.save(file, allow_seeking=allow_seeking) + return self.save(file, allow_seeking=allow_seeking) diff --git a/b2sdk/utils/__init__.py b/b2sdk/utils/__init__.py index 85c353b52..cac740cc5 100644 --- a/b2sdk/utils/__init__.py +++ b/b2sdk/utils/__init__.py @@ -11,7 +11,9 @@ import base64 import hashlib +import logging import os +import pathlib import platform import re import time @@ -23,6 +25,8 @@ from logfury.v1 import DefaultTraceAbstractMeta, DefaultTraceMeta, limit_trace_arguments, disable_trace, trace_call +logger = logging.getLogger(__name__) + Sha1HexDigest = NewType('Sha1HexDigest', str) T = TypeVar('T') # TODO: When we drop Python 3.7 support, this should be replaced @@ -277,14 +281,26 @@ def get_file_mtime(local_path): return int(mod_time) -def set_file_mtime(local_path, mod_time_millis): +def is_special_file(path: str | pathlib.Path) -> bool: + """ + Is the path a special file, such as /dev/null or stdout? + + :param path: a "file" path + :return: True if the path is a special file + """ + path_str = str(path) + return ( + path == os.devnull or path_str.startswith('/dev/') or + platform.system() == 'Windows' and path_str.upper() in ('CON', 'NUL') + ) + + +def set_file_mtime(local_path: str | pathlib.Path, mod_time_millis: int) -> None: """ Set modification time of a file in milliseconds. :param local_path: a file path - :type local_path: str :param mod_time_millis: time to be set - :type mod_time_millis: int """ mod_time = mod_time_millis / 1000.0 @@ -299,7 +315,11 @@ def set_file_mtime(local_path, mod_time_millis): # See #617 for details. mod_time = float(Decimal('%.3f5' % mod_time)) - os.utime(local_path, (mod_time, mod_time)) + try: + os.utime(local_path, (mod_time, mod_time)) + except OSError: + if not is_special_file(local_path): + raise def fix_windows_path_limit(path): diff --git a/b2sdk/v1/download_dest.py b/b2sdk/v1/download_dest.py index 3ba73ed33..4aeb43e1a 100644 --- a/b2sdk/v1/download_dest.py +++ b/b2sdk/v1/download_dest.py @@ -93,11 +93,7 @@ def write_to_local_file_context(self, mod_time_millis): with open(self.local_file_path, self.MODE) as f: yield f - # After it's closed, set the mod time. - # This is an ugly hack to make the tests work. I can't think - # of any other cases where set_file_mtime might fail. - if self.local_file_path != os.devnull: - set_file_mtime(self.local_file_path, mod_time_millis) + set_file_mtime(self.local_file_path, mod_time_millis) # Set the flag that means to leave the downloaded file on disk. 
completed = True diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 000000000..17045c374 --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,18 @@ +###################################################################### +# +# File: test/conftest.py +# +# Copyright 2023 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +import concurrent.futures + +import pytest + + +@pytest.fixture +def bg_executor(): + with concurrent.futures.ThreadPoolExecutor() as executor: + yield executor diff --git a/test/helpers.py b/test/helpers.py index b17d8bbd0..550e32493 100644 --- a/test/helpers.py +++ b/test/helpers.py @@ -11,6 +11,7 @@ import contextlib import inspect +import io from unittest.mock import patch @@ -33,3 +34,13 @@ def patch_bind_params(instance, method_name): *mock_method.call_args[0], **mock_method.call_args[1] ).arguments yield mock_method + + +class NonSeekableIO(io.BytesIO): + """Emulate a non-seekable file""" + + def seek(self, *args, **kwargs): + raise OSError('not seekable') + + def seekable(self): + return False diff --git a/test/integration/conftest.py b/test/integration/conftest.py index c5e462044..cf96039ef 100644 --- a/test/integration/conftest.py +++ b/test/integration/conftest.py @@ -77,7 +77,7 @@ def bucket_cleaner(bucket_name_prefix, dont_cleanup_old_buckets, _b2_api): @pytest.fixture(scope="session") -def b2_api(_b2_api): +def b2_api(_b2_api, bucket_cleaner): return _b2_api diff --git a/test/integration/test_download.py b/test/integration/test_download.py index 24a821552..a7faf772c 100644 --- a/test/integration/test_download.py +++ b/test/integration/test_download.py @@ -11,13 +11,16 @@ import gzip import io +import os import pathlib +import platform import tempfile from pprint import pprint from unittest import mock import pytest +from b2sdk._internal.utils.filesystem import _IS_WINDOWS from b2sdk.utils import Sha1HexDigest from b2sdk.v2 import * @@ -126,3 +129,54 @@ def test_gzip(self): ) with open(downloaded_uncompressed_file, 'rb') as duf: assert duf.read() == data_to_write + + +@pytest.fixture +def source_file(tmp_path): + source_file = tmp_path / 'source.txt' + source_file.write_text('hello world') + return source_file + + +@pytest.fixture +def uploaded_source_file_version(bucket, source_file): + file_version = bucket.upload_local_file(str(source_file), source_file.name) + return file_version + + +@pytest.mark.skipif(platform.system() == 'Windows', reason='no os.mkfifo() on Windows') +def test_download_to_fifo(bucket, tmp_path, source_file, uploaded_source_file_version, bg_executor): + output_file = tmp_path / 'output.txt' + os.mkfifo(output_file) + output_string = None + + def reader(): + nonlocal output_string + output_string = output_file.read_text() + + reader_future = bg_executor.submit(reader) + + bucket.download_file_by_id(file_id=uploaded_source_file_version.id_).save_to(output_file) + + reader_future.result(timeout=1) + assert source_file.read_text() == output_string + + +@pytest.fixture +def binary_cap(request): + """ + Get best suited capture. + + For Windows we need capsys as capfd fails, while on any other (i.e. POSIX systems) we need capfd. + This is sadly tied directly to how .save_to() is implemented, as Windows required special handling. 
+ """ + cap = request.getfixturevalue("capsysbinary" if _IS_WINDOWS else "capfdbinary") + yield cap + + +def test_download_to_stdout(bucket, source_file, uploaded_source_file_version, binary_cap): + output_file = "CON" if _IS_WINDOWS else "/dev/stdout" + + bucket.download_file_by_id(file_id=uploaded_source_file_version.id_).save_to(output_file) + + assert binary_cap.readouterr().out == source_file.read_bytes() diff --git a/test/unit/bucket/test_bucket.py b/test/unit/bucket/test_bucket.py index fd5e19f92..58c29259e 100644 --- a/test/unit/bucket/test_bucket.py +++ b/test/unit/bucket/test_bucket.py @@ -19,6 +19,7 @@ import unittest.mock as mock from contextlib import suppress from io import BytesIO +from test.helpers import NonSeekableIO import apiver_deps import pytest @@ -2289,6 +2290,26 @@ def _check_local_file_contents(self, path, expected_contents): contents = f.read() self.assertEqual(contents, expected_contents) + @pytest.mark.apiver(from_ver=2) + def test_download_to_non_seekable_file(self): + file_version = self.bucket.upload_bytes(self.DATA.encode(), 'file1') + + non_seekable_strategies = [ + strat for strat in self.bucket.api.services.download_manager.strategies + if not isinstance(strat, ParallelDownloader) + ] + context = contextlib.nullcontext() if non_seekable_strategies else pytest.raises( + ValueError, + match='no strategy suitable for download was found!', + ) + output_file = NonSeekableIO() + with context: + self.download_file_by_id( + file_version.id_, + v2_file=output_file, + ) + assert output_file.getvalue() == self.DATA.encode() + # download empty file @@ -2515,14 +2536,12 @@ def test_get_chunk_size_alignment(self): def test_buffering_in_save_to(self): with tempfile.TemporaryDirectory() as d: - path = os.path.join(d, 'file2') + path = pathlib.Path(d) / 'file2' with mock.patch('b2sdk.transfer.inbound.downloaded_file.open') as mock_open: mock_open.side_effect = open self.bucket.download_file_by_id(self.file_version.id_).save_to(path) mock_open.assert_called_once_with(path, mock.ANY, buffering=self.ALIGN_FACTOR) - with open(path) as f: - contents = f.read() - assert contents == self.DATA + assert path.read_text() == self.DATA def test_set_write_buffer_parallel_called_get_chunk_size(self): self._check_called_on_downloader( diff --git a/test/unit/utils/test_filesystem.py b/test/unit/utils/test_filesystem.py new file mode 100644 index 000000000..278d25a73 --- /dev/null +++ b/test/unit/utils/test_filesystem.py @@ -0,0 +1,53 @@ +###################################################################### +# +# File: test/unit/utils/test_filesystem.py +# +# Copyright 2023 Backblaze Inc. All Rights Reserved. 
+# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +import os +import pathlib +import platform + +import pytest +from apiver_deps import ( + STDOUT_FILEPATH, + points_to_fifo, + points_to_stdout, +) + +EXPECTED_STDOUT_PATH = pathlib.Path("CON" if platform.system() == "Windows" else "/dev/stdout") + + +class TestPointsToFifo: + @pytest.mark.skipif(platform.system() == "Windows", reason="no os.mkfifo() on Windows") + def test_fifo_path(self, tmp_path): + fifo_path = tmp_path / "fifo" + os.mkfifo(fifo_path) + assert points_to_fifo(fifo_path) is True + + def test_non_fifo_path(self, tmp_path): + path = tmp_path / "subdir" + path.mkdir(parents=True) + assert points_to_fifo(path) is False + + def test_non_existent_path(self, tmp_path): + path = tmp_path / "file.txt" + assert points_to_fifo(path) is False + + +class TestPointsToStdout: + def test_stdout_path(self): + assert points_to_stdout(EXPECTED_STDOUT_PATH) is True + assert points_to_stdout(STDOUT_FILEPATH) is True + + def test_non_stdout_path(self, tmp_path): + path = tmp_path / "file.txt" + path.touch() + assert points_to_stdout(path) is False + + def test_non_existent_stdout_path(self, tmp_path): + path = tmp_path / "file.txt" + assert points_to_stdout(path) is False From 23e383fd1acef3aa198b8eb4105261d70b1b1363 Mon Sep 17 00:00:00 2001 From: Maciej Urbanski Date: Wed, 15 Nov 2023 10:29:29 +0100 Subject: [PATCH 3/3] expose BUCKET_NAME_* consts and improve test bucket name generation --- CHANGELOG.md | 4 ++-- b2sdk/_v3/__init__.py | 3 +++ b2sdk/http_constants.py | 7 +++++++ test/integration/helpers.py | 18 ++++++++++++------ 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 650c170bd..bf89618c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -* Add `*_PART_SIZE` constants -* Add `points_to_fifo`, `points_to_stdout`, `STDOUT_FILEPATH` to API +* Add `*_PART_SIZE`, `BUCKET_NAME_*`, `STDOUT_FILEPATH` constants +* Add `points_to_fifo`, `points_to_stdout` functions ### Changed * Mark `TempDir` as deprecated in favor of `tempfile.TemporaryDirectory` diff --git a/b2sdk/_v3/__init__.py b/b2sdk/_v3/__init__.py index 095204330..ff00f16a5 100644 --- a/b2sdk/_v3/__init__.py +++ b/b2sdk/_v3/__init__.py @@ -244,6 +244,9 @@ from b2sdk.cache import DummyCache from b2sdk.cache import InMemoryCache from b2sdk.http_constants import ( + BUCKET_NAME_CHARS, + BUCKET_NAME_CHARS_UNIQ, + BUCKET_NAME_LENGTH_RANGE, DEFAULT_MAX_PART_SIZE, DEFAULT_MIN_PART_SIZE, DEFAULT_RECOMMENDED_UPLOAD_PART_SIZE, diff --git a/b2sdk/http_constants.py b/b2sdk/http_constants.py index da2acab74..ead34b456 100644 --- a/b2sdk/http_constants.py +++ b/b2sdk/http_constants.py @@ -9,9 +9,16 @@ ###################################################################### from __future__ import annotations +import string + # These constants are needed in different modules, so they are stored in this module, that # imports nothing, thus avoiding circular imports +# https://www.backblaze.com/docs/cloud-storage-buckets#bucket-names +BUCKET_NAME_CHARS = string.ascii_lowercase + string.digits + '-' +BUCKET_NAME_CHARS_UNIQ = string.ascii_lowercase + string.digits + '-' +BUCKET_NAME_LENGTH_RANGE = (6, 63) + LIST_FILE_NAMES_MAX_LIMIT = 10000 # https://www.backblaze.com/b2/docs/b2_list_file_names.html FILE_INFO_HEADER_PREFIX = 'X-Bz-Info-' diff --git 
a/test/integration/helpers.py b/test/integration/helpers.py index 735653fb3..141474760 100644 --- a/test/integration/helpers.py +++ b/test/integration/helpers.py @@ -10,19 +10,25 @@ from __future__ import annotations import os -import random -import string +import secrets -from b2sdk.v2 import * +from b2sdk.v2 import ( + BUCKET_NAME_CHARS_UNIQ, + BUCKET_NAME_LENGTH_RANGE, + DEFAULT_HTTP_API_CONFIG, + B2Api, + InMemoryAccountInfo, +) GENERAL_BUCKET_NAME_PREFIX = 'sdktst' -BUCKET_NAME_CHARS = string.ascii_letters + string.digits + '-' -BUCKET_NAME_LENGTH = 50 +BUCKET_NAME_LENGTH = BUCKET_NAME_LENGTH_RANGE[1] BUCKET_CREATED_AT_MILLIS = 'created_at_millis' +RNG = secrets.SystemRandom() + def _bucket_name_prefix_part(length: int) -> str: - return ''.join(random.choice(BUCKET_NAME_CHARS) for _ in range(length)) + return ''.join(RNG.choice(BUCKET_NAME_CHARS_UNIQ) for _ in range(length)) def get_bucket_name_prefix(rnd_len: int = 8) -> str:
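
Usage sketch (illustrative only): how the names exposed by patches 2 and 3 might be exercised together. It assumes `points_to_fifo`, `points_to_stdout`, `STDOUT_FILEPATH`, `BUCKET_NAME_CHARS_UNIQ` and `BUCKET_NAME_LENGTH_RANGE` are re-exported through `b2sdk.v2` (the test helpers above import the bucket-name constants that way), and that the key id, application key, bucket name and file name below are placeholders rather than anything defined by these patches.

    import pathlib
    import secrets

    from b2sdk.v2 import (  # assumed re-exports; mirrors test/integration/helpers.py
        BUCKET_NAME_CHARS_UNIQ,
        BUCKET_NAME_LENGTH_RANGE,
        STDOUT_FILEPATH,
        B2Api,
        InMemoryAccountInfo,
        points_to_fifo,
        points_to_stdout,
    )

    # Build a throwaway bucket name the way the refactored test helpers do:
    # a short prefix padded with random characters up to the maximum allowed length.
    rng = secrets.SystemRandom()
    prefix = 'sdktst'  # hypothetical prefix; the tests use GENERAL_BUCKET_NAME_PREFIX
    max_len = BUCKET_NAME_LENGTH_RANGE[1]
    bucket_name = prefix + ''.join(
        rng.choice(BUCKET_NAME_CHARS_UNIQ) for _ in range(max_len - len(prefix))
    )
    assert BUCKET_NAME_LENGTH_RANGE[0] <= len(bucket_name) <= max_len

    # The new filesystem helpers classify destination paths before a download is written.
    assert points_to_stdout(STDOUT_FILEPATH)
    assert not points_to_fifo(pathlib.Path('regular-file.txt'))

    # Downloading straight to stdout works now that save_to() no longer insists on
    # a seekable 'wb+' destination for special files.
    api = B2Api(InMemoryAccountInfo())
    api.authorize_account('production', 'KEY_ID', 'APPLICATION_KEY')  # placeholder credentials
    bucket = api.get_bucket_by_name(bucket_name)  # assumes this bucket already exists
    bucket.download_file_by_name('some-file.txt').save_to(STDOUT_FILEPATH)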